无法使用 iText7 将 Open XML SDK 生成的带图像 HTML 转换为 PDF

问题描述

我正在使用 Open XML SDK 2.12.1 版本将 Word 文档(.docx)转换为 HTML 文件

# Two dicts mapping string keys to integer counts; 'Biosynthesis of antibiotics' is the only key present in both.
d1 = {'Alanine,aspartate and glutamate Metabolism': 4,'Ascorbate and aldarate Metabolism': 4,'Benzoate degradation': 6,'Biosynthesis of antibiotics': 16}

d2 = {'Amino sugar and nucleotide sugar Metabolism': 5,'Arginine and proline Metabolism': 4,'Biosynthesis of antibiotics': 11,'Biosynthesis of secondary Metabolites': 21}

# Prints the result of merge(d1, d2). merge is not defined in this snippet.
# NOTE(review): presumably it pairs the two counts per key, using 0 when a key is missing from one dict - confirm.
print(merge(d1,d2))

并使用 iText (Version 7.1.14) 使用 {'Biosynthesis of antibiotics': [16,11],'Amino sugar and nucleotide sugar Metabolism': [0,5],'Arginine and proline Metabolism': [0,4],'Alanine,aspartate and glutamate Metabolism': [4,0],'Ascorbate and aldarate Metabolism': [4,0],'Benzoate degradation': [6,0],'Biosynthesis of secondary Metabolites': [0,21]}

将 html 转换为 pdf
/// <summary>
/// Converts the document bytes stored in session under "ByteArray" to an HTML file
/// and redirects the browser to the generated file.
/// </summary>
/// <param name="sender">Control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void btndisplay_Click(object sender, EventArgs e)
{
    byte[] byteArray = (byte[])(Session["ByteArray"]);

    // Nothing stored in session: report it and stop.
    if (byteArray == null)
    {
        lblMessage.Text = "You have not specified a file.";
        return;
    }

    string htmlFileName;
    try
    {
        DirectoryInfo convertedDocsDirectory =
            new DirectoryInfo(Server.MapPath(DocxConvertedToHtmlDirectory));

        if (!convertedDocsDirectory.Exists)
        {
            convertedDocsDirectory.Create();
        }

        // Unique file name for each converted document.
        htmlFileName = Guid.NewGuid().ToString() + ".html";

        ConvertToHtml(byteArray, convertedDocsDirectory, htmlFileName);
    }
    catch (Exception ex)
    {
        lblMessage.Text = "ERROR: " + ex.Message;
        return;
    }

    // Redirect outside the try block: Response.Redirect ends the response by raising
    // ThreadAbortException, which the catch-all above would otherwise intercept.
    Response.Redirect(DocxConvertedToHtmlDirectory + htmlFileName);
}


/// <summary>
/// Converts a Word document to an HTML file. Images found in the document are saved
/// to a sibling "&lt;html file name without extension&gt;_files" directory and referenced
/// from the generated markup by relative path.
/// </summary>
/// <param name="byteArray">Raw bytes of the Word document.</param>
/// <param name="destDirectory">Directory in which the HTML file is written.</param>
/// <param name="htmlFileName">Name (including extension) of the HTML file to create.</param>
public static void ConvertToHtml(byte[] byteArray, DirectoryInfo destDirectory, string htmlFileName)
{
    FileInfo fiHtml = new FileInfo(Path.Combine(destDirectory.FullName, htmlFileName));

    using (MemoryStream memoryStream = new MemoryStream())
    {
        // Stream.Write takes (buffer, offset, count); the offset argument was missing.
        memoryStream.Write(byteArray, 0, byteArray.Length);

        using (WordprocessingDocument wDoc = WordprocessingDocument.Open(memoryStream, true))
        {
            // "<name>.html" -> "<name>_files", both as an absolute path (for saving)
            // and as a relative name (for the img src attribute).
            var imageDirectoryRelativeName =
                Path.GetFileNameWithoutExtension(fiHtml.Name) + "_files";

            var imageDirectoryFullName =
                Path.Combine(fiHtml.DirectoryName, imageDirectoryRelativeName);

            int imageCounter = 0;

            // The core-properties part is optional; leave the title null when it is absent
            // (the title is already null when the part has no dc:title element).
            string pageTitle = null;
            var corePropertiesPart = wDoc.CoreFilePropertiesPart;
            if (corePropertiesPart != null)
            {
                pageTitle = (string)corePropertiesPart
                    .GetXDocument()
                    .Descendants(DC.title)
                    .FirstOrDefault();
            }

            HtmlConverterSettings settings = new HtmlConverterSettings()
            {
                PageTitle = pageTitle,
                FabricateCssClasses = true,
                CssClassPrefix = "pt-",
                RestrictToSupportedLanguages = false,
                RestrictToSupportedNumberingFormats = false,
                ImageHandler = imageInfo =>
                {
                    DirectoryInfo localDirInfo = new DirectoryInfo(imageDirectoryFullName);
                    if (!localDirInfo.Exists)
                    {
                        localDirInfo.Create();
                    }

                    ++imageCounter;

                    // Subtype of the MIME content type, e.g. "image/png" -> "png".
                    string extension = imageInfo.ContentType.Split('/')[1].ToLowerInvariant();

                    ImageFormat imageFormat = null;
                    switch (extension)
                    {
                        case "png":
                        case "tiff":
                            // Convert png and tiff to gif.
                            extension = "gif";
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "gif":
                            imageFormat = ImageFormat.Gif;
                            break;
                        case "bmp":
                            imageFormat = ImageFormat.Bmp;
                            break;
                        case "jpeg":
                            imageFormat = ImageFormat.Jpeg;
                            break;
                        case "x-wmf":
                            extension = "wmf";
                            imageFormat = ImageFormat.Wmf;
                            break;
                    }

                    // If the image format isn't one that we expect, ignore it,
                    // and don't return markup for the link.
                    if (imageFormat == null)
                    {
                        return null;
                    }

                    FileInfo imageFileName = new FileInfo(Path.Combine(
                        imageDirectoryFullName,
                        "image" + imageCounter.ToString() + "." + extension));

                    try
                    {
                        imageInfo.Bitmap.Save(imageFileName.FullName, imageFormat);
                    }
                    catch (System.Runtime.InteropServices.ExternalException)
                    {
                        // The image could not be saved in this format: omit it from the markup.
                        return null;
                    }

                    XElement img = new XElement(Xhtml.img,
                        new XAttribute(NoNamespace.src, imageDirectoryRelativeName + "/" + imageFileName.Name),
                        imageInfo.ImgStyleAttribute,
                        imageInfo.AltText != null
                            ? new XAttribute(NoNamespace.alt, imageInfo.AltText)
                            : null);
                    return img;
                }
            };

            XElement html = HtmlConverter.ConvertToHtml(wDoc, settings);

            var body = html.Descendants(Xhtml.body).First();

            // Prepend a link back to the upload page.
            body.AddFirst(
                new XElement(Xhtml.p,
                    new XElement(Xhtml.a,
                        new XAttribute("href", "/Default2.aspx"),
                        "Go back to Upload Page")));

            var htmlString = html.ToString(SaveOptions.DisableFormatting);

            File.WriteAllText(fiHtml.FullName, htmlString, Encoding.UTF8);
        }
    }
}

结果中的 PDF 根本没有图像!

PDF 文件应该是这样的

enter image description here

但结果中没有图像,似乎是因为 iText7 需要 base64 内嵌图像,而生成的 HTML 引用的是外部图像文件。

我试过没有成功

HtmlConverter (html2pdf version 2.0.1)

如何修改我的代码,使 docx 转换得到的 HTML 文件以 base64 形式内嵌图像?

HTML

这是 HTML 代码

要查看 pdf 文件中的图像,我需要这部分

/// <summary>
/// Reads the HEAD column of every row in xmltable, converts each value from HTML to a
/// PDF file with HtmlConverter.ConvertToPdf, and sends the last generated file to the
/// browser as an attachment.
/// </summary>
/// <param name="sender">Page that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // NOTE(review): this document is written to Response.OutputStream and closed before
    // Response.Clear() below; whether its bytes ever reach the client depends on response
    // buffering - confirm that this document is still needed.
    PdfWriter writer = new PdfWriter(Response.OutputStream);
    PdfDocument pdf = new PdfDocument(writer);
    Document document = new Document(pdf);

    using (MySqlConnection con =
        new MySqlConnection(ConfigurationManager.ConnectionStrings["con"].ConnectionString))
    using (MySqlCommand cmd =
        new MySqlCommand("SELECT `HEAD` FROM `xmltable`;", con))
    {
        cmd.CommandType = CommandType.Text;
        con.Open();

        // Disposing the reader, command and connection also covers the exception paths.
        using (MySqlDataReader reader = cmd.ExecuteReader())
        {
            while (reader.Read())
            {
                html = reader.GetString("HEAD");

                Paragraph contents = new Paragraph(html)
                    .SetTextAlignment(TextAlignment.JUSTIFIED)
                    .SetFontSize(12);

                nomefile = @"C:\\Desktop\Management_" + Guid.NewGuid() + ".pdf";
                dest = nomefile;

                document.Add(contents);

                // One PDF file per row; dest keeps the path of the most recent one.
                using (FileStream pdfFile = new FileStream(dest, FileMode.Create))
                {
                    HtmlConverter.ConvertToPdf(html, pdfFile);
                }
            }
        }
    }

    document.Close();

    Response.Clear();
    Response.ContentType = "application/pdf";
    // Send only the file name, not the full server-side path, as the download name.
    Response.AppendHeader("Content-disposition", "attachment; filename=" + Path.GetFileName(dest));
    Response.TransmitFile(dest);
    Response.End();
}

改为

// NOTE(review): this try block has no catch/finally in the visible fragment; presumably it
// is meant to replace the try block around Bitmap.Save in the image handler - confirm.
try
{
    // Saves the image to the target file in the selected format.
    imageInfo.Bitmap.Save(imageFileName.FullName,imageFormat);
    // NOTE(review): nothing in this fragment writes the image into memoryStream; if it is
    // the stream holding the Word document, these bytes are the document, not the image - confirm.
    byte[] imageBytes = memoryStream.ToArray();
    // base64String is declared inside the try block and is not used in this fragment.
    string base64String = Convert.ToBase64String(imageBytes);
}

(请注意,base64 编码的图像被截断以适合此页面)

带有 base64 编码图像的文件的浏览器视图和结果 PDF

Reference

<img src="01a5094b-2d44-4399-93b2-2cc2db7ebfc2_files/image1.gif" style="width: 2.875in; height: 0.4583333in" alt="Immagine 9" />

解决方法

暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)