Tuesday, 6 August 2013

Convert HTML To PDF using itextsharp ASP.NET

It is very general task to convert the html to pdf in web development. There are number of ways available to convert html to pdf, some of them are paid, but as human nature if we get the something free of cost which having a good price in market then we say wow!!! I am too lucky so I got this. In the same sense why we pay for paid tools when we have free ways to convert html to pdf. I have used itextsharp for this game. I have created a function to which just you need to pass the html and the path where you want to save the pdf. Reset all the things will be managed by the function itself.



protected void ConvertHTMLToPDF(string HTMLCode, string FilePath)
    {
        HttpContext context = HttpContext.Current;

        //Render PlaceHolder to temporary stream
        System.IO.StringWriter stringWrite = new StringWriter();
        System.Web.UI.HtmlTextWriter htmlWrite = new HtmlTextWriter(stringWrite);

        /********************************************************************************/
        //Try adding source strings for each image in content
        string tempPostContent = getImage(HTMLCode);
        /*********************************************************************************/

        StringReader reader = new StringReader(tempPostContent);

        //Create PDF document
        Document doc = new Document(PageSize.A4);
        HTMLWorker parser = new HTMLWorker(doc);
        PdfWriter.GetInstance(doc, new FileStream(FilePath,FileMode.Create));
        doc.Open();

        try
        {
            //Parse Html and dump the result in PDF file
            parser.Parse(reader);
        }
        catch (Exception ex)
        {
            //Display parser errors in PDF.
            Paragraph paragraph = new Paragraph("Error!" + ex.Message);
            Chunk text = paragraph.Chunks[0] as Chunk;
            if (text != null)
            {
                text.Font.Color = BaseColor.RED;
            }
            doc.Add(paragraph);
        }
        finally
        {
            doc.Close();
        }
    }

now after this i faced issue with images, to resolve those i have used the below code:

public string getImage(string input)
    {
        if (input == null)
            return string.Empty;
        string tempInput = input;
        string pattern = @"<img(.|\n)+?>";
        string src = string.Empty;
        HttpContext context = HttpContext.Current;

        //Change the relative URL's to absolute URL's for an image, if any in the HTML code.
        foreach (Match m in Regex.Matches(input, pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline |

RegexOptions.RightToLeft))
        {
            if (m.Success)
            {
                string tempM = m.Value;
                string pattern1 = "src=[\'|\"](.+?)[\'|\"]";
                Regex reImg = new Regex(pattern1, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                Match mImg = reImg.Match(m.Value);

                if (mImg.Success)
                {
                    src = mImg.Value.ToLower().Replace("src=", "").Replace("\"", "");

                    if (src.ToLower().Contains("http://") == false)
                    {
                        //Insert new URL in img tag
                        src = "src=\'" + GetWebUrl() + "/" + src.Substring(1, src.Length - 2) + "\'";
                        try
                        {
                            tempM = tempM.Remove(mImg.Index, mImg.Length);
                            tempM = tempM.Insert(mImg.Index, src);

                            //insert new url img tag in whole html code
                            tempInput = tempInput.Remove(m.Index, m.Length);
                            tempInput = tempInput.Insert(m.Index, tempM);
                        }
                        catch (Exception e)
                        {

                        }
                    }
                }
            }
        }
        return tempInput;
    }

    string getSrc(string input)
    {
        string pattern = "src=[\'|\"](.+?)[\'|\"]";
        System.Text.RegularExpressions.Regex reImg = new System.Text.RegularExpressions.Regex(pattern,
            System.Text.RegularExpressions.RegexOptions.IgnoreCase |

System.Text.RegularExpressions.RegexOptions.Multiline);
        System.Text.RegularExpressions.Match mImg = reImg.Match(input);
        if (mImg.Success)
        {
            return mImg.Value.Replace("src=", "").Replace("\"", ""); ;
        }

        return string.Empty;
    }

    public string GetWebUrl()
    {
        return HttpContext.Current.Request.Url.GetLeftPart(UriPartial.Authority) + HttpRuntime.AppDomainAppVirtualPath;
    }
hope this article will save your a lot time of R&D.

3 comments:

  1. Replies
    1. Very good introduction, learned the html to pdf method, I also tried to develop such software.

      Delete
  2. Very good introduction, learned the html to pdf method, I also tried to develop such software.

    ReplyDelete