iTextSharp 的 PdfReader 导致应用程序崩溃(即使放在 try/catch 中)

时间:2015-05-02 21:39:48

标签: c# itext

iTextSharp 大部分时间都能正常工作,大多数错误也都能被 try/catch 块捕获。但是,它偶尔会遇到某个无法处理的文件,导致整个应用程序崩溃。

当我尝试在 Windows 中直接打开该 PDF 文件时,会收到如下消息:

  

"无法打开此文件。文件格式存在问题"

很明显文件已损坏。

有没有办法捕获此错误?

Link to the file on drop box

/// <summary>
/// Extracts the text of every page of a PDF file and computes simple
/// word and character counts for it.
/// </summary>
/// <param name="ScanId">Identifier recorded in the error log entry if extraction fails.</param>
/// <param name="pdfPath">Path of the PDF file to read; also recorded in the error log entry.</param>
/// <param name="NumWords">
/// Receives the number of non-empty tokens obtained by splitting the text on
/// word boundaries followed by whitespace or <c>, . - : ;</c>. Stays 0 if an
/// exception is thrown before the count is computed.
/// </param>
/// <param name="NumCharacters">
/// Receives the number of letter-or-digit characters in the text. Stays 0 if an
/// exception is thrown before the count is computed.
/// </param>
/// <returns>
/// The concatenated text of all pages, or an empty string if an exception was
/// thrown before the text was assembled.
/// </returns>
/// <remarks>
/// Every exception is logged through <c>AddErrorLog</c> and counted in
/// <c>NumFileErrors</c>; none is rethrown.
/// NOTE(review): failures that the runtime does not deliver to catch blocks
/// (for example a stack overflow while parsing a corrupted file) cannot be
/// handled here; isolating the extraction in a separate process would be
/// needed for those - confirm against the actual crash.
/// </remarks>
public string GetPDFText(int ScanId, String pdfPath, out int NumWords, out int NumCharacters)
    {
        string Content = "";
        NumWords = 0;
        NumCharacters = 0;

        try
        {
            using (PdfReader reader = new PdfReader(pdfPath))
            {
                StringBuilder text = new StringBuilder();

                // Page numbers passed to the extractor start at 1.
                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted value is already a .NET string, so it is appended
                    // as-is. Re-encoding it through Encoding.Default would replace every
                    // character outside the default code page with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(reader, i, strategy));
                }

                Content = text.ToString();

                // The hyphen is escaped so that '.', '-' and ':' are literal separators.
                // Unescaped, ".-:" is a range that also covers '/' and the digits 0-9,
                // which made numeric tokens disappear from the word count.
                var regex = new Regex(@"\b[\s,.\-:;]*");
                NumWords = regex.Split(Content).Count(x => !string.IsNullOrEmpty(x));
                NumCharacters = Content.Count(char.IsLetterOrDigit);
            }
        }
        catch (Exception ex)
        {
            // A single handler replaces the former DocumentException /
            // InvalidPdfException / Exception handlers, whose bodies were identical.
            // TargetSite can be null; guard it so that logging cannot itself throw.
            string targetSite = ex.TargetSite != null ? ex.TargetSite.ToString() : string.Empty;

            AddErrorLog(new ErrorLog(-1, ex.GetType().Name, MethodBase.GetCurrentMethod().Name, ScanId, pdfPath, ex.Message, targetSite, DateTime.Now, Environment.UserName, false));
            NumFileErrors++;
        }

        return Content;
    }

0 个答案:

没有答案