Searching Data in Text, Word (.docx), Excel, and PDF Files Using .NET Core

In this post we are going to develop a .NET Core MVC application that searches for data in text, Word (.docx), Excel, and PDF files.

For development, I have used the libraries below:

using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using ExcelDataReader;
using iTextSharp.text.pdf;

 

Reading Text File data

// Searches a plain-text file for a literal phrase (case-insensitive) and
// stores "True" or "False" in ViewBag.result for the view to display.
string searchString = "search from text file";

var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "testing.txt");

// Start from "not found" so ViewBag.result is always set, even when the file is missing.
bool found = false;

// System.IO.File is fully qualified; presumably this avoids a clash with the
// controller's own File(...) helper when the snippet lives in an MVC action.
if (System.IO.File.Exists(path))
{
    string text = System.IO.File.ReadAllText(path);

    // Regex.Escape makes the phrase match literally. Without it, characters such as
    // '.', '+', '(' or '[' in the search text would be interpreted as regex syntax
    // and could either throw or match unintended text.
    found = Regex.IsMatch(text, Regex.Escape(searchString), RegexOptions.IgnoreCase);
}

ViewBag.result = found ? "True" : "False";

 

Reading PDF File Data

Add the "iTextSharp" library using the NuGet Package Manager and write the code below:

// Extracts the text of every page of a PDF with iTextSharp, then searches it for a
// literal phrase (case-insensitive) and stores "True" or "False" in ViewBag.result.
string searchString = "search from PDF file";

var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "0900000120.pdf");

// Start from "not found" so ViewBag.result is always set, even when the file is missing.
bool found = false;

if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();
    using (PdfReader reader = new PdfReader(path))
    {
        // iTextSharp page numbers are 1-based.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            // AppendLine keeps the last word of one page from fusing with the first
            // word of the next page, which could otherwise produce false matches.
            text.AppendLine(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, page));
        }
    }

    // Regex.Escape makes the phrase match literally instead of being parsed as a
    // regular-expression pattern.
    found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);
}

ViewBag.result = found ? "True" : "False";

 

Reading Word (.docx) File Data

Add the "DocumentFormat.OpenXml" package using the NuGet Package Manager and write the code below:

// Reads the top-level paragraphs of a .docx document with the Open XML SDK, then
// searches their text for a literal phrase (case-insensitive) and stores
// "True" or "False" in ViewBag.result.
string searchString = "search from DOC file";

var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "phpblog.docx");

// Start from "not found" so ViewBag.result is always set, even when the file is missing.
bool found = false;

if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();

    // Second argument 'false' opens the package read-only.
    using (var doc = WordprocessingDocument.Open(path, false))
    {
        // A package without a main part or body simply contributes no text.
        var body = doc.MainDocumentPart?.Document?.Body;
        if (body != null)
        {
            // Elements<Paragraph>() selects the same direct-child paragraphs as
            // Elements().OfType<Paragraph>() but does not require System.Linq.
            foreach (var paragraph in body.Elements<Paragraph>())
            {
                // AppendLine keeps adjacent paragraphs from fusing into one word run,
                // which could otherwise produce false matches.
                text.AppendLine(paragraph.InnerText);
            }
        }
    }

    // Regex.Escape makes the phrase match literally instead of being parsed as a
    // regular-expression pattern.
    found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);
}

ViewBag.result = found ? "True" : "False";

 

Reading Excel File Data

Add the "ExcelDataReader" package using the NuGet Package Manager and write the code below:

// Reads every cell of every sheet of an Excel workbook with ExcelDataReader, then
// searches the collected text for a literal phrase (case-insensitive) and stores
// "True" or "False" in ViewBag.result.
string searchString = "search from Excel file";

var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "Daily_Target_Net.xlsx");

// Start from "not found" so ViewBag.result is always set, even when the file is missing.
bool found = false;

if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();

    // Registers the code-page encodings before the reader is created
    // (ExcelDataReader presumably needs them on .NET Core for legacy workbooks).
    System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

    // FileShare.Read lets other readers (e.g. concurrent web requests) open the same
    // workbook; the three-argument File.Open overload would open it with no sharing.
    using (var stream = System.IO.File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var reader = ExcelReaderFactory.CreateReader(stream))
    {
        do
        {
            while (reader.Read()) // Each row of the current sheet
            {
                for (int column = 0; column < reader.FieldCount; column++)
                {
                    // The tab keeps neighbouring cells from fusing into one string,
                    // which could otherwise produce false matches across cells.
                    text.Append(reader.GetValue(column)).Append('\t');
                }

                // Line break between rows, for the same reason.
                text.AppendLine();
            }
        } while (reader.NextResult()); // Move to the next sheet
    }

    // Regex.Escape makes the phrase match literally instead of being parsed as a
    // regular-expression pattern.
    found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);
}

ViewBag.result = found ? "True" : "False";

 

Leave a Comment