Searching for data in Text, Word (DOCX), Excel, and PDF files using .NET Core
We are going to develop a .NET Core MVC application that searches for data in text, Word (DOCX), Excel, and PDF files.
For development, I have used the libraries below:
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using ExcelDataReader;
using iTextSharp.text.pdf;
Reading Text File data
// Searches a plain-text file for a literal phrase (case-insensitive) and
// publishes the outcome as the string "True" or "False" in ViewBag.result.
string searchString = "search from text file";
var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "testing.txt");

// If the file does not exist, ViewBag.result is not assigned.
if (System.IO.File.Exists(path))
{
    string text = System.IO.File.ReadAllText(path);

    // Escape the phrase so regex metacharacters (., ?, +, (, [ ...) are matched
    // literally instead of being interpreted as a pattern (or throwing when the
    // phrase happens to be a malformed regular expression).
    bool found = Regex.IsMatch(text, Regex.Escape(searchString), RegexOptions.IgnoreCase);

    ViewBag.result = found ? "True" : "False";
}
Reading PDF file data
Add the "iTextSharp" library using the NuGet Package Manager and write the code below:
// Extracts the text of every page of a PDF and checks whether it contains a
// literal phrase (case-insensitive). The outcome is published as the string
// "True" or "False" in ViewBag.result.
string searchString = "search from PDF file";
var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "0900000120.pdf");

// If the file does not exist, ViewBag.result is not assigned.
if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();

    using (PdfReader reader = new PdfReader(path))
    {
        // PDF page numbers are 1-based.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            // AppendLine keeps a separator between pages so the last word of one
            // page cannot fuse with the first word of the next and produce a
            // false match.
            text.AppendLine(iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, page));
        }
    }

    // Escape the phrase so regex metacharacters are matched literally.
    bool found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);

    ViewBag.result = found ? "True" : "False";
}
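Reading Word (DOCX) file data
Add the "DocumentFormat.OpenXml" package using the NuGet Package Manager and write the code below. Note that the Open XML SDK reads .docx files only, not the legacy binary .doc format.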
// Collects the text of every paragraph in a .docx document and checks whether
// it contains a literal phrase (case-insensitive). The outcome is published as
// the string "True" or "False" in ViewBag.result.
string searchString = "search from DOC file";
var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "phpblog.docx");

// If the file does not exist, ViewBag.result is not assigned.
if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();

    // Second argument 'false' opens the package read-only.
    using (var doc = WordprocessingDocument.Open(path, false))
    {
        // A package without a main part or body simply contributes no text.
        var body = doc.MainDocumentPart?.Document?.Body;
        if (body != null)
        {
            // Descendants<Paragraph>() also reaches paragraphs nested in tables
            // (Elements() only sees direct children of the body) and needs no
            // System.Linq import.
            foreach (var paragraph in body.Descendants<Paragraph>())
            {
                // AppendLine keeps paragraphs separated so text from adjacent
                // paragraphs cannot fuse into a false match.
                text.AppendLine(paragraph.InnerText);
            }
        }
    }

    // Escape the phrase so regex metacharacters are matched literally.
    bool found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);

    ViewBag.result = found ? "True" : "False";
}
Reading Excel file data
Add the "ExcelDataReader" package using the NuGet Package Manager and write the code below:
// Reads every cell of every sheet in an Excel workbook and checks whether the
// collected text contains a literal phrase (case-insensitive). The outcome is
// published as the string "True" or "False" in ViewBag.result.
string searchString = "search from Excel file";
var path = Path.Combine(Directory.GetCurrentDirectory(), "DocumentsFiles", "Daily_Target_Net.xlsx");

// If the file does not exist, ViewBag.result is not assigned.
if (System.IO.File.Exists(path))
{
    StringBuilder text = new StringBuilder();

    // Registers the code-page encodings before the reader is created, as .NET Core
    // does not provide them by default.
    System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

    // OpenRead shares the file for reading, so simultaneous requests searching the
    // same workbook do not fail with a sharing violation.
    using (var stream = System.IO.File.OpenRead(path))
    using (var reader = ExcelReaderFactory.CreateReader(stream))
    {
        do
        {
            while (reader.Read()) // each row of the current sheet
            {
                for (int column = 0; column < reader.FieldCount; column++)
                {
                    // Tab-separate the cells so values from neighbouring cells
                    // cannot fuse into a false match. Empty cells append nothing
                    // but the separator.
                    text.Append(reader.GetValue(column)).Append('\t');
                }

                text.AppendLine(); // row boundary
            }
        } while (reader.NextResult()); // move to the next sheet
    }

    // Escape the phrase so regex metacharacters are matched literally.
    bool found = Regex.IsMatch(text.ToString(), Regex.Escape(searchString), RegexOptions.IgnoreCase);

    ViewBag.result = found ? "True" : "False";
}