提取文本
using System;
using System.Diagnostics;
using System.IO;
using System.Text;
using Spire.Pdf;

namespace ExtractText
{
    /// <summary>
    /// Console sample: extracts the text of the first page of a PDF file
    /// and saves it to a plain-text file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "测试文档.pdf" from the working directory, extracts the text of
        /// its first page, writes it to "TextFromPDF.txt", and then opens that
        /// file with the application associated with it.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string outputFileName = "TextFromPDF.txt";
            string pageText;

            // Dispose the document deterministically so the underlying PDF
            // file handle is released as soon as extraction is finished.
            using (PdfDocument document = new PdfDocument())
            {
                // Load the document.
                document.LoadFromFile("测试文档.pdf");

                // Indexing Pages[0] on a document without pages would throw;
                // report the problem instead of crashing.
                if (document.Pages.Count == 0)
                {
                    Console.Error.WriteLine("The PDF document contains no pages; nothing to extract.");
                    return;
                }

                // Only the first page is extracted. A null result is written
                // as an empty file, matching StringBuilder.Append(null).
                pageText = document.Pages[0].ExtractText() ?? string.Empty;
            }

            // Save the extracted text to the .txt file.
            File.WriteAllText(outputFileName, pageText);

            // Opening a document (rather than an executable) requires shell
            // execution. It is the default on .NET Framework but must be
            // requested explicitly on .NET Core / .NET 5+.
            Process.Start(new ProcessStartInfo(outputFileName) { UseShellExecute = true });
        }
    }
}
提取图片
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Imaging;
using System.Text;
using Spire.Pdf;

namespace ExtractImagesFromPDF
{
    /// <summary>
    /// Console sample: extracts every image from every page of a PDF file
    /// and saves each one as a PNG file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "测试文档.pdf" from the working directory, collects the images
        /// of all pages, saves them as "image1.png", "image2.png", ... and, if
        /// at least one image was found, opens "image1.png" with the
        /// application associated with it.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Images collected across all pages, in page order.
            List<Image> extractedImages = new List<Image>();

            try
            {
                // Dispose the document deterministically so the underlying
                // PDF file handle is released once the images are saved.
                using (PdfDocument doc = new PdfDocument())
                {
                    doc.LoadFromFile("测试文档.pdf");

                    for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                    {
                        PdfPageBase page = doc.Pages[pageIndex];

                        // Extract the images of this page; skip pages without any.
                        Image[] pageImages = page.ExtractImages();
                        if (pageImages != null && pageImages.Length > 0)
                        {
                            extractedImages.AddRange(pageImages);
                        }
                    }

                    // Save while the document is still open, in case the
                    // extracted images are backed by document data.
                    for (int i = 0; i < extractedImages.Count; i++)
                    {
                        extractedImages[i].Save("image" + (i + 1).ToString() + ".png", ImageFormat.Png);
                    }
                }
            }
            finally
            {
                // Image wraps native GDI+ resources; release them even if
                // loading or saving failed part-way through.
                foreach (Image image in extractedImages)
                {
                    if (image != null)
                    {
                        image.Dispose();
                    }
                }
            }

            if (extractedImages.Count > 0)
            {
                // Opening a document (rather than an executable) requires
                // shell execution. It is the default on .NET Framework but
                // must be requested explicitly on .NET Core / .NET 5+.
                Process.Start(new ProcessStartInfo("image1.png") { UseShellExecute = true });
            }
        }
    }
}
这篇文档对您是否有帮助?