File file = new File("filename.pdf"); OutputStream outputStream = // ? PDDocument doc = PDDocument.load(file); List<PDPage> pages = (List<PDPage>) doc.getDocumentCatalog().getAllPages(); PDPage page = pages.get(0); // 24 bit image, 100dpi: BufferedImage image = page.convertToImage(BufferedImage.TYPE_3BYTE_BGR, 100); ImageIO.write(image, "jpg", outputStream);
// For each page of the loaded PDF: render it (TYPE_INT_RGB, 96), pass the image through
// resizeImage(), write the result via writeImage() to a file obtained from createTmpFile(),
// and append a Page built from that file's absolute path and numOfColumns to `pages`.
// Nothing happens when the document is null or its page list is null or empty.
// resizeImage, createTmpFile, writeImage, Page, pages and numOfColumns are defined outside
// this snippet.
// NOTE(review): `new File ...` is an elided placeholder in the original, and the document is
// not closed within the visible code - confirm both against the full source.
File pdfFile = new File ... PDDocument document = PDDocument.load(pdfFile); if(document != null){ @SuppressWarnings("unchecked") List<PDPage> pdfPages = document.getDocumentCatalog().getAllPages(); if(pdfPages != null && !pdfPages.isEmpty()){ for(PDPage page : pdfPages){ BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 96); BufferedImage resized = resizeImage(image); File tmpFile = createTmpFile(); writeImage(resized, tmpFile); pages.add(new Page(tmpFile.getAbsolutePath(), numOfColumns)); } } }
// Loads the PDF at a hard-coded Windows path and renders its first page (index 0) with the
// no-argument convertToImage(), i.e. without an explicit image type or resolution.
// NOTE(review): the document is not closed within this snippet.
PDDocument document = PDDocument.load(new File("C:\\Temp\\stuff.pdf")); List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); PDPage firstPage = allPages.get(0); BufferedImage bi = firstPage.convertToImage();
// Loads the PDF referenced by pdffile and writes every page beside it as
// "<absolute pdf path>_<zero-based page index>.jpg", rendered as TYPE_INT_RGB at 72.
PDDocument document = PDDocument.load(pdffile);
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
for (int pageIndex = 0; pageIndex < pages.size(); pageIndex++) {
    BufferedImage rendered =
            pages.get(pageIndex).convertToImage(BufferedImage.TYPE_INT_RGB, 72);
    String targetPath = pdffile.getAbsolutePath() + "_" + pageIndex + ".jpg";
    File target = new File(targetPath);
    ImageIO.write(rendered, "jpg", target);
}
import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageNode; public List<String> generateImages(String pdfFile) throws IOException { String imagePath = "/Users/$user/pdfimages/"; List <String> fileNames = new ArrayList<String>(); document = PDDocument.load(pdfFile); //// load pdf node = document.getDocumentCatalog().getPages(); ///// get pages List<PDPage> kids = node.getKids(); int count=0; for(PDPage page : kids) { ///// iterate BufferedImage img = page.convertToImage(BufferedImage.TYPE_INT_RGB,128); File imageFile = new File(imagePath+ count++ + ".jpg"); ImageIO.write(img, "jpg", imageFile); fileNames.add(imageFile.getName()); } return fileNames; }
private static Boolean isBlank(PDPage pdfPage) throws IOException { BufferedImage bufferedImage = pdfPage.convertToImage(); long count = 0; int height = bufferedImage.getHeight(); int width = bufferedImage.getWidth(); Double areaFactor = (width * height) * 0.99; for (int x = 0; x < width ; x++) { for (int y = 0; y < height ; y++) { Color c = new Color(bufferedImage.getRGB(x, y)); // verify light gray and white if (c.getRed() == c.getGreen() && c.getRed() == c.getBlue() && c.getRed() >= 248) { count++; } } } if (count >= areaFactor) { return true; } return false; }
import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.List; import javax.imageio.ImageIO; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; public class PDFUtil { public static void saveFirstPageThumbnail() throws IOException { PDDocument document = PDDocument.load("C:\\testbook.pdf"); List<PDPage> pages = document.getDocumentCatalog().getAllPages(); PDPage page = pages.get(0); //first one BufferedImage bufferedImage = page.convertToImage(); File outputFile = new File( "C:\\image.jpg"); ImageIO.write(bufferedImage, "jpg", outputFile); } }
// Renders the first page (index 0) of `document` as TYPE_INT_RGB with a resolution argument
// of 300 and creates an in-memory ByteArrayOutputStream. Nothing is written to the stream
// within this snippet.
final List<PDPage> allPages = document.getDocumentCatalog().getAllPages(); final PDPage pdPage = allPages.get(0); final BufferedImage image = pdPage.convertToImage(TYPE_INT_RGB, 300); final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
public void convertPDFToJPG(String src,String FolderPath){ try{ File folder1 = new File(FolderPath+"\\"); comparePDF cmp=new comparePDF(); cmp.rmdir(folder1); //load pdf file in the document object PDDocument doc=PDDocument.load(new FileInputStream(src)); //Get all pages from document and store them in a list List<PDPage> pages=doc.getDocumentCatalog().getAllPages(); //create iterator object so it is easy to access each page from the list Iterator<PDPage> i= pages.iterator(); int count=1; //count variable used to separate each image file //Convert every page of the pdf document to a unique image file System.out.println("Please wait..."); while(i.hasNext()){ PDPage page=i.next(); BufferedImage bi=page.convertToImage(); ImageIO.write(bi, "jpg", new File(FolderPath+"\\Page"+count+".jpg")); count++; } System.out.println("Conversion complete"); }catch(IOException ie){ie.printStackTrace();} }
for (int i = 0; i < pages.size(); i++) { PDPage page = pages.get(i); BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 72); for (int h = 0; h < image.getHeight(); h++) { for (int w = 0; w < image.getWidth(); w++) {
// Renders the page using convertToImage() with no explicit image type or resolution.
BufferedImage image = page.convertToImage();
/**
 * Extracts the text of a PDF by rendering every page to an image and running OCR on the
 * rendered images.
 *
 * <p>Pages are rendered as {@code TYPE_INT_RGB} with a resolution argument of 300. The
 * document is closed before OCR starts, including when rendering fails.
 *
 * @param url file-system path of the PDF (it is passed to {@code new File(url)})
 * @return the text returned by Tesseract for all pages
 * @throws IOException if the PDF cannot be loaded, rendered, or closed
 * @throws TesseractException if OCR fails
 */
private static String convertEncryptedPDFDocument(String url)
        throws IOException, TesseractException {
    int imageDPIValue = 300;
    List<BufferedImage> imagesPages = new ArrayList<>();

    PDDocument document = PDDocument.loadNonSeq(new File(url), null);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
        for (PDPage pdPage : pdPages) {
            imagesPages.add(pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, imageDPIValue));
        }
    } finally {
        // Previously skipped whenever a page failed to render, leaking the document.
        document.close();
    }

    Tesseract tessaract = TessaractManager.getInstance().getTessaract();
    List<IIOImage> pagesToOCR = new ArrayList<>();
    for (BufferedImage image : imagesPages) {
        pagesToOCR.addAll(ImageIOHelper.getIIOImageList(image));
    }
    return tessaract.doOCR(pagesToOCR, null);
}
// Renders the page as TYPE_INT_ARGB (RGB plus alpha channel) with a resolution argument of 72.
java.awt.image.BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_ARGB, 72);
try { image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 300); // does not work images[i] = image; } catch (IOException e) {
// Renders the first page as TYPE_USHORT_565_RGB (16-bit 5-6-5 RGB) with a resolution
// argument of 200.
BufferedImage firstPageImage = firstPage.convertToImage(BufferedImage.TYPE_USHORT_565_RGB , 200);
// Renders the page as TYPE_INT_RGB with a resolution argument of 72 multiplied by SCALE
// (defined outside this snippet), then reads the page's crop box.
image = pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, 72 * SCALE); PDRectangle cropBox = pdPage.getCropBox();
// Casts pageObject to a PDPage and builds the result name as inFileName, a dash, and the
// page's position in `pages` plus one. The page is rendered as TYPE_INT_RGB with a resolution
// argument of 300, a ".png" temp file is requested from Framework.createTempFile, and a
// FileOutputStream is opened on it.
// The stream is neither written to nor closed within this snippet.
PDPage page = (PDPage) pageObject; resultFileName = inFileName + "-" + (pages.indexOf(page) + 1); BufferedImage bim = page.convertToImage(BufferedImage.TYPE_INT_RGB, 300); File resultFile = Framework.createTempFile(resultFileName, ".png"); FileOutputStream resultFileStream = new FileOutputStream(resultFile);