// Open the PDF stored at "file.pdf" and read how many pages it contains.
// NOTE(review): nothing in this snippet closes doc — confirm the surrounding code does.
File pdfFile = new File("file.pdf");
PDDocument doc = PDDocument.load(pdfFile);
int count = doc.getNumberOfPages();
import java.io.File; import java.util.HashMap; import org.apache.pdfbox.multipdf.Overlay; import org.apache.pdfbox.pdmodel.PDDocument; public class TestPDF { public static void main(String[] args) throws Exception{ PDDocument realDoc = PDDocument.load(new File("originaldocument.pdf")); //the above is the document you want to watermark //for all the pages, you can add overlay guide, indicating watermark the original pages with the watermark document. HashMap<Integer, String> overlayGuide = new HashMap<Integer, String>(); for(int i=0; i<realDoc.getNumberOfPages(); i++){ overlayGuide.put(i+1, "watermark.pdf"); //watermark.pdf is the document which is a one page PDF with your watermark image in it. //Notice here, you can skip pages from being watermarked. } Overlay overlay = new Overlay(); overlay.setInputPDF(realDoc); overlay.setOutputFile("final.pdf"); overlay.setOverlayPosition(Overlay.Position.BACKGROUND); overlay.overlay(overlayGuide); } }
/**
 * Reports the page count by delegating to the wrapped {@code document}.
 *
 * @return the value of {@code document.getNumberOfPages()}
 */
@Override
public int getNumberOfPages()
{
    final int pageCount = document.getNumberOfPages();
    return pageCount;
}
/** * Creates a new instance of PageExtractor * @param sourceDocument The document to split. */ public PageExtractor(PDDocument sourceDocument) { this.sourceDocument = sourceDocument; endPage = sourceDocument.getNumberOfPages(); }
/**
 * Stores the label for the given page index, skipping indices that are
 * not below the document's page count.
 *
 * @param pageIndex index of the page the label belongs to
 * @param label the label to store
 */
@Override
public void newLabel(int pageIndex, String label)
{
    if (pageIndex >= doc.getNumberOfPages())
    {
        // index is beyond the document's pages: nothing to record
        return;
    }
    map[pageIndex] = label;
}
}
});
/**
 * Computes the label of every page and returns them as an array indexed by
 * 0-based page index. The array length equals the document's page count;
 * slots for which no label is reported stay {@code null}.
 *
 * @return an array mapping from 0-based page indices to labels.
 */
public String[] getLabelsByPageIndices()
{
    final String[] labels = new String[doc.getNumberOfPages()];
    final LabelHandler collector = new LabelHandler()
    {
        @Override
        public void newLabel(int pageIndex, String label)
        {
            if (pageIndex >= doc.getNumberOfPages())
            {
                // label refers to a page the document does not have
                return;
            }
            labels[pageIndex] = label;
        }
    };
    computeLabels(collector);
    return labels;
}
/**
 * Computes the label of every page and returns a lookup from label to
 * 0-based page index. The map never holds more entries than the document
 * has pages.
 *
 * <p>
 * <strong>NOTE:</strong> Each reported label overwrites any earlier entry
 * with the same key, so when several pages share a label the map holds
 * <em>fewer</em> entries than there are pages and keeps the index that was
 * reported last for that label.
 * </p>
 *
 * @return a mapping from labels to 0-based page indices.
 */
public Map<String, Integer> getPageIndicesByLabels()
{
    final int pageCount = doc.getNumberOfPages();
    final Map<String, Integer> indexByLabel = new HashMap<>(pageCount);
    final LabelHandler collector = new LabelHandler()
    {
        @Override
        public void newLabel(int pageIndex, String label)
        {
            indexByLabel.put(label, pageIndex);
        }
    };
    computeLabels(collector);
    return indexByLabel;
}
/**
 * Extracts pages over the range built from 1 and the document's page count
 * plus one.
 * NOTE(review): presumably {@code Utils.range} excludes its upper bound, so
 * this covers every page — confirm against {@code Utils}.
 *
 * @return an iterator over the extracted pages
 */
public PageIterator extract() {
    final int upperBound = this.pdfDocument.getNumberOfPages() + 1;
    return extract(Utils.range(1, upperBound));
}
/**
 * Counts the pages of a PDF file.
 *
 * @param inputPdfFile input file
 * @return number of pages, or -1 when the file could not be loaded
 */
public static int getPdfPageCount(File inputPdfFile) {
    PDDocument pdf = null;
    int pageCount;
    try {
        pdf = PDDocument.load(inputPdfFile);
        pageCount = pdf.getNumberOfPages();
    } catch (IOException ioe) {
        logger.error("Error counting PDF pages => " + ioe);
        pageCount = -1;
    } finally {
        if (pdf != null) {
            try {
                pdf.close();
            } catch (Exception ignored) {
                // best effort: a failure while closing must not change the result
            }
        }
    }
    return pageCount;
}
/**
 * Walks the pages of the source document and hands each page whose 1-based
 * number lies within [startPage, endPage] to {@code processPage}. The walk
 * stops as soon as the running page counter has moved past endPage.
 *
 * @throws IOException If an IO error occurs.
 */
private void processPages() throws IOException
{
    for (int index = 0; index < sourceDocument.getNumberOfPages(); index++)
    {
        PDPage current = sourceDocument.getPage(index);
        // currentPageNumber is 0-based here; the configured limits are compared 1-based
        boolean inRange = currentPageNumber + 1 >= startPage
                && currentPageNumber + 1 <= endPage;
        if (!inRange && currentPageNumber > endPage)
        {
            // already beyond the last requested page
            break;
        }
        if (inRange)
        {
            processPage(current);
        }
        currentPageNumber++;
    }
}
doc.getNumberOfPages() - lastEntry.getKey()); while (gen.hasNext())
if( document.getNumberOfPages() <= 1 )
/**
 * Entry point: loads the PDF named by the single argument and runs a
 * {@code PrintTextLocations} stripper over all of its pages. The text
 * written by the stripper goes to a throw-away writer.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main( String[] args ) throws IOException
{
    if( args.length != 1 )
    {
        usage();
        return;
    }

    final File input = new File( args[0] );
    try( PDDocument document = PDDocument.load( input ) )
    {
        PDFTextStripper stripper = new PrintTextLocations();
        stripper.setSortByPosition( true );
        stripper.setStartPage( 0 );
        final int lastPage = document.getNumberOfPages();
        stripper.setEndPage( lastPage );

        // the stripped text itself is not needed, so it is written to an in-memory sink
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        Writer dummy = new OutputStreamWriter( sink );
        stripper.writeText( document, dummy );
    }
}
/**
 * Entry point: loads the PDF named by the single argument and runs a
 * {@code PrintTextColors} stripper over all of its pages. The text written
 * by the stripper goes to a throw-away writer.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException
{
    if (args.length != 1)
    {
        usage();
        return;
    }

    final File input = new File(args[0]);
    try (PDDocument document = PDDocument.load(input))
    {
        PDFTextStripper stripper = new PrintTextColors();
        stripper.setSortByPosition(true);
        stripper.setStartPage(0);
        final int lastPage = document.getNumberOfPages();
        stripper.setEndPage(lastPage);

        // the stripped text itself is not needed, so it is written to an in-memory sink
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        Writer dummy = new OutputStreamWriter(sink);
        stripper.writeText(document, dummy);
    }
}
/**
 * Writes the page count, the document information fields and, when
 * present, the catalog metadata of the given document to System.out, one
 * "name=value" line per item.
 *
 * @param document The document to get the metadata from.
 *
 * @throws IOException If there is an error getting the page count.
 */
public void printMetadata( PDDocument document ) throws IOException
{
    PDDocumentInformation docInfo = document.getDocumentInformation();
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata xmp = catalog.getMetadata();

    System.out.println( "Page Count=" + document.getNumberOfPages() );

    // descriptive fields of the information dictionary
    System.out.println( "Title=" + docInfo.getTitle() );
    System.out.println( "Author=" + docInfo.getAuthor() );
    System.out.println( "Subject=" + docInfo.getSubject() );
    System.out.println( "Keywords=" + docInfo.getKeywords() );
    System.out.println( "Creator=" + docInfo.getCreator() );
    System.out.println( "Producer=" + docInfo.getProducer() );

    // dates are rendered through formatDate before printing
    System.out.println( "Creation Date=" + formatDate( docInfo.getCreationDate() ) );
    System.out.println( "Modification Date=" + formatDate( docInfo.getModificationDate() ) );
    System.out.println( "Trapped=" + docInfo.getTrapped() );

    if( xmp == null )
    {
        return;
    }
    // the raw metadata bytes are decoded as ISO-8859-1 for display
    byte[] rawMetadata = xmp.toByteArray();
    String string = new String( rawMetadata, "ISO-8859-1" );
    System.out.println( "Metadata=" + string );
}
/**
 * Entry point: loads the PDF named by the single argument and calls
 * {@code stripPage} on a {@code DrawPrintTextLocations} stripper once for
 * every 0-based page index of the document.
 *
 * @param args The command line arguments.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main(String[] args) throws IOException
{
    if (args.length != 1)
    {
        usage();
        return;
    }

    final String fileName = args[0];
    try (PDDocument document = PDDocument.load(new File(fileName)))
    {
        DrawPrintTextLocations stripper = new DrawPrintTextLocations(document, fileName);
        stripper.setSortByPosition(true);

        int pageIndex = 0;
        while (pageIndex < document.getNumberOfPages())
        {
            stripper.stripPage(pageIndex);
            ++pageIndex;
        }
    }
}
/**
 * Builds a {@code LayoutPage} for every page of the given document, keyed
 * by the page's 0-based index.
 *
 * @param doc the document whose pages are converted
 * @return a map from 0-based page index to the layout page built from it
 * @throws IOException declared for the helper calls made while building a page
 */
private Map<Integer,LayoutPage> getLayoutPages(PDDocument doc) throws IOException
{
    final int pageCount = doc.getNumberOfPages();
    final Map<Integer,LayoutPage> result = new HashMap<>(pageCount);
    for (int index = 0; index < pageCount; index++)
    {
        PDPage current = doc.getPage(index);
        COSBase rawContents = current.getCOSObject().getDictionaryObject(COSName.CONTENTS);

        // a page without its own resources gets an empty resources object
        PDResources pageResources = current.getResources();
        if (pageResources == null)
        {
            pageResources = new PDResources();
        }

        LayoutPage layoutPage = new LayoutPage(
                current.getMediaBox(),
                createCombinedContentStream(rawContents),
                pageResources.getCOSObject());
        result.put(index, layoutPage);
    }
    return result;
}
/**
 * Loads the given PDF, asserts that it has exactly one page, and returns
 * the strings a {@code StringExtractor} collects from that page.
 *
 * @param pdfFile the PDF file to read
 * @return the strings reported by the extractor for the first page
 * @throws IOException if the file cannot be loaded, processed or closed
 */
private List<String> extractStrings(File pdfFile) throws IOException {
    // try-with-resources: the document was previously never closed, leaking
    // it on every call (including when the page-count assertion fails).
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        assertThat(pdf.getNumberOfPages()).isEqualTo(1);
        StringExtractor stringExtractor = new StringExtractor();
        stringExtractor.processPage(pdf.getPage(0));
        return stringExtractor.getStrings();
    }
}
/**
 * Extracts a single page, identified by its 1-based number, into a
 * {@code Page} carrying the page's text elements, rulings and dimensions.
 *
 * @param pageNumber 1-based number of the page to extract
 * @return the extracted page
 * @throws IOException declared for the page processing calls
 * @throws IndexOutOfBoundsException if pageNumber is below 1 or above the
 *         document's page count
 */
protected Page extractPage(Integer pageNumber) throws IOException {
    final boolean outOfRange =
            pageNumber > this.pdfDocument.getNumberOfPages() || pageNumber < 1;
    if (outOfRange) {
        throw new java.lang.IndexOutOfBoundsException("Page number does not exist");
    }

    // the document API is 0-based, the public page numbers are 1-based
    PDPage pdPage = this.pdfDocument.getPage(pageNumber - 1);

    ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(pdPage);
    streamEngine.processPage(pdPage);

    TextStripper textStripper = new TextStripper(this.pdfDocument, pageNumber);
    textStripper.process();
    Utils.sort(textStripper.textElements, Rectangle.ILL_DEFINED_ORDER);

    // for pages rotated by 90 or 270 degrees, width and height of the crop box swap
    int rotation = pdPage.getRotation();
    int absRotation = Math.abs(rotation);
    boolean sideways = absRotation == 90 || absRotation == 270;
    float width = sideways ? pdPage.getCropBox().getHeight() : pdPage.getCropBox().getWidth();
    float height = sideways ? pdPage.getCropBox().getWidth() : pdPage.getCropBox().getHeight();

    return new Page(0, 0, width, height, rotation, pageNumber, pdPage,
            textStripper.textElements, streamEngine.rulings,
            textStripper.minCharWidth, textStripper.minCharHeight,
            textStripper.spatialIndex);
}
/** * Prints using a custom page size and custom margins. */ private static void printWithPaper(PDDocument document) throws IOException, PrinterException { PrinterJob job = PrinterJob.getPrinterJob(); job.setPageable(new PDFPageable(document)); // define custom paper Paper paper = new Paper(); paper.setSize(306, 396); // 1/72 inch paper.setImageableArea(0, 0, paper.getWidth(), paper.getHeight()); // no margins // custom page format PageFormat pageFormat = new PageFormat(); pageFormat.setPaper(paper); // override the page format Book book = new Book(); // append all pages book.append(new PDFPrintable(document), pageFormat, document.getNumberOfPages()); job.setPageable(book); job.print(); } }