import org.apache.pdfbox.exceptions.COSVisitorException; import org.apache.pdfbox.exceptions.CryptographyException; import org.apache.pdfbox.exceptions.InvalidPasswordException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import java.io.IOException; public class Main { public static void main(String[] argv) throws COSVisitorException, InvalidPasswordException, CryptographyException, IOException { PDDocument document = PDDocument.load("input.pdf"); if (document.isEncrypted()) { document.decrypt(""); } PDDocumentCatalog catalog = document.getDocumentCatalog(); for (Object pageObj : catalog.getAllPages()) { PDPage page = (PDPage) pageObj; PDResources resources = page.findResources(); resources.getImages().clear(); } document.save("strippedOfImages.pdf"); } }
// Zero-based position of the page that holds the field's widget within the catalog's page list; List.indexOf yields -1 when that page is not in the list.
PDDocumentCatalog catalog = doc.getDocumentCatalog(); int pageNumber = catalog.getAllPages().indexOf(yourField.getWidget().getPage());
/**
 * Adds a header and footer to each page whose zero-based index lies in the
 * inclusive range {@code fromPage..toPage}.
 *
 * <p>Before each page is processed, {@code currentPage} is set to the page's
 * one-based number, which is also the number handed to {@code addHeaderFooter}.
 *
 * @param sheet    the sheet passed to {@code setSheet} before any page is processed
 * @param fromPage zero-based index of the first page to process
 * @param toPage   zero-based index of the last page to process (inclusive)
 * @throws Exception propagated from {@code setSheet} or {@code addHeaderFooter}
 */
public void insertHeadersAndFooters(XSSFSheet sheet, int fromPage, int toPage) throws Exception {
    List<?> pages = document.getDocumentCatalog().getAllPages();
    setSheet(sheet);

    int endExclusive = toPage + 1;
    int index = fromPage;
    while (index < endExclusive) {
        int displayNumber = index + 1;
        currentPage = displayNumber;
        PDPage target = (PDPage) pages.get(index);
        addHeaderFooter(target, displayNumber);
        index++;
    }
}
// Take the first page of the document (index 0) and render it to a BufferedImage with convertToImage()'s default settings.
PDPage page = (PDPage)pdf.getDocumentCatalog().getAllPages().get(0); BufferedImage image = page.convertToImage();
/**
 * Renders every page of a PDF to an RGB image at 300 DPI and runs OCR over
 * the rendered pages.
 *
 * @param url file-system path of the PDF (it is passed to {@link File}, not opened as a URL)
 * @return the text recognised by Tesseract across all pages
 * @throws IOException        if the PDF cannot be loaded or a page cannot be rendered
 * @throws TesseractException if OCR fails
 */
private static String convertEncryptedPDFDocument(String url) throws IOException, TesseractException {
    int imageDPIValue = 300;
    List<BufferedImage> imagesPages = new ArrayList<>();

    PDDocument document = PDDocument.loadNonSeq(new File(url), null);
    try {
        @SuppressWarnings("unchecked")
        List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
        for (PDPage pdPage : pdPages) {
            imagesPages.add(pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, imageDPIValue));
        }
    } finally {
        // Release the document even when rendering a page throws.
        document.close();
    }

    Tesseract tessaract = TessaractManager.getInstance().getTessaract();
    List<IIOImage> pagesToOCR = new ArrayList<>();
    for (BufferedImage image : imagesPages) {
        pagesToOCR.addAll(ImageIOHelper.getIIOImageList(image));
    }
    return tessaract.doOCR(pagesToOCR, null);
}
// All pages of the document, taken from its catalog. NOTE(review): other call sites of getAllPages() in this codebase need @SuppressWarnings("unchecked") for the same typed assignment — confirm whether one is wanted here.
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog(); List<PDPage> pages = docCatalog.getAllPages(); Map<COSDictionary, Integer> pageNrByAnnotDict = new HashMap<COSDictionary, Integer>(); for (int i = 0; i < pages.size(); i++) {
// All pages of the document as a raw List; elements have to be cast to PDPage by whoever reads them.
List allPages = document.getDocumentCatalog().getAllPages();
// Build the single-field template once and keep a copy on disk for inspection.
byte[] template = generateSimpleTemplate();
Files.write(new File(RESULT_FOLDER, "template.pdf").toPath(), template);

// Every template copy donates a page to finalDoc, so the copies must stay open
// until finalDoc has been saved; they are closed in the finally block below.
List<PDDocument> sourceDocs = new ArrayList<PDDocument>();
try (PDDocument finalDoc = new PDDocument()) {
    List<PDField> fields = new ArrayList<PDField>();
    int i = 0;
    for (String value : new String[]{"eins", "zwei"}) {
        // load(...) is static: call it on the class instead of on a throwaway
        // PDDocument instance that would never be closed.
        PDDocument doc = PDDocument.load(new ByteArrayInputStream(template));
        sourceDocs.add(doc);

        PDDocumentCatalog docCatalog = doc.getDocumentCatalog();
        PDAcroForm acroForm = docCatalog.getAcroForm();
        PDField field = acroForm.getField("SampleField");
        field.setValue(value);
        // Give each copy of the field a distinct name in the merged form.
        field.setPartialName("SampleField" + i++);

        List<PDPage> pages = docCatalog.getAllPages();
        finalDoc.addPage(pages.get(0));
        fields.add(field);
    }

    PDAcroForm finalForm = new PDAcroForm(finalDoc);
    finalDoc.getDocumentCatalog().setAcroForm(finalForm);
    finalForm.setFields(fields);
    finalDoc.save(new File(RESULT_FOLDER, "form-two-templates.pdf"));
} finally {
    for (PDDocument sourceDoc : sourceDocs) {
        sourceDoc.close();
    }
}
.getDocumentCatalog().getAllPages();
PDFileSpecification fspec; List<LinkInfo> li = new ArrayList<>(); List allPages = pdfDoc.getDocumentCatalog().getAllPages(); for (Object pageObject : allPages) { PDPage page = (PDPage) pageObject;
// Fetch the document's page list (raw List, elements need a PDPage cast) and record how many pages it holds.
List allPages = document.getDocumentCatalog().getAllPages(); int numPages = allPages.size();
try { stripper = new PDFTextStripperByArea(); for (Object pageObject : pdfDoc.getDocumentCatalog().getAllPages()) { PDPage page = (PDPage) pageObject; List pageAnnotations = page.getAnnotations();
// All pages of pdfDoc as a raw List; elements have to be cast to PDPage by whoever reads them.
List pages = pdfDoc.getDocumentCatalog().getAllPages();
/**
 * Parses a document extracting the images.
 *
 * <p>Each page's content stream is handed to {@code processStream} together
 * with the page's resources (inherited ones included). {@code currentPage} is
 * set to the zero-based index of the page being processed. Pages that have no
 * content stream are skipped.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded or a page cannot be processed
 */
public void parse(String filename) throws IOException {
    PDDocument document = null;
    try {
        document = PDDocument.load(filename, false);
        List allPages = document.getDocumentCatalog().getAllPages();
        for (int i = 0; i < allPages.size(); i++) {
            PDPage page = (PDPage) allPages.get(i);
            currentPage = i;
            PDStream contents = page.getContents();
            // A page without a content stream has nothing to process; dereferencing it would throw.
            if (contents != null) {
                processStream(page, page.findResources(), contents.getStream());
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}
PDXObjectImage ximage = new PDPixelMap(pdfDoc, image); for (PDPage page : (List<PDPage>)pdfDoc.getDocumentCatalog().getAllPages()) { PDRectangle pageSize = page.findMediaBox(); PDResources resources = page.findResources();
/**
 * Parses a document extracting the colors for the specified words in
 * the constructor.
 *
 * <p>Each page that has a content stream is handed to {@code processStream}
 * together with the page's resources. Resources are resolved with
 * {@code findResources()} so that resources inherited from a parent node of
 * the page tree are also found.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded or a page cannot be processed
 */
public void parse(String filename) throws IOException {
    PDDocument document = null;
    try {
        document = PDDocument.load(filename, false);
        List allPages = document.getDocumentCatalog().getAllPages();
        for (int i = 0; i < allPages.size(); i++) {
            PDPage page = (PDPage) allPages.get(i);
            PDStream contents = page.getContents();
            // Pages without a content stream are skipped.
            if (contents != null) {
                processStream(page, page.findResources(), contents.getStream());
            }
        }
    } finally {
        if (document != null) {
            document.close();
        }
    }
}
int[] rgb = PDFUtils.hex255ToRGB(properties.getHex255Color()); for (PDPage page : (List<PDPage>)pdfDoc.getDocumentCatalog().getAllPages()) { PDRectangle pageSize = page.findMediaBox(); PDResources resources = page.findResources();
List allPages = docCatalog.getAllPages(); boolean gotMediaBox = false, gotCropBox = false; for (Object pageObject : allPages) {
/** * Output a PDF with as many pages as there are interesting areas in the * input document */ @Override public PDDocument extract() throws IOException { PDDocument extractedDocument = new PDDocument(); extractedDocument.setDocumentInformation(sourceDocument.getDocumentInformation()); extractedDocument.getDocumentCatalog().setViewerPreferences( sourceDocument.getDocumentCatalog().getViewerPreferences()); @SuppressWarnings("unchecked") List<PDPage> pages = sourceDocument.getDocumentCatalog().getAllPages(); int pageCounter = 1; for (PDPage page : pages) { if (pageCounter >= startPage && pageCounter <= endPage) { List<PDRectangle> zoomedFragments = getFragments(page); for (PDRectangle fragment : zoomedFragments) { PDPage outputPage = extractedDocument.importPage(page); outputPage.setCropBox(fragment); outputPage.setMediaBox(page.getMediaBox()); outputPage.setResources(page.findResources()); outputPage.setRotation(page.findRotation()); // TODO: rotate the page in landscape mode is width > height } } pageCounter++; } return extractedDocument; }