org.apache.pdfbox.pdmodel.PDDocumentCatalog.getAllPages java code examples

 import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import java.io.IOException;

/**
 * Loads {@code input.pdf}, clears the image map of each page's resources and
 * saves the result as {@code strippedOfImages.pdf}.
 */
public class Main {
  /**
   * Runs the image-stripping example.
   *
   * @param argv command-line arguments (not used)
   * @throws COSVisitorException declared for the save step
   * @throws InvalidPasswordException declared for the decrypt step
   * @throws CryptographyException declared for the decrypt step
   * @throws IOException if the document cannot be read, written or closed
   */
  public static void main(String[] argv) throws COSVisitorException, InvalidPasswordException, CryptographyException, IOException {
    PDDocument document = PDDocument.load("input.pdf");
    try {
      if (document.isEncrypted()) {
        // Attempt decryption with the empty password.
        document.decrypt("");
      }

      PDDocumentCatalog catalog = document.getDocumentCatalog();
      for (Object pageObj : catalog.getAllPages()) {
        PDPage page = (PDPage) pageObj;
        PDResources resources = page.findResources();
        // A page with no resources anywhere in its hierarchy has no images to drop.
        if (resources != null) {
          resources.getImages().clear();
        }
      }

      document.save("strippedOfImages.pdf");
    } finally {
      // Release the document even when decrypting, stripping or saving fails.
      document.close();
    }
  }
}

 // Find the position of a form field's widget page within the document's page list.
 PDDocumentCatalog catalog = doc.getDocumentCatalog();

// List.indexOf is zero-based and yields -1 when the page is not in the list.
int pageNumber = catalog.getAllPages().indexOf(yourField.getWidget().getPage());

/**
 * Calls {@code addHeaderFooter} for every page whose zero-based index lies in
 * the inclusive range {@code fromPage}..{@code toPage}.
 *
 * @param sheet    sheet handed to {@code setSheet} before the pages are processed
 * @param fromPage zero-based index of the first page to process
 * @param toPage   zero-based index of the last page to process (inclusive)
 * @throws Exception propagated from the helpers this method calls
 */
public void insertHeadersAndFooters(XSSFSheet sheet, int fromPage, int toPage) throws Exception {
  final List<?> allPages = document.getDocumentCatalog().getAllPages();
  setSheet(sheet);

  final int endExclusive = toPage + 1;
  int pageIndex = fromPage;
  while (pageIndex < endExclusive) {
    // Pages are indexed from 0 in the list but numbered from 1 for the header/footer.
    final int displayNumber = pageIndex + 1;
    currentPage = displayNumber;
    final PDPage target = (PDPage) allPages.get(pageIndex);
    addHeaderFooter(target, displayNumber);
    pageIndex++;
  }
}

// Take the first page (index 0) of the document and render it to an image.
// getAllPages() elements are cast because the list is not typed as List<PDPage> here.
PDPage page = (PDPage)pdf.getDocumentCatalog().getAllPages().get(0);
BufferedImage image = page.convertToImage();

/**
 * Renders every page of a PDF file to an RGB image at 300 DPI and runs OCR
 * over all rendered pages in one pass.
 *
 * @param url path of the PDF file (used as {@code new File(url)})
 * @return the text returned by the OCR engine for all pages
 * @throws IOException if loading, rendering or closing the document fails
 * @throws TesseractException if the OCR step fails
 */
private static String convertEncryptedPDFDocument(String url) throws IOException, TesseractException{
  int imageDPIValue = 300;
  List<BufferedImage> imagesPages = new ArrayList<>();
  PDDocument document = PDDocument.loadNonSeq(new File(url), null);
  try {
    @SuppressWarnings("unchecked")
    List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
    for (PDPage pdPage : pdPages) {
      imagesPages.add(pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, imageDPIValue));
    }
  } finally {
    // Close the document even if rendering one of the pages throws.
    document.close();
  }

  Tesseract tessaract = TessaractManager.getInstance().getTessaract();
  List<IIOImage> pagesToOCR = new ArrayList<>();
  for (BufferedImage image : imagesPages) {
    List<IIOImage> content = ImageIOHelper.getIIOImageList(image);
    pagesToOCR.addAll(content);
  }
  String originalText = tessaract.doOCR(pagesToOCR, null);
  return originalText;
}

// All pages of the document. NOTE(review): if getAllPages() returns a raw List,
// this assignment is an unchecked conversion — confirm against the PDFBox version in use.
List<PDPage> pages = document.getDocumentCatalog().getAllPages();

// Fetch the page list and prepare a lookup from annotation dictionary to page number.
PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();
List<PDPage> pages = docCatalog.getAllPages();
// Presumably filled in by the loop below; its body is not part of this snippet.
Map<COSDictionary, Integer> pageNrByAnnotDict = new HashMap<COSDictionary, Integer>();
for (int i = 0; i < pages.size(); i++) {

// Untyped page list: elements need a cast to PDPage before page methods can be called.
List allPages = document.getDocumentCatalog().getAllPages();

 // Generate the single-field template once and keep a copy on disk for inspection.
 byte[] template = generateSimpleTemplate();
Files.write(new File(RESULT_FOLDER,  "template.pdf").toPath(), template);

// Every loaded template copy is tracked so it can be closed once the merged
// document has been saved. Their page objects are added to finalDoc directly,
// so presumably they have to stay open until then.
List<PDDocument> sourceDocs = new ArrayList<PDDocument>();
try (PDDocument finalDoc = new PDDocument())
{
  List<PDField> fields = new ArrayList<PDField>();
  int i = 0;

  for (String value : new String[]{"eins", "zwei"})
  {
    // load is static: call it on the class instead of on a throwaway instance
    // that would never be closed.
    PDDocument doc = PDDocument.load(new ByteArrayInputStream(template));
    sourceDocs.add(doc);
    PDDocumentCatalog docCatalog = doc.getDocumentCatalog();
    PDAcroForm acroForm = docCatalog.getAcroForm();
    PDField field = acroForm.getField("SampleField");
    field.setValue(value);
    // Give each copy of the field a distinct partial name (SampleField0, SampleField1, ...).
    field.setPartialName("SampleField" + i++);
    List<PDPage> pages = docCatalog.getAllPages();
    finalDoc.addPage(pages.get(0));
    fields.add(field);
  }

  // Attach a fresh form holding the renamed fields to the merged document.
  PDAcroForm finalForm = new PDAcroForm(finalDoc);
  finalDoc.getDocumentCatalog().setAcroForm(finalForm);
  finalForm.setFields(fields);

  finalDoc.save(new File(RESULT_FOLDER, "form-two-templates.pdf"));
}
finally
{
  // Release the template copies now that nothing reads from them any more.
  for (PDDocument sourceDoc : sourceDocs)
  {
    sourceDoc.close();
  }
}

.getDocumentCatalog().getAllPages();

// Declarations used while walking the pages; the loop body continues beyond this snippet.
PDFileSpecification fspec;
List<LinkInfo> li = new ArrayList<>();
List allPages = pdfDoc.getDocumentCatalog().getAllPages();
for (Object pageObject : allPages) {
  // The list is untyped, so each element is cast to PDPage.
  PDPage page = (PDPage) pageObject;

// Page count of the document, taken from the size of the page list.
List allPages = document.getDocumentCatalog().getAllPages();
int numPages = allPages.size();

// Visit every page and fetch its annotations; the rest of the try block is not shown.
try {
  stripper = new PDFTextStripperByArea();
  for (Object pageObject : pdfDoc.getDocumentCatalog().getAllPages()) {
    // The list is untyped, so each element is cast to PDPage.
    PDPage page = (PDPage) pageObject;
    List pageAnnotations = page.getAnnotations();

// Untyped page list of pdfDoc; elements need a cast to PDPage before use.
List pages = pdfDoc.getDocumentCatalog().getAllPages();

/**
 * Parses a document extracting the images.
 *
 * Each page's content stream is handed to {@code processStream}; pages that
 * have no content stream are skipped. {@code currentPage} is set to the
 * zero-based page index before a page is processed.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded, processed or closed
 */
public void parse(String filename) throws IOException {
  PDDocument document = null;
  try {
    document = PDDocument.load(filename, false);
    List allPages = document.getDocumentCatalog().getAllPages();
    for( int i=0; i<allPages.size(); i++ ) {
      PDPage page = (PDPage)allPages.get( i );
      currentPage = i;
      // A page without a content stream has nothing to process; guarding here
      // avoids a NullPointerException on getStream().
      if (page.getContents() != null) {
        processStream( page, page.findResources(), page.getContents().getStream() );
      }
    }
  } finally {
    // Always release the document, including when processing a page fails.
    if (document != null) {
      document.close();
    }
  }
}

// Build the image object once, then visit every page; the loop body continues beyond this snippet.
PDXObjectImage ximage = new PDPixelMap(pdfDoc, image);
for (PDPage page : (List<PDPage>)pdfDoc.getDocumentCatalog().getAllPages()) {
  // Per-page media box and resources, fetched via the find* accessors.
  PDRectangle pageSize = page.findMediaBox();
  PDResources resources = page.findResources();

/**
 * Parses a document extracting the colors for the specified words in
 * the constructor.
 *
 * Each page that has a content stream is handed to {@code processStream};
 * pages without one are skipped.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded, processed or closed
 */
public void parse (String filename) throws IOException {
  PDDocument document = null;
  try {
    document = PDDocument.load(filename, false);
    List allPages = document.getDocumentCatalog().getAllPages();
    for( int i=0; i<allPages.size(); i++ ) {
      PDPage page = (PDPage)allPages.get( i );
      PDStream contents = page.getContents();
      if (contents != null) {
        // findResources() also looks up resources inherited from parent page
        // nodes; getResources() only sees those set on the page itself and
        // is null otherwise.
        processStream( page, page.findResources(), contents.getStream() );
      }
    }
  } finally {
    // Always release the document, including when processing a page fails.
    if (document != null) {
      document.close();
    }
  }
}

// Convert the configured hex colour to an int array, then visit every page;
// the loop body continues beyond this snippet.
int[] rgb = PDFUtils.hex255ToRGB(properties.getHex255Color());
for (PDPage page : (List<PDPage>)pdfDoc.getDocumentCatalog().getAllPages()) {
  // Per-page media box and resources, fetched via the find* accessors.
  PDRectangle pageSize = page.findMediaBox();
  PDResources resources = page.findResources();

// Untyped page list plus two flags that start out false.
// NOTE(review): the flags are presumably set inside the loop, whose body is not shown here.
List allPages = docCatalog.getAllPages();
boolean gotMediaBox = false, gotCropBox = false;
for (Object pageObject : allPages) {

/**
 * Output a PDF with as many pages as there are interesting areas in the
 * input document.
 *
 * Only source pages whose one-based number lies between {@code startPage}
 * and {@code endPage} (inclusive) are considered. Each fragment returned by
 * {@code getFragments} yields one imported copy of the page, cropped to
 * that fragment.
 *
 * @return a new document holding one page per fragment; the caller receives
 *         it open
 * @throws IOException if importing a page fails
 */
@Override
public PDDocument extract() throws IOException {
  PDDocument extractedDocument = new PDDocument();
  extractedDocument.setDocumentInformation(sourceDocument.getDocumentInformation());
  extractedDocument.getDocumentCatalog().setViewerPreferences(
      sourceDocument.getDocumentCatalog().getViewerPreferences());
  @SuppressWarnings("unchecked")
  List<PDPage> pages = sourceDocument.getDocumentCatalog().getAllPages();
  int pageCounter = 1;
  for (PDPage page : pages) {
    if (pageCounter >= startPage && pageCounter <= endPage) {
      List<PDRectangle> zoomedFragments = getFragments(page);
      for (PDRectangle fragment : zoomedFragments) {
        PDPage outputPage = extractedDocument.importPage(page);
        outputPage.setCropBox(fragment);
        // Use the inherited-aware lookup, like resources and rotation below:
        // a media box defined on a parent page node would otherwise be lost.
        outputPage.setMediaBox(page.findMediaBox());
        outputPage.setResources(page.findResources());
        outputPage.setRotation(page.findRotation());
        // TODO: rotate the page in landscape mode is width > height
      }
    }
    pageCounter++;
  }
  return extractedDocument;
}

Popular methods of PDDocumentCatalog

getAcroForm
Get the document's AcroForm. This will return null if no AcroForm is part of the document.
getMetadata
Get the metadata that is part of the document catalog. This will return null if there is no metadata.
getCOSObject
Convert this standard java object to a COS object.
getDocumentOutline
Get the outline associated with this document or null if it does not exist.
getNames
getOpenAction
Get the Document Open Action for this object.
getPages
Returns all pages in the document, as a page tree.
getOCProperties
Get the optional content properties dictionary associated with this document.
setMetadata
Sets the metadata for this object. This can be null.
getLanguage
Returns the language for the document, or null.
getOutputIntents
Get the list of OutputIntents defined in the document.
getViewerPreferences
Get the viewer preferences associated with this document or null if they do not exist.

Popular in Java

Reactive rest calls using spring rest template
compareTo (BigDecimal)
notifyDataSetChanged (ArrayAdapter)
putExtra (Intent)
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
URI (java.net)
A Uniform Resource Identifier that identifies an abstract or physical resource, as specified by RFC
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
Menu (java.awt)
Table (org.hibernate.mapping)
A relational table
Best IntelliJ plugins

How to use the getAllPages method in org.apache.pdfbox.pdmodel.PDDocumentCatalog

Best Java code snippets using org.apache.pdfbox.pdmodel.PDDocumentCatalog.getAllPages (Showing top 20 results out of 315)

How to use
getAllPages
method
in
org.apache.pdfbox.pdmodel.PDDocumentCatalog