PDXObject xObject = resources.getXObject(xObjectNames.next()); if (xObject instanceof PDFormXObject)
/**
 * Resolves the XObject named by the first operand, reports it to the marked-content extractor
 * and then shows it when it is a transparency group or a form.
 *
 * <p>A first operand that is not a {@link COSName} is ignored without any further action.
 *
 * @param operator the operator being processed
 * @param arguments the operands of the operator; the first one names the XObject
 * @throws MissingOperandException if no operand was supplied
 * @throws IOException if the XObject cannot be resolved or shown
 */
@Override
public void process(Operator operator, List<COSBase> arguments) throws IOException {
    if (arguments.isEmpty()) {
        throw new MissingOperandException(operator, arguments);
    }

    COSBase firstOperand = arguments.get(0);
    if (firstOperand instanceof COSName) {
        PDXObject target = context.getResources().getXObject((COSName) firstOperand);

        // The extractor is told about every resolved XObject, whatever its concrete type.
        PDFMarkedContentExtractor extractor = (PDFMarkedContentExtractor) context;
        extractor.xobject(target);

        // Transparency groups are tested first, ahead of the plain form branch.
        if (target instanceof PDTransparencyGroup) {
            context.showTransparencyGroup((PDTransparencyGroup) target);
        } else if (target instanceof PDFormXObject) {
            context.showForm((PDFormXObject) target);
        }
    }
}
try xObject = resources.getXObject(name);
@Override public void process(Operator operator, List<COSBase> arguments) throws IOException { if (arguments.size() < 1) { throw new MissingOperandException(operator, arguments); } COSBase base0 = arguments.get(0); if (!(base0 instanceof COSName)) { return; } COSName name = (COSName) base0; if (context.getResources().isImageXObject(name)) { // we're done here, don't decode images when doing text extraction return; } PDXObject xobject = context.getResources().getXObject(name); if (xobject instanceof PDTransparencyGroup) { context.showTransparencyGroup((PDTransparencyGroup) xobject); } else if (xobject instanceof PDFormXObject) { PDFormXObject form = (PDFormXObject) xobject; context.showForm(form); } }
object = resources.getXObject(name); } catch (MissingImageReaderException e) { EmbeddedDocumentUtil.recordException(e, metadata);
PDXObject xobject = getResources().getXObject( objectName ); if( xobject instanceof PDImageXObject)
/**
 * Rewrites the content stream of every form XObject and tiling pattern reachable from the given
 * resources with the tokens returned by {@code createTokensWithoutText}, recursing into the
 * resources of each form and pattern.
 *
 * @param resources the resources to walk; {@code null} is accepted and treated as empty
 * @throws IOException if an XObject or pattern cannot be read or its stream cannot be rewritten
 */
private static void processResources(PDResources resources) throws IOException {
    // A form XObject or tiling pattern without a resource dictionary may hand back null from
    // getResources(); without this guard the recursive calls below would fail with an NPE.
    if (resources == null) {
        return;
    }

    for (COSName name : resources.getXObjectNames()) {
        PDXObject xobject = resources.getXObject(name);
        if (xobject instanceof PDFormXObject) {
            PDFormXObject formXObject = (PDFormXObject) xobject;
            writeTokensToStream(formXObject.getContentStream(),
                    createTokensWithoutText(formXObject));
            processResources(formXObject.getResources());
        }
    }

    for (COSName name : resources.getPatternNames()) {
        PDAbstractPattern pattern = resources.getPattern(name);
        if (pattern instanceof PDTilingPattern) {
            PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
            writeTokensToStream(tilingPattern.getContentStream(),
                    createTokensWithoutText(tilingPattern));
            processResources(tilingPattern.getResources());
        }
    }
}
// Look up the XObject that objectName refers to in the resources exposed by context.
PDXObject xobject = context.getResources().getXObject(objectName);
PDXObject xobject = resources.getXObject(name); if (xobject instanceof PDFormXObject)
/**
 * Looks up an XObject by name, preferring the current resources over the inherited ones.
 *
 * <p>An XObject that is only found in the inherited resources is flagged through
 * {@code setInherited(true)} before it is returned.
 *
 * @param name the name of the XObject to look up
 * @return the matching XObject, or {@code null} if neither resource set contains it
 * @throws IOException if either resource lookup fails
 */
public PDXObject getXObject(COSName name) throws IOException {
    PDXObject own = this.currentResources.getXObject(name);
    if (own != null) {
        return own;
    }

    PDXObject fromParent = this.inheritedResources.getXObject(name);
    if (fromParent == null) {
        return null;
    }
    fromParent.setInherited(true);
    return fromParent;
}
/**
 * Passes every XObject listed in the given resources to {@code processXObject}.
 *
 * <p>An {@link IOException} raised for one XObject is logged and does not stop the remaining
 * XObjects from being handled.
 *
 * @param resources the resources whose XObjects are processed
 */
private static void parseResourcesXObjects(PDResources resources) {
    for (COSName xObjectName : resources.getXObjectNames()) {
        try {
            processXObject(resources.getXObject(xObjectName));
        } catch (IOException ioFailure) {
            LOGGER.error("Can not obtain xobject from resources", ioFailure);
        }
    }
}
/**
 * Renders the image XObject named by the first operand.
 *
 * <p>The image is decoded, passed through {@code rotateImage}, wrapped in an
 * {@code ImageResource} titled with {@code getTitle()} and handed to {@code renderImage} with
 * the bounds returned by {@code calculateImagePosition}. XObjects that are not images are
 * ignored.
 *
 * @param arguments the operands of the operator; the first one must be the XObject name
 * @throws IOException if the XObject cannot be resolved or the image cannot be decoded
 */
protected void processImageOperation(List<COSBase> arguments) throws IOException {
    COSName imageName = (COSName) arguments.get(0);
    PDXObject candidate = getResources().getXObject(imageName);
    if (!(candidate instanceof PDImageXObject)) {
        return;
    }

    PDImageXObject pdfImage = (PDImageXObject) candidate;
    BufferedImage rotated = rotateImage(pdfImage.getImage());
    ImageResource imageData = new ImageResource(getTitle(), rotated);

    Rectangle2D placement = calculateImagePosition(pdfImage);
    renderImage(
            (float) placement.getX(),
            (float) placement.getY(),
            (float) placement.getWidth(),
            (float) placement.getHeight(),
            imageData);
}
/**
 * Loads {@code D:/Temp/Test.pdf} and writes every image XObject found directly in a page's
 * resources to a PNG file in {@code D:/Temp/}, named after {@link System#nanoTime()}.
 *
 * @throws Exception if the document cannot be loaded or an image cannot be decoded or written
 */
public static void testPDFBoxExtractImages() throws Exception {
    // try-with-resources closes the document even when decoding or writing an image fails;
    // the document was previously never closed.
    try (PDDocument document = PDDocument.load(new File("D:/Temp/Test.pdf"))) {
        for (PDPage page : document.getPages()) {
            PDResources pdResources = page.getResources();
            if (pdResources == null) {
                // Nothing to extract from a page that exposes no resources.
                continue;
            }
            for (COSName name : pdResources.getXObjectNames()) {
                PDXObject xobject = pdResources.getXObject(name);
                if (xobject instanceof org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) {
                    File file = new File("D:/Temp/" + System.nanoTime() + ".png");
                    ImageIO.write(
                            ((org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject) xobject)
                                    .getImage(),
                            "png",
                            file);
                }
            }
        }
    }
}
/**
 * Processes an operator whose first operand is the name of an XObject.
 *
 * <p>The XObject is fetched from the context's resources and passed to the marked-content
 * extractor; transparency groups and forms are then shown through the context. Nothing happens
 * when the first operand is not a {@link COSName}.
 *
 * @param operator the operator being processed
 * @param arguments the operands of the operator
 * @throws MissingOperandException if the operand list is empty
 * @throws IOException if the XObject cannot be fetched or shown
 */
@Override
public void process(Operator operator, List<COSBase> arguments) throws IOException {
    if (arguments.isEmpty()) {
        throw new MissingOperandException(operator, arguments);
    }

    COSBase operand = arguments.get(0);
    boolean namesXObject = operand instanceof COSName;
    if (!namesXObject) {
        return;
    }

    PDXObject resolved = context.getResources().getXObject((COSName) operand);
    ((PDFMarkedContentExtractor) context).xobject(resolved);

    if (resolved instanceof PDTransparencyGroup) {
        PDTransparencyGroup group = (PDTransparencyGroup) resolved;
        context.showTransparencyGroup(group);
        return;
    }
    if (resolved instanceof PDFormXObject) {
        context.showForm((PDFormXObject) resolved);
    }
}
/**
 * Collects every image of the document, including images nested inside form XObjects.
 *
 * @param document the document to scan
 * @return the decoded images in page order; empty if the document contains none
 * @throws IOException if an XObject cannot be read or an image cannot be decoded
 */
public List<RenderedImage> getImagesFromPDF(PDDocument document) throws IOException {
    List<RenderedImage> images = new ArrayList<>();
    for (PDPage page : document.getPages()) {
        images.addAll(getImagesFromResources(page.getResources()));
    }
    return images;
}

/**
 * Collects the images reachable from the given resources, descending into form XObjects.
 *
 * @param resources the resources to scan; {@code null} is accepted and yields an empty list
 * @return the decoded images found, never {@code null}
 * @throws IOException if an XObject cannot be read or an image cannot be decoded
 */
private List<RenderedImage> getImagesFromResources(PDResources resources) throws IOException {
    List<RenderedImage> images = new ArrayList<>();
    // A page or form XObject without a resource dictionary may hand back null from
    // getResources(); treat that as "no images" instead of failing with an NPE.
    if (resources == null) {
        return images;
    }

    for (COSName xObjectName : resources.getXObjectNames()) {
        PDXObject xObject = resources.getXObject(xObjectName);
        if (xObject instanceof PDFormXObject) {
            images.addAll(getImagesFromResources(((PDFormXObject) xObject).getResources()));
        } else if (xObject instanceof PDImageXObject) {
            images.add(((PDImageXObject) xObject).getImage());
        }
    }
    return images;
}
/**
 * Looks up the XObject named by the first operand, notifies the marked-content extractor about
 * it, and shows it through the context if it is a transparency group or a form.
 *
 * <p>If the first operand is not a {@link COSName} the call returns without doing anything.
 *
 * @param operator the operator being processed
 * @param arguments the operands of the operator; the first one names the XObject
 * @throws MissingOperandException if there is no operand at all
 * @throws IOException if the lookup or the rendering of the XObject fails
 */
@Override
public void process(Operator operator, List<COSBase> arguments) throws IOException {
    if (arguments.isEmpty()) {
        throw new MissingOperandException(operator, arguments);
    }

    COSBase head = arguments.get(0);
    if (!(head instanceof COSName)) {
        return;
    }
    COSName xobjectName = (COSName) head;

    PDXObject found = context.getResources().getXObject(xobjectName);
    PDFMarkedContentExtractor markedContentExtractor = (PDFMarkedContentExtractor) context;
    markedContentExtractor.xobject(found);

    // The transparency-group test comes first, before the plain form test.
    boolean isGroup = found instanceof PDTransparencyGroup;
    if (isGroup) {
        context.showTransparencyGroup((PDTransparencyGroup) found);
    } else if (found instanceof PDFormXObject) {
        PDFormXObject formXObject = (PDFormXObject) found;
        context.showForm(formXObject);
    }
}
/**
 * Hands every font of the given resources to {@code extractStrategy}, then repeats the same for
 * the resources of each form XObject found there.
 *
 * @param resources the resources whose fonts are extracted
 * @throws IOException if a font or XObject cannot be read, or the extraction itself fails
 */
private void extractFontResources(PDResources resources) throws IOException {
    for (COSName fontName : resources.getFontNames()) {
        PDFont current = resources.getFont(fontName);
        extractStrategy.extract(current);
    }

    for (COSName xobjectName : resources.getXObjectNames()) {
        PDXObject candidate = resources.getXObject(xobjectName);
        if (!(candidate instanceof PDFormXObject)) {
            continue;
        }

        // Forms without their own resources have nothing further to extract.
        PDResources nested = ((PDFormXObject) candidate).getResources();
        if (nested != null) {
            extractFontResources(nested);
        }
    }
}
@Override public void process(Operator operator, List<COSBase> arguments) throws IOException { if (arguments.size() < 1) { throw new MissingOperandException(operator, arguments); } COSBase base0 = arguments.get(0); if (!(base0 instanceof COSName)) { return; } COSName name = (COSName) base0; if (context.getResources().isImageXObject(name)) { // we're done here, don't decode images when doing text extraction return; } PDXObject xobject = context.getResources().getXObject(name); if (xobject instanceof PDTransparencyGroup) { context.showTransparencyGroup((PDTransparencyGroup) xobject); } else if (xobject instanceof PDFormXObject) { PDFormXObject form = (PDFormXObject) xobject; context.showForm(form); } }
/**
 * Walks the given resources and, for each form XObject and each tiling pattern, replaces its
 * content stream with the tokens returned by {@code createTokensWithoutText} before descending
 * into that form's or pattern's own resources.
 *
 * @param resources the resources to walk; {@code null} is accepted and treated as empty
 * @throws IOException if an XObject or pattern cannot be read or its stream cannot be rewritten
 */
private static void processResources(PDResources resources) throws IOException {
    // getResources() on a form XObject or tiling pattern may yield null when it carries no
    // resource dictionary; returning early keeps the recursion below from throwing an NPE.
    if (resources == null) {
        return;
    }

    for (COSName name : resources.getXObjectNames()) {
        PDXObject xobject = resources.getXObject(name);
        if (xobject instanceof PDFormXObject) {
            PDFormXObject formXObject = (PDFormXObject) xobject;
            writeTokensToStream(formXObject.getContentStream(),
                    createTokensWithoutText(formXObject));
            processResources(formXObject.getResources());
        }
    }

    for (COSName name : resources.getPatternNames()) {
        PDAbstractPattern pattern = resources.getPattern(name);
        if (pattern instanceof PDTilingPattern) {
            PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
            writeTokensToStream(tilingPattern.getContentStream(),
                    createTokensWithoutText(tilingPattern));
            processResources(tilingPattern.getResources());
        }
    }
}
/**
 * Intercepts the {@code Do} operator and delegates every other operator to the superclass.
 *
 * <p>For an image XObject an {@code ImageOperator} is appended to {@code buffer}; its width,
 * height and x come from the current transformation matrix, and its y is the crop-box height
 * minus the matrix's y translation and the height. A form XObject is shown through
 * {@code showForm}.
 *
 * @param operator the operator being processed
 * @param operands the operands of the operator; for {@code Do} the first one must be the
 *     XObject name
 * @throws IOException if the XObject cannot be resolved or processing fails
 */
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
    if (!"Do".equals(operator.getName())) {
        super.processOperator(operator, operands);
        return;
    }

    COSName xobjectName = (COSName) operands.get(0);
    PDXObject target = getResources().getXObject(xobjectName);

    if (target instanceof PDImageXObject) {
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
        PDRectangle cropBox = this.getCurrentPage().getCropBox();

        float width = ctm.getScalingFactorX();
        float height = ctm.getScalingFactorY();
        float left = ctm.getTranslateX();
        float top = cropBox.getHeight() - ctm.getTranslateY() - height;

        buffer.add(new ImageOperator(left, top, width, height));
    } else if (target instanceof PDFormXObject) {
        showForm((PDFormXObject) target);
    }
}
}