/**
 * Tells whether this document should be processed through the XFA-only path.
 *
 * <p>The answer is {@code true} only when the config flag
 * {@code getIfXFAExtractOnlyXFA()} is set and the document exposes a non-null
 * catalog, a non-null AcroForm and a non-null XFA entry. The document is not
 * touched at all when the config flag is off.
 *
 * @param pdDocument document whose catalog / AcroForm / XFA chain is probed
 * @param config     parser configuration supplying the XFA-only flag
 * @return {@code true} if every link of the chain is present and the flag is set
 */
private boolean shouldHandleXFAOnly(PDDocument pdDocument, PDFParserConfig config) {
    if (!config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (pdDocument.getDocumentCatalog() == null) {
        return false;
    }
    if (pdDocument.getDocumentCatalog().getAcroForm() == null) {
        return false;
    }
    return pdDocument.getDocumentCatalog().getAcroForm().getXFA() != null;
}
/**
 * Emits the document's XFA content, and nothing else, as XHTML.
 *
 * <p>The supplied handler is wrapped in an {@link XHTMLContentHandler}, the
 * document is started, the XFA bytes taken from the AcroForm are handed to an
 * {@link XFAExtractor}, and the document is ended. No null checks are made on
 * the catalog / AcroForm / XFA chain here. {@code endDocument()} is not
 * reached when extraction throws.
 *
 * @param pdDocument document providing the XFA bytes
 * @param handler    downstream SAX handler that receives the XHTML events
 * @param metadata   metadata passed to the XHTML handler and the extractor
 * @param context    parse context passed to the extractor
 * @throws SAXException  declared for the XHTML handler / extractor calls
 * @throws IOException   declared for reading the XFA bytes or closing the stream
 * @throws TikaException wrapping any {@link XMLStreamException} from extraction
 */
private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler,
                           Metadata metadata, ParseContext context)
        throws SAXException, IOException, TikaException {
    XFAExtractor xfaExtractor = new XFAExtractor();
    XHTMLContentHandler xhtmlHandler = new XHTMLContentHandler(handler, metadata);

    xhtmlHandler.startDocument();
    // Fetched after startDocument(), matching the order in which failures surface.
    byte[] xfaBytes = pdDocument.getDocumentCatalog().getAcroForm().getXFA().getBytes();
    try (InputStream xfaStream = new ByteArrayInputStream(xfaBytes)) {
        xfaExtractor.extract(xfaStream, xhtmlHandler, metadata, context);
    } catch (XMLStreamException xmlError) {
        throw new TikaException("XML error in XFA", xmlError);
    }
    xhtmlHandler.endDocument();
}
// Obtain the XFA resource from the form object.
// NOTE(review): getXFA() may return null when the form carries no XFA entry — confirm and guard before use.
PDXFAResource pdxfa = form.getXFA();
// Reads the XFA XML out of a PDF's AcroForm, parses it into a DOM, serialises
// a DOM back into a fresh COSStream and installs that stream as the form's
// new XFA entry.
PDDocument doc = PDDocument.load("filename");
doc.setAllSecurityToBeRemoved(true);
PDDocumentCatalog docCatalog = doc.getDocumentCatalog();
PDAcroForm form = docCatalog.getAcroForm();
PDXFA xfa = form.getXFA();
COSBase cos = xfa.getCOSObject();
// NOTE(review): this cast assumes the XFA entry is a single stream; an
// array-valued entry would fail here with ClassCastException — confirm inputs.
COSStream coss = (COSStream) cos;

// Close the decoded input stream as soon as the DOM has been built.
Document document;
try (InputStream cosin = coss.getUnfilteredStream()) {
    document = documentBuilder.parse(cosin);
}

COSStream cosout = new COSStream(new RandomAccessBuffer());
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer transformer = tFactory.newTransformer();
// NOTE(review): `xmlDoc` is not declared in this snippet, while the freshly
// parsed `document` above is never used — verify which DOM is meant to be written.
DOMSource source = new DOMSource(xmlDoc);

// The output stream must be closed before cosout is handed to PDXFA,
// otherwise the written XML may not be fully committed to the COSStream.
try (OutputStream out = cosout.createUnfilteredStream()) {
    StreamResult result = new StreamResult(out);
    transformer.transform(source, result);
}

PDXFA xfaout = new PDXFA(cosout);
form.setXFA(xfaout);
/**
 * Reports whether XFA-only extraction applies to the given document.
 *
 * <p>Evaluates, left to right with short-circuiting: the config flag
 * {@code getIfXFAExtractOnlyXFA()}, then non-null checks on the document
 * catalog, its AcroForm and the AcroForm's XFA entry.
 *
 * @param pdDocument document to inspect
 * @param config     parser configuration holding the XFA-only flag
 * @return {@code true} when the flag is set and catalog, AcroForm and XFA are all non-null
 */
private boolean shouldHandleXFAOnly(PDDocument pdDocument, PDFParserConfig config) {
    return config.getIfXFAExtractOnlyXFA()
            && pdDocument.getDocumentCatalog() != null
            && pdDocument.getDocumentCatalog().getAcroForm() != null
            && pdDocument.getDocumentCatalog().getAcroForm().getXFA() != null;
}
/**
 * Checks the preconditions for the XFA-only extraction path.
 *
 * <p>When the config flag {@code getIfXFAExtractOnlyXFA()} is off the method
 * returns {@code false} without inspecting the document. Otherwise it requires
 * the catalog, the AcroForm and the XFA entry to all be non-null.
 *
 * @param pdDocument document to inspect
 * @param config     parser configuration holding the XFA-only flag
 * @return {@code true} if XFA-only handling should be used
 */
private boolean shouldHandleXFAOnly(PDDocument pdDocument, PDFParserConfig config) {
    boolean xfaOnlyRequested = config.getIfXFAExtractOnlyXFA();
    if (!xfaOnlyRequested) {
        return false;
    }
    // Each link is null-checked before it is dereferenced by the next test.
    return pdDocument.getDocumentCatalog() != null
            && pdDocument.getDocumentCatalog().getAcroForm() != null
            && pdDocument.getDocumentCatalog().getAcroForm().getXFA() != null;
}
/**
 * Tells whether this document should be processed through the XFA-only path.
 *
 * <p>Returns {@code true} only when {@code config.getIfXFAExtractOnlyXFA()}
 * is set and the document has a non-null catalog, AcroForm and XFA entry.
 * The checks run in that order and stop at the first one that fails.
 *
 * @param pdDocument document whose catalog / AcroForm / XFA chain is probed
 * @param config     pure-Java parser configuration supplying the XFA-only flag
 * @return {@code true} if every check passes
 */
private boolean shouldHandleXFAOnly(PDDocument pdDocument, PDFPureJavaParserConfig config) {
    if (!config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (pdDocument.getDocumentCatalog() == null) {
        return false;
    }
    if (pdDocument.getDocumentCatalog().getAcroForm() == null) {
        return false;
    }
    return pdDocument.getDocumentCatalog().getAcroForm().getXFA() != null;
}
/**
 * Writes only the XFA portion of the document to the given handler as XHTML.
 *
 * <p>Sequence: wrap {@code handler} in an {@link XHTMLContentHandler}, start
 * the document, stream the AcroForm's XFA bytes into an {@link XFAExtractor},
 * then end the document. The catalog, AcroForm and XFA entry are dereferenced
 * without null checks. If extraction throws, {@code endDocument()} is skipped.
 *
 * @param pdDocument document providing the XFA bytes
 * @param handler    SAX handler receiving the generated XHTML
 * @param metadata   metadata passed to the XHTML handler and the extractor
 * @param context    parse context passed to the extractor
 * @throws SAXException  declared for the XHTML handler / extractor calls
 * @throws IOException   declared for reading the XFA bytes or closing the stream
 * @throws TikaException when extraction raises an {@link XMLStreamException}
 */
private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler,
                           Metadata metadata, ParseContext context)
        throws SAXException, IOException, TikaException {
    XFAExtractor extractor = new XFAExtractor();
    XHTMLContentHandler out = new XHTMLContentHandler(handler, metadata);
    out.startDocument();

    try (InputStream xfaInput = new ByteArrayInputStream(
            pdDocument.getDocumentCatalog().getAcroForm().getXFA().getBytes())) {
        extractor.extract(xfaInput, out, metadata, context);
    } catch (XMLStreamException cause) {
        // Surface XML parsing problems as a Tika-level failure, keeping the cause.
        throw new TikaException("XML error in XFA", cause);
    }

    out.endDocument();
}
/**
 * Emits the document's XFA content, and nothing else, as XHTML.
 *
 * <p>The supplied handler is wrapped in an {@link XHTMLContentHandler}, the
 * document is started, the XFA bytes taken from the AcroForm are handed to an
 * {@link XFAExtractor}, and the document is ended. No null checks are made on
 * the catalog / AcroForm / XFA chain here. {@code endDocument()} is not
 * reached when extraction throws.
 *
 * @param pdDocument document providing the XFA bytes
 * @param handler    downstream SAX handler that receives the XHTML events
 * @param metadata   metadata passed to the XHTML handler and the extractor
 * @param context    parse context passed to the extractor
 * @throws SAXException  declared for the XHTML handler / extractor calls
 * @throws IOException   declared for reading the XFA bytes or closing the stream
 * @throws TikaException wrapping any {@link XMLStreamException} from extraction
 */
private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler,
                           Metadata metadata, ParseContext context)
        throws SAXException, IOException, TikaException {
    XFAExtractor xfaExtractor = new XFAExtractor();
    XHTMLContentHandler xhtmlHandler = new XHTMLContentHandler(handler, metadata);

    xhtmlHandler.startDocument();
    // Fetched after startDocument(), matching the order in which failures surface.
    byte[] xfaBytes = pdDocument.getDocumentCatalog().getAcroForm().getXFA().getBytes();
    try (InputStream xfaStream = new ByteArrayInputStream(xfaBytes)) {
        xfaExtractor.extract(xfaStream, xhtmlHandler, metadata, context);
    } catch (XMLStreamException xmlError) {
        throw new TikaException("XML error in XFA", xmlError);
    }
    xhtmlHandler.endDocument();
}
// Obtain the XFA resource from the form object.
// NOTE(review): getXFA() may return null when the form carries no XFA entry — confirm and guard before use.
PDXFAResource pdxfa = form.getXFA();
/**
 * Writes only the XFA portion of the document to the given handler as XHTML.
 *
 * <p>Sequence: wrap {@code handler} in an {@link XHTMLContentHandler}, start
 * the document, stream the AcroForm's XFA bytes into an {@link XFAExtractor},
 * then end the document. The catalog, AcroForm and XFA entry are dereferenced
 * without null checks. If extraction throws, {@code endDocument()} is skipped.
 *
 * @param pdDocument document providing the XFA bytes
 * @param handler    SAX handler receiving the generated XHTML
 * @param metadata   metadata passed to the XHTML handler and the extractor
 * @param context    parse context passed to the extractor
 * @throws SAXException  declared for the XHTML handler / extractor calls
 * @throws IOException   declared for reading the XFA bytes or closing the stream
 * @throws TikaException when extraction raises an {@link XMLStreamException}
 */
private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler,
                           Metadata metadata, ParseContext context)
        throws SAXException, IOException, TikaException {
    XFAExtractor extractor = new XFAExtractor();
    XHTMLContentHandler out = new XHTMLContentHandler(handler, metadata);
    out.startDocument();

    try (InputStream xfaInput = new ByteArrayInputStream(
            pdDocument.getDocumentCatalog().getAcroForm().getXFA().getBytes())) {
        extractor.extract(xfaInput, out, metadata, context);
    } catch (XMLStreamException cause) {
        // Surface XML parsing problems as a Tika-level failure, keeping the cause.
        throw new TikaException("XML error in XFA", cause);
    }

    out.endDocument();
}
// Obtain the XFA resource from the form object.
// NOTE(review): getXFA() may return null when the form carries no XFA entry — confirm and guard before use.
PDXFAResource pdxfa = form.getXFA();
// Obtain the XFA resource from the form object.
// NOTE(review): getXFA() may return null when the form carries no XFA entry — confirm and guard before use.
PDXFAResource pdxfa = form.getXFA();