/**
 * SAX-parses {@code stream} and forwards the resulting events to {@code handler}.
 *
 * <p>The stream is wrapped in a {@code CloseShieldInputStream} and the handler in an
 * {@code OfflineContentHandler} before both are passed to
 * {@code XMLReaderUtils.parseSAX}. The {@code metadata} argument is not read here.
 *
 * @param stream   XML input to parse
 * @param handler  receiver of the SAX events
 * @param metadata unused by this method
 * @param context  parse context forwarded to {@code XMLReaderUtils.parseSAX}
 * @throws IOException   propagated from the underlying parse
 * @throws SAXException  propagated from the underlying parse
 * @throws TikaException propagated from the underlying parse
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final InputStream shielded = new CloseShieldInputStream(stream);
    final OfflineContentHandler offlineHandler = new OfflineContentHandler(handler);
    XMLReaderUtils.parseSAX(shielded, offlineHandler, context);
}
XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler( new XPSPageContentHandler(xhtml, embeddedImages) ),
/**
 * Test fixture: creates a fresh SAX parser and an {@code OfflineContentHandler}
 * that decorates a no-op {@code DefaultHandler}.
 */
@Before
public void setUp() throws Exception {
    final SAXParserFactory factory = SAXParserFactory.newInstance();
    parser = factory.newSAXParser();

    final DefaultHandler noOpDelegate = new DefaultHandler();
    offline = new OfflineContentHandler(noOpDelegate);
}
/**
 * Best-effort extraction of the root element name from an XML stream.
 *
 * <p>Any exception raised while parsing is swallowed; the method then returns
 * whatever {@code rootElement} value the {@code ExtractorHandler} holds at that
 * point.
 *
 * @param stream XML input; wrapped in a {@code CloseShieldInputStream} for the parse
 * @return the handler's {@code rootElement} after the parse attempt
 * @since Apache Tika 0.9
 */
public QName extractRootElement(InputStream stream) {
    final ExtractorHandler rootFinder = new ExtractorHandler();
    try {
        final InputStream shielded = new CloseShieldInputStream(stream);
        final OfflineContentHandler offlineHandler = new OfflineContentHandler(rootFinder);
        XMLReaderUtils.parseSAX(shielded, offlineHandler, EMPTY_CONTEXT);
    } catch (Exception ignored) {
        // Intentionally ignored: the result is read from the handler regardless
        // of whether the parse ran to completion.
    }
    return rootFinder.rootElement;
}
/**
 * SAX-parses {@code stream} into a new {@code StylesStripper} wrapped in an
 * {@code OfflineContentHandler}.
 *
 * <p>Unlike other call sites, the stream is passed to
 * {@code XMLReaderUtils.parseSAX} directly, without a close shield.
 *
 * @param parseContext context forwarded to {@code XMLReaderUtils.parseSAX}
 * @param stream       XML input to parse
 */
private void onDocumentLoad(ParseContext parseContext, InputStream stream)
        throws TikaException, IOException, SAXException {
    final OfflineContentHandler strippingHandler =
            new OfflineContentHandler(new StylesStripper());
    XMLReaderUtils.parseSAX(stream, strippingHandler, parseContext);
}
/**
 * SAX-parses {@code stream} through a chain of decorating handlers that ends
 * at {@code handler}.
 *
 * <p>Handler chain, outermost first: {@code OfflineContentHandler} →
 * {@code NSNormalizerContentHandler} →
 * {@code OpenDocumentElementMappingContentHandler} (configured with
 * {@code MAPPINGS}) → {@code handler}. The {@code metadata} argument is not
 * read here.
 *
 * @param stream   XML input; wrapped in a {@code CloseShieldInputStream}
 * @param handler  final receiver of the mapped events
 * @param metadata unused by this method
 * @param context  parse context forwarded to {@code XMLReaderUtils.parseSAX}
 */
void parseInternal(
        InputStream stream, final ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final DefaultHandler elementMapper =
            new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);

    final InputStream shielded = new CloseShieldInputStream(stream);
    final NSNormalizerContentHandler normalizer =
            new NSNormalizerContentHandler(elementMapper);

    XMLReaderUtils.parseSAX(shielded, new OfflineContentHandler(normalizer), context);
}
private void handleDocumentRef(String docRef) throws SAXException { //docRef is a path to a FixedDocumentSequence document, // e.g. /Documents/1/FixedDoc.fdoc //relative root is /Documents/1 ..need this Pages... String relativeRoot = null; int i = docRef.lastIndexOf("/"); if (i > 0) { relativeRoot = docRef.substring(0, i); } else { relativeRoot = ""; } String zipPath = (docRef.startsWith("/") ? docRef.substring(1) : docRef); if (pkg instanceof ZipPackage) { try (InputStream stream = getZipStream(zipPath, pkg)) { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( new PageContentPartHandler(relativeRoot, xhtml))), context); } catch (IOException | TikaException e) { throw new SAXException(new TikaException("IOException trying to read: " + docRef)); } } else { throw new SAXException(new TikaException("Package must be ZipPackage")); } }
new OfflineContentHandler(handler)); try { XMLReaderUtils.parseSAX(
/**
 * Emits the XML read from {@code is} inside a {@code <div class="ocr">} element.
 *
 * <p>The input is SAX-parsed into a {@code HOCRPassThroughHandler} (wrapped in
 * an {@code OfflineContentHandler}) that targets {@code xhtml}. The closing
 * {@code div} is written only if the parse returns normally.
 *
 * @param is           XML input; passed to the parser without a close shield
 * @param parseContext context for the parse; a new empty {@code ParseContext}
 *                     is used when this is {@code null}
 * @param xhtml        destination for the div and the parsed content
 */
private void extractHOCROutput(InputStream is, ParseContext parseContext,
                               XHTMLContentHandler xhtml)
        throws TikaException, IOException, SAXException {
    final ParseContext effectiveContext =
            (parseContext != null) ? parseContext : new ParseContext();

    xhtml.startElement("div", "class", "ocr");
    final OfflineContentHandler passThrough =
            new OfflineContentHandler(new HOCRPassThroughHandler(xhtml));
    XMLReaderUtils.parseSAX(is, passThrough, effectiveContext);
    xhtml.endElement("div");
}
/**
 * Opens the package part targeted by {@code packageRelationship} and SAX-parses
 * it into a {@code FixedDocSeqHandler} that writes to {@code xhtml}.
 *
 * <p>The part's stream is closed by the try-with-resources block; the parser
 * itself only sees a {@code CloseShieldInputStream} around it.
 *
 * @param packageRelationship relationship identifying the part to read
 * @param xhtml               destination handed to the {@code FixedDocSeqHandler}
 */
private void handleDocuments(PackageRelationship packageRelationship,
                             XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    try (InputStream stream = pkg.getPart(packageRelationship).getInputStream()) {
        final InputStream shielded = new CloseShieldInputStream(stream);
        final EmbeddedContentHandler docSeqHandler =
                new EmbeddedContentHandler(new FixedDocSeqHandler(xhtml));
        XMLReaderUtils.parseSAX(shielded, new OfflineContentHandler(docSeqHandler), context);
    }
}
try (InputStream stream = relatedPartPart.getInputStream()) { XMLReaderUtils.parseSAX(stream, new OfflineContentHandler(new EmbeddedContentHandler(contentHandler)), context);
// SAX-parse entryStream into contentHandler. The parser is handed a
// CloseShieldInputStream rather than entryStream itself (presumably so the
// parse cannot close the caller's stream - confirm), and the handler is
// wrapped in an OfflineContentHandler.
XMLReaderUtils.parseSAX( new CloseShieldInputStream(entryStream), new OfflineContentHandler(contentHandler), context );
/**
 * Parses {@code stream} as XML into an XHTML document written to {@code handler}.
 *
 * <p>Steps, in order: {@code configure(context)} is called; an
 * {@code XHTMLContentHandler} is created over {@code handler}/{@code metadata}
 * and its document is started; the stream (behind a
 * {@code CloseShieldInputStream}) is SAX-parsed into a
 * {@code Word2006MLDocHandler} wrapped in {@code EmbeddedContentHandler} and
 * {@code OfflineContentHandler}; finally the XHTML document is ended.
 *
 * <p>A {@code SAXException} from the parse is rethrown as a
 * {@code TikaException("XML parse error")} with the original as its cause.
 * {@code xhtml.endDocument()} is not in a {@code finally} block, so it runs
 * only when the parse completes without throwing.
 *
 * <p>NOTE(review): this line ends with one more closing brace than the method
 * opens; it appears to close an enclosing scope that starts outside this
 * excerpt.
 */
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { //set OfficeParserConfig if the user hasn't specified one configure(context); final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); try { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( new Word2006MLDocHandler(xhtml, metadata, context))), context); } catch (SAXException e) { throw new TikaException("XML parse error", e); } xhtml.endDocument(); } }
/**
 * Parses {@code stream} as XML and writes the result as an XHTML document to
 * {@code handler}.
 *
 * <p>{@code setContentType(metadata)} is called first. The XHTML document is
 * then started, and is always ended in a {@code finally} block. The events
 * are routed through the handler returned by {@code getContentHandler}, which
 * is given a {@code TaggedContentHandler} wrapping the XHTML handler.
 *
 * <p>On a {@code SAXException}, {@code tagged.throwIfCauseOf(e)} is consulted
 * first; if it does not throw, the exception is rethrown as a
 * {@code TikaException("XML parse error")} with the original as its cause.
 */
@Override
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    setContentType(metadata);

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    final TaggedContentHandler tagged = new TaggedContentHandler(xhtml);
    try {
        final InputStream shielded = new CloseShieldInputStream(stream);
        final EmbeddedContentHandler embedded =
                new EmbeddedContentHandler(getContentHandler(tagged, metadata, context));
        XMLReaderUtils.parseSAX(shielded, new OfflineContentHandler(embedded), context);
    } catch (SAXException e) {
        tagged.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        xhtml.endDocument();
    }
}
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // TODO Auto-generated method stub final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("p"); TaggedContentHandler tagged = new TaggedContentHandler(handler); try { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( getContentHandler(tagged, metadata, context))), context); } catch (SAXException e) { tagged.throwIfCauseOf(e); throw new TikaException("XML parse error", e); } finally { xhtml.endElement("p"); xhtml.endDocument(); } }
/**
 * SAX-parses one package part into {@code xhtml} via an
 * {@code OOXMLWordAndPowerPointTextHandler}.
 *
 * <p>Linked relationships for the part are loaded first and handed to the
 * text handler together with two flags read from {@code config}. A
 * {@code TikaException} or {@code IOException} raised while opening or parsing
 * the part is not propagated: its stack trace is added to {@code metadata}
 * under {@code TikaCoreProperties.TIKA_META_EXCEPTION_WARNING} instead.
 *
 * @param packagePart part to read
 * @param styles      styles passed to the body-part handler
 * @param listManager list manager passed to the body-part handler
 * @param xhtml       destination for the extracted content
 */
private void handlePart(PackagePart packagePart, XWPFStylesShim styles,
                        XWPFListManager listManager, XHTMLContentHandler xhtml)
        throws IOException, SAXException {
    final Map<String, String> linkedRelationships =
            loadLinkedRelationships(packagePart, true, metadata);

    try (InputStream stream = packagePart.getInputStream()) {
        final InputStream shielded = new CloseShieldInputStream(stream);
        final OOXMLTikaBodyPartHandler bodyHandler =
                new OOXMLTikaBodyPartHandler(xhtml, styles, listManager, config);
        final OOXMLWordAndPowerPointTextHandler textHandler =
                new OOXMLWordAndPowerPointTextHandler(
                        bodyHandler,
                        linkedRelationships,
                        config.getIncludeShapeBasedContent(),
                        config.getConcatenatePhoneticRuns());
        XMLReaderUtils.parseSAX(
                shielded,
                new OfflineContentHandler(new EmbeddedContentHandler(textHandler)),
                context);
    } catch (TikaException | IOException e) {
        // Record the failure as a warning rather than aborting the caller.
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
                ExceptionUtils.getStackTrace(e));
    }
}
/**
 * Parses {@code stream} as XML, producing an XHTML document on {@code handler}
 * whose body content sits inside a single {@code <p>} element.
 *
 * <p>If {@code metadata} has no {@code Metadata.CONTENT_TYPE} value yet, it is
 * set to {@code "application/xml"}. The {@code <p>} element and the document
 * are opened before parsing and closed in a {@code finally} block. The
 * {@code TaggedContentHandler} given to {@code getContentHandler} wraps the
 * raw {@code handler}, not the XHTML handler.
 *
 * <p>On a {@code SAXException}, {@code tagged.throwIfCauseOf(e)} is consulted
 * first; if it does not throw, the exception is rethrown as a
 * {@code TikaException("XML parse error")} with the original as its cause.
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    final boolean contentTypeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
    if (contentTypeMissing) {
        metadata.set(Metadata.CONTENT_TYPE, "application/xml");
    }

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.startElement("p");

    final TaggedContentHandler tagged = new TaggedContentHandler(handler);
    try {
        final InputStream shielded = new CloseShieldInputStream(stream);
        final EmbeddedContentHandler embedded =
                new EmbeddedContentHandler(getContentHandler(tagged, metadata, context));
        XMLReaderUtils.parseSAX(shielded, new OfflineContentHandler(embedded), context);
    } catch (SAXException e) {
        tagged.throwIfCauseOf(e);
        throw new TikaException("XML parse error", e);
    } finally {
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
// SAX-parse the stream into a freshly created XSLFCommentAuthorHandler. The
// parser is handed a CloseShieldInputStream rather than the stream itself
// (presumably so the parse cannot close the caller's stream - confirm), and
// the handler is wrapped in an OfflineContentHandler.
XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new XSLFCommentAuthorHandler()), context);
/**
 * SAX-parses {@code stream} and forwards the resulting events to {@code handler}.
 *
 * <p>The parse goes through {@code XMLReaderUtils.parseSAX}, like the other
 * parse entry points in this file, instead of obtaining a parser with
 * {@code context.getSAXParser()} and calling it directly. The stream is
 * wrapped in a {@code CloseShieldInputStream} and the handler in an
 * {@code OfflineContentHandler}. The {@code metadata} argument is not read
 * here.
 *
 * @param stream   XML input to parse
 * @param handler  receiver of the SAX events
 * @param metadata unused by this method
 * @param context  parse context forwarded to {@code XMLReaderUtils.parseSAX}
 * @throws IOException   propagated from the underlying parse
 * @throws SAXException  propagated from the underlying parse
 * @throws TikaException propagated from the underlying parse
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    XMLReaderUtils.parseSAX(
            new CloseShieldInputStream(stream),
            new OfflineContentHandler(handler),
            context);
}
/**
 * Delegates XML parsing of {@code stream} to {@code XMLReaderUtils.parseSAX}.
 *
 * <p>Before delegation the stream is placed behind a
 * {@code CloseShieldInputStream} and {@code handler} is decorated with an
 * {@code OfflineContentHandler}. {@code metadata} is accepted but not used.
 *
 * @param stream   XML input to parse
 * @param handler  receiver of the SAX events
 * @param metadata unused by this method
 * @param context  parse context forwarded to the delegate
 */
public void parse(InputStream stream,
                  ContentHandler handler,
                  Metadata metadata,
                  ParseContext context)
        throws IOException, SAXException, TikaException {
    final CloseShieldInputStream guardedInput = new CloseShieldInputStream(stream);
    XMLReaderUtils.parseSAX(guardedInput, new OfflineContentHandler(handler), context);
}