/**
 * Parses the {@code test_recursive_embedded.docx} test resource with an
 * {@link AutoDetectParser} and returns the text collected by the body handler.
 * <p>
 * If you don't want content from embedded documents, send in a
 * {@link org.apache.tika.parser.ParseContext} that contains an
 * {@link EmptyParser} registered under {@code Parser.class}, as done here.
 *
 * @return The content of a file.
 * @throws IOException   if the test resource cannot be located, or if the
 *                       underlying parse call reports an I/O failure
 * @throws SAXException  if raised by the underlying parse call
 * @throws TikaException if raised by the underlying parse call
 */
public String parseNoEmbeddedExample() throws IOException, SAXException, TikaException {
    final String resourceName = "test_recursive_embedded.docx";

    AutoDetectParser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    // The parser registered under Parser.class in the context is the one
    // this example swaps out to keep embedded content from being extracted.
    ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, new EmptyParser());

    try (InputStream stream = ParsingExample.class.getResourceAsStream(resourceName)) {
        // getResourceAsStream signals a missing resource by returning null;
        // fail with a descriptive message instead of handing null to the parser.
        if (stream == null) {
            throw new IOException("Resource not found: " + resourceName);
        }
        parser.parse(stream, handler, metadata, parseContext);
        return handler.toString();
    }
}
EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context); return;
EmptyParser pNothing = new EmptyParser();
if (type == null || OOXMLParser.UNSUPPORTED_OOXML_TYPES.contains(type)) { EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context); return;
DummyParser pContent = new DummyParser(onlyOct, new HashMap<String,String>(), "Fell back!"); EmptyParser pNothing = new EmptyParser();
EmptyParser.INSTANCE.parse(stream, baseHandler, metadata, context); return;
DummyParser pContent1 = new DummyParser(onlyOct, m1, "Fell back 1!"); DummyParser pContent2 = new DummyParser(onlyOct, m2, "Fell back 2!"); EmptyParser pNothing = new EmptyParser();
final EmptyParser emptyParser = new EmptyParser();
@Override protected ParseContext buildParseContext(Metadata metadata, String targetMimeType, TransformationOptions options) { ParseContext context = super.buildParseContext(metadata, targetMimeType, options); boolean recurse = includeContents; if(options.getIncludeEmbedded() != null) { recurse = options.getIncludeEmbedded(); } if(recurse) { // Use an auto detect parser to handle the contents if(tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); } context.set(Parser.class, new AutoDetectParser(tikaConfig)); } else { // REPO-1066: an AutoDetectParser is the default in Tika after: https://issues.apache.org/jira/browse/TIKA-2096 // so we need to specify an empty one if we don't want the recurse parsing to happen context.set(Parser.class, new EmptyParser()); } return context; }
@Override protected ParseContext buildParseContext(Metadata metadata, String targetMimeType, TransformationOptions options) { ParseContext context = super.buildParseContext(metadata, targetMimeType, options); boolean recurse = includeContents; if(options.getIncludeEmbedded() != null) { recurse = options.getIncludeEmbedded(); } if(recurse) { // Use an auto detect parser to handle the contents if(tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); } context.set(Parser.class, new AutoDetectParser(tikaConfig)); } else { // REPO-1066: an AutoDetectParser is the default in Tika after: https://issues.apache.org/jira/browse/TIKA-2096 // so we need to specify an empty one if we don't want the recurse parsing to happen context.set(Parser.class, new EmptyParser()); } return context; }