org.apache.tika.parser.ParseContext.get java code examples

/**
 * Resolves the parser that should receive delegated parsing work.
 * This implementation asks the supplied parse context for a
 * {@link Parser} and hands back {@link EmptyParser#INSTANCE} when the
 * context does not provide one. Subclasses may override this method to
 * pick a delegate in some other way.
 *
 * @since Apache Tika 0.7
 * @param context parse context
 * @return delegate parser
 */
protected Parser getDelegateParser(ParseContext context) {
  // Used only when the context holds no Parser of its own
  final Parser fallback = EmptyParser.INSTANCE;
  return context.get(Parser.class, fallback);
}

/**
 * Looks up the {@link PasswordProvider} held by this object's parse context.
 *
 * @return whatever the context returns for {@code PasswordProvider.class};
 *         presumably {@code null} when none was registered
 */
public PasswordProvider getPasswordProvider() {
  final PasswordProvider provider = context.get(PasswordProvider.class);
  return provider;
}

/**
 * Looks up the object registered in this context for the given interface,
 * substituting the supplied default when the lookup yields {@code null}.
 *
 * @param key the interface implemented by the requested object
 * @param defaultValue value to return if the requested object is not found
 * @return the object that implements the given interface,
 *         or the given default value if not found
 */
public <T> T get(Class<T> key, T defaultValue) {
  final T found = get(key);
  return (found == null) ? defaultValue : found;
}

/**
 * Reports the media types this parser offers for the given context.
 *
 * @param context parse context; a {@link TesseractOCRConfig} found in it
 *                takes precedence over this parser's default config
 * @return the supported image types when {@code hasTesseract} accepts the
 *         effective config, otherwise an empty set
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
  TesseractOCRConfig ocrConfig = context.get(TesseractOCRConfig.class, defaultConfig);
  // Advertise our image types only if Tesseract is installed; otherwise
  // advertise nothing so the other image parsers can be selected instead
  return hasTesseract(ocrConfig)
      ? SUPPORTED_TYPES
      : Collections.<MediaType>emptySet();
}

protected AbstractPOIFSExtractor(ParseContext context, Metadata parentMetadata) {
  embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
  this.passwordProvider = context.get(PasswordProvider.class);
  this.officeParserConfig = context.get(OfficeParserConfig.class, new OfficeParserConfig());
  this.parentMetadata = parentMetadata;
  this.context = context;
}

/**
 * Creates the extractor around an already opened POI text extractor.
 *
 * @param context parse context; supplies the embedded document extractor
 *                and the {@link OfficeParserConfig}
 * @param extractor the POI text extractor to wrap
 */
public AbstractOOXMLExtractor(ParseContext context, POIXMLTextExtractor extractor) {
  this.extractor = extractor;
  this.context = context;
  embeddedExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
  // OOXMLParser's call to configure() has already put the config into
  // the context, so this lookup is relied upon to be non-null.
  this.config = context.get(OfficeParserConfig.class);
}

/**
 * Ensures the parse context carries an {@link OfficeParserConfig}.
 * A config already present in the context is written back unchanged;
 * otherwise this parser's default config is stored.
 *
 * @param parseContext the context to inspect and update
 */
public void configure(ParseContext parseContext) {
  parseContext.set(
      OfficeParserConfig.class,
      parseContext.get(OfficeParserConfig.class, defaultOfficeParserConfig));
}

/**
 * Picks the {@link EncodingDetector} to use for a parse. A detector passed
 * in through the ParseContext wins; if none was passed in, the result of
 * the no-argument {@code getEncodingDetector()} is used instead.
 *
 * @param parseContext the context that may carry an EncodingDetector
 * @return the detector from the context, or the one from
 *         {@code getEncodingDetector()} when the context has none
 */
protected EncodingDetector getEncodingDetector(ParseContext parseContext) {
  EncodingDetector override = parseContext.get(EncodingDetector.class);
  return (override != null) ? override : getEncodingDetector();
}

/**
 * Decides whether an embedded document should be parsed.
 * A {@link DocumentSelector} in the context decides on its own when
 * present. Otherwise a {@link FilenameFilter} in the context is consulted,
 * but only if the metadata carries a resource name. With neither, the
 * answer is {@code true}.
 *
 * @param metadata metadata of the embedded document
 * @return {@code true} if the embedded document should be parsed
 */
public boolean shouldParseEmbedded(Metadata metadata) {
  DocumentSelector documentSelector = context.get(DocumentSelector.class);
  if (documentSelector != null) {
    return documentSelector.select(metadata);
  }

  FilenameFilter nameFilter = context.get(FilenameFilter.class);
  if (nameFilter == null) {
    return true;
  }

  // Without a resource name there is nothing for the filter to judge
  String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
  return resourceName == null || nameFilter.accept(ABSTRACT_PATH, resourceName);
}

/**
 * @return Returns a {@link TikaConfig} -- looking for it first in the
 * ParseContext that was included during initialization, and falling back to
 * {@link TikaConfig#getDefaultConfig()} if the ParseContext has none.
 * The result is cached in a field so the lookup only happens once.
 */
public TikaConfig getTikaConfig() {
  // Already resolved on an earlier call
  if (tikaConfig != null) {
    return tikaConfig;
  }
  tikaConfig = context.get(TikaConfig.class);
  if (tikaConfig == null) {
    tikaConfig = TikaConfig.getDefaultConfig();
  }
  return tikaConfig;
}

/**
 * @return Returns a {@link TikaConfig} -- looking for it first in the
 * ParseContext that was included in the initialization, and falling back to
 * {@link TikaConfig#getDefaultConfig()} if the ParseContext has none.
 * Unlike {@link #getTikaConfig()}, nothing is cached here.
 *
 * @deprecated as of 1.17, use {@link #getTikaConfig()} instead
 */
@Deprecated
public TikaConfig getConfig() {
  TikaConfig fromContext = context.get(TikaConfig.class);
  return (fromContext != null) ? fromContext : TikaConfig.getDefaultConfig();
}

/**
 * Returns the {@link Detector} to use. A detector found in the parse
 * context always wins and is not cached. Otherwise the detector from
 * {@link #getTikaConfig()} is fetched on first need and cached in a field.
 *
 * @return the detector from the context, or the cached config detector
 */
public Detector getDetector() {
  Detector fromContext = context.get(Detector.class);
  if (fromContext != null) {
    return fromContext;
  }
  // Be as lazy as possible: ask the config only once
  if (detector == null) {
    detector = getTikaConfig().getDetector();
  }
  return detector;
}

/**
 * Returns the {@link MimeTypes} to use. An instance found in the parse
 * context always wins and is not cached. Otherwise the mime repository from
 * {@link #getTikaConfig()} is fetched on first need and cached in a field.
 *
 * @return the MimeTypes from the context, or the cached config repository
 */
public MimeTypes getMimeTypes() {
  MimeTypes fromContext = context.get(MimeTypes.class);
  if (fromContext != null) {
    return fromContext;
  }
  // Be as lazy as possible: ask the config only once
  if (mimeTypes == null) {
    mimeTypes = getTikaConfig().getMimeRepository();
  }
  return mimeTypes;
}

/**
 * Turns on inline image extraction for the PDF parser, unless a config file
 * path is set or the context already holds a {@link PDFParserConfig}.
 * When it does act, it logs a notice and stores a new config in the context.
 */
private void extractInlineImagesFromPDFs() {
  // Leave things alone when a config file or an explicit PDF config exists
  if (configFilePath != null || context.get(PDFParserConfig.class) != null) {
    return;
  }
  PDFParserConfig inlineImageConfig = new PDFParserConfig();
  inlineImageConfig.setExtractInlineImages(true);
  LOG.info("As a convenience, TikaCLI has turned on extraction of\n" +
      "inline images for the PDFParser (TIKA-2374).\n" +
      "Aside from the -z option, this is not the default behavior\n"+
      "in Tika generally or in tika-server.");
  context.set(PDFParserConfig.class, inlineImageConfig);
}

/**
 * Runs the superclass preparation, then registers an
 * {@link EncodingDetector} built from the next charset offered by the
 * {@link CharsetTester} in the context.
 *
 * @param parser the parser being prepared
 * @param metadata the document metadata
 * @param context the parse context; must already contain a CharsetTester,
 *                since its lookup result is dereferenced without a check
 */
@Override
protected void parserPrepare(Parser parser, Metadata metadata,
    ParseContext context) {
  super.parserPrepare(parser, metadata, context);

  // Work out which charset to try on this round
  CharsetTester tester = context.get(CharsetTester.class);
  Charset nextCharset = Charset.forName(tester.getNextCharset());

  NonDetectingEncodingDetector fixedDetector =
      new NonDetectingEncodingDetector(nextCharset);
  context.set(EncodingDetector.class, fixedDetector);
}

/**
 * Parses the stream through the superclass, with the caller's handler
 * wrapped in a {@link CTAKESContentHandler}.
 *
 * @param stream the document stream
 * @param handler the caller's content handler, passed to the wrapper
 * @param metadata the document metadata, also passed to the wrapper
 * @param context the parse context; a {@link CTAKESConfig} in it is used,
 *                otherwise a newly constructed one
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
    Metadata metadata, ParseContext context) throws IOException,
    SAXException, TikaException {
  CTAKESConfig ctakesConfig = context.get(CTAKESConfig.class, new CTAKESConfig());
  super.parse(
      stream,
      new CTAKESContentHandler(handler, metadata, ctakesConfig),
      metadata,
      context);
}

public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
  super(context);
  this.parseContext = context;
  this.extractAllAlternatives = context.get(OfficeParserConfig.class).getExtractAllAlternativesFromMSG();
  try {
    this.msg = new MAPIMessage(root);
  } catch (IOException e) {
    throw new TikaException("Failed to parse Outlook message", e);
  }
}

/**
 * Returns the {@link EmbeddedDocumentExtractor} from the context, or builds
 * a {@link ParsingEmbeddedDocumentExtractor} when the context has none.
 * In the second case a {@link MockParser} is first stored in the context
 * if it holds no {@link Parser}, so this method can modify the context.
 *
 * @param context the parse context to query and possibly update
 * @return the extractor from the context, or a newly built one
 */
protected EmbeddedDocumentExtractor getEmbeddedDocumentExtractor(ParseContext context) {
  EmbeddedDocumentExtractor existing = context.get(EmbeddedDocumentExtractor.class);
  if (existing != null) {
    return existing;
  }
  // Make sure a Parser is registered before building the extractor
  if (context.get(Parser.class) == null) {
    context.set(Parser.class, new MockParser());
  }
  return new ParsingEmbeddedDocumentExtractor(context);
}

/**
 * Extracts macros from the given package part, if the
 * {@link OfficeParserConfig} in the context has macro extraction enabled.
 *
 * @param macroPart the package part whose stream holds the macro container
 * @param handler the content handler passed on to the macro extraction
 * @throws TikaException if opening or reading the part fails with an IOException
 * @throws SAXException declared for the macro extraction call
 */
void handleMacros(PackagePart macroPart, ContentHandler handler) throws TikaException, SAXException {
  if (!context.get(OfficeParserConfig.class).getExtractMacros()) {
    return;
  }
  try (InputStream macroStream = macroPart.getInputStream();
      POIFSFileSystem macroFs = new POIFSFileSystem(macroStream)) {
    //Macro reading exceptions are already swallowed here
    OfficeParser.extractMacros(macroFs, handler, embeddedExtractor);
  } catch (IOException e) {
    throw new TikaException("Broken OOXML file", e);
  }
}

/**
 * Reads the investigation stream with automatic character encoding
 * detection and passes the reader to {@code extractMetadata}.
 * The {@code handler} argument is not used in this method body.
 *
 * @param stream the investigation file stream
 * @param handler the XHTML content handler (unused here)
 * @param metadata the metadata handed to the reader and to {@code extractMetadata}
 * @param context the parse context; a {@link TikaConfig} in it is used,
 *                otherwise {@link TikaConfig#getDefaultConfig()}
 * @param studyFileName the study file name passed to {@code extractMetadata}
 */
public static void parseInvestigation(InputStream stream, XHTMLContentHandler handler, Metadata metadata, ParseContext context, String studyFileName) throws IOException, TikaException, SAXException {
  TikaConfig fromContext = context.get(TikaConfig.class);
  TikaConfig effectiveConfig =
      (fromContext != null) ? fromContext : TikaConfig.getDefaultConfig();

  // The reader is built over a CloseShieldInputStream wrapper, not the raw
  // stream; presumably so closing the reader leaves the caller's stream open
  CloseShieldInputStream shielded = new CloseShieldInputStream(stream);
  // Automatically detect the character encoding
  try (AutoDetectReader reader =
      new AutoDetectReader(shielded, metadata, effectiveConfig.getEncodingDetector())) {
    extractMetadata(reader, metadata, studyFileName);
  }
}

Javadoc

Returns the object in this context that implements the given interface.

Popular methods of ParseContext

<init>
set
Adds the given value to the context as an implementation of the given interface.
getDocumentBuilder
Returns the DOM builder specified in this parsing context. If a builder is not explicitly specified,
getSAXParser
Returns the SAX parser specified in this parsing context. If a parser is not explicitly specified, t
getXMLInputFactory
Returns the StAX input factory specified in this parsing context. If a factory is not explicitly spe
getDocumentBuilderFactory
Returns the DOM builder factory specified in this parsing context. If a factory is not explicitly sp
getSAXParserFactory
Returns the SAX parser factory specified in this parsing context. If a factory is not explicitly spe
getXMLReader
Returns the XMLReader specified in this parsing context. If a reader is not explicitly specified, th
setVersion
tryToSetSAXFeatureOnDOMFactory
tryToSetStaxProperty
tryToSetXercesManager

Popular in Java

Making http post requests using okhttp
notifyDataSetChanged (ArrayAdapter)
onCreateOptionsMenu (Activity)
startActivity (Activity)
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to ha
Path (java.nio.file)
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
Kernel (java.awt.image)
JCheckBox (javax.swing)
JTextField (javax.swing)
From CI to AI: The AI layer in your organization

How to use the get method in org.apache.tika.parser.ParseContext

Best Java code snippets using org.apache.tika.parser.ParseContext.get (Showing top 20 results out of 315)

How to use the get method in org.apache.tika.parser.ParseContext