protected void initDefaultParsers() { // Fallback parser fallbackParser = new FallbackParser(); //TODO delete when released in Tika: //https://issues.apache.org/jira/browse/TIKA-2222 // PureEdge XFDL parsers.put( ContentType.valueOf("application/vnd.xfdl"), new XFDLParser()); }
/**
 * Detects the content type of the supplied input stream by delegating to a
 * freshly created {@code Tika} facade.
 * @param content the content on which to detect content type
 * @return the detected content type
 * @throws IOException problem detecting content type
 */
public ContentType detect(InputStream content) throws IOException {
    String detectedType = new Tika().detect(content);
    // Guard the log call so the message is only built when debug is enabled.
    if (LOG.isDebugEnabled()) {
        LOG.debug("Detected \"" + detectedType
                + "\" content-type for input stream.");
    }
    return ContentType.valueOf(detectedType);
}
/**
ContentType.valueOf(cmd.getOptionValue(ARG_CONTENTTYPE)); String contentEncoding = cmd.getOptionValue(ARG_CONTENTENCODING); String output = cmd.getOptionValue(ARG_OUTPUTFILE);
/**
 * Detects the content type of a stream, giving the detector a resource
 * name derived from the supplied file name as an additional hint.
 * @param is the stream to inspect; it is wrapped in a
 *        {@code TikaInputStream} that is closed when detection completes
 * @param fileName file name the resource-name hint is derived from
 * @return the detected content type
 * @throws IOException problem detecting content type
 */
private ContentType doDetect(
        InputStream is, String fileName) throws IOException {
    try (TikaInputStream tikaStream = TikaInputStream.get(is)) {
        Metadata hints = new Metadata();
        // Replace the first extPattern match with its capture group 1
        // (presumably the file extension -- extPattern is declared
        // elsewhere) and attach the result to a fixed dummy resource name.
        String suffix = extPattern.matcher(fileName).replaceFirst("$1");
        hints.set(Metadata.RESOURCE_NAME_KEY, "file:///detect" + suffix);

        MediaType detected =
                getTikaConfig().getDetector().detect(tikaStream, hints);
        String typeName = detected.toString();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Detected \"" + typeName
                    + "\" content-type for: " + fileName);
        }
        return ContentType.valueOf(typeName);
    }
}
}
/**
 * Builds a {@link BaseCrawlData} from the current row of the given
 * result set. The {@code table} argument is not used by this
 * implementation.
 * @param table name of the table the row comes from (unused here)
 * @param rs result set positioned on the row to convert
 * @return the populated crawl data, or {@code null} if {@code rs} is
 *         {@code null}
 * @throws SQLException problem reading a column from the result set
 */
@Override
public ICrawlData toCrawlData(String table, ResultSet rs)
        throws SQLException {
    if (rs == null) {
        return null;
    }
    BaseCrawlData data = new BaseCrawlData();
    data.setReference(rs.getString("reference"));
    data.setParentRootReference(rs.getString("parentRootReference"));
    data.setRootParentReference(rs.getBoolean("isRootParentReference"));
    data.setState(CrawlState.valueOf(rs.getString("state")));
    data.setMetaChecksum(rs.getString("metaChecksum"));
    data.setContentChecksum(rs.getString("contentChecksum"));

    // Only set a content type when the column holds a non-blank value.
    // isNotBlank is the single-value check; isNoneBlank is the varargs
    // "none of these are blank" helper and was misapplied to one value.
    String contentType = rs.getString("contentType");
    if (StringUtils.isNotBlank(contentType)) {
        data.setContentType(ContentType.valueOf(contentType));
    }

    // The crawl date is read as a long and handed to Date(long), i.e.
    // epoch milliseconds; values of zero or less leave the date unset.
    long crawlDate = rs.getLong("crawlDate");
    if (crawlDate > 0) {
        data.setCrawlDate(new Date(crawlDate));
    }
    return data;
}
}
ContentType ct = ContentType.valueOf(name); if (ct != null) { embedMeta.setEmbeddedType("file-object");
ImporterMetadata.DOC_CONTENT_TYPE); if (StringUtils.isNotBlank(ct)) { doc.setContentType(ContentType.valueOf(ct));
/**
 * Loads this object's settings from XML: the ignored content types
 * regular expression, the parse hints, the fallback parser, and every
 * parser declared under "parsers.parser".
 * @param in reader supplying the XML configuration
 * @throws IOException problem loading the configuration
 */
@Override
public void loadFromXML(Reader in) throws IOException {
    XMLConfiguration xml = XMLConfigurationUtil.newXMLConfiguration(in);

    // The currently configured regex is the default when the tag is absent.
    String ignoredRegex = xml.getString(
            "ignoredContentTypes", getIgnoredContentTypesRegex());
    setIgnoredContentTypesRegex(ignoredRegex);

    // Parse hints
    loadParseHintsFromXML(xml);

    // Fallback parser (the current one is passed in as the default)
    fallbackParser = XMLConfigurationUtil.newInstance(
            xml, "fallbackParser", fallbackParser);

    // Parsers: each entry must declare the content type it handles
    List<HierarchicalConfiguration> declaredParsers =
            xml.configurationsAt("parsers.parser");
    for (HierarchicalConfiguration parserNode : declaredParsers) {
        IDocumentParser parser =
                XMLConfigurationUtil.newInstance(parserNode);
        String declaredType = parserNode.getString("[@contentType]");
        if (StringUtils.isBlank(declaredType)) {
            throw new ConfigurationException(
                    "Attribute \"contentType\" missing for parser: "
                            + parserNode.getString("[@class]"));
        }
        parsers.put(ContentType.valueOf(declaredType), parser);
    }
}
private void loadParseHintsFromXML(XMLConfiguration xml) {
String contentType = (String) doc.get(FIELD_CONTENT_TYPE); if (StringUtils.isNotBlank(contentType)) { data.setContentType(ContentType.valueOf(contentType));
+ "\"application/octet-stream\".", e); safeContentType = ContentType.valueOf("application/octet-stream");