/**
 * Reports the one media type this implementation handles:
 * {@code application/x-fictionbook+xml}.
 *
 * @param context the parse context; not consulted here
 * @return an immutable single-element set holding that media type
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    MediaType fictionBook = MediaType.application("x-fictionbook+xml");
    return Collections.singleton(fictionBook);
}
/**
 * Reports the one media type this implementation handles:
 * {@code application/x-prescription+xml}.
 *
 * @param context the parse context; not consulted here
 * @return an immutable single-element set holding that media type
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    MediaType prescriptionXml = MediaType.application("x-prescription+xml");
    return Collections.singleton(prescriptionXml);
}
}
/**
 * Reports the one media type this implementation handles:
 * {@code application/x-prescription}.
 *
 * @param context the parse context; not consulted here
 * @return an immutable single-element set holding that media type
 */
public Set<MediaType> getSupportedTypes(ParseContext context) {
    MediaType prescription = MediaType.application("x-prescription");
    return Collections.singleton(prescription);
}
}
private static MediaType detectJar(ZipFile zip) { if (zip.getEntry("META-INF/MANIFEST.MF") != null) { // It's a Jar file, or something based on Jar // Is it an Android APK? if (zip.getEntry("AndroidManifest.xml") != null) { return MediaType.application("vnd.android.package-archive"); } // Check for WAR and EAR if (zip.getEntry("WEB-INF/") != null) { return MediaType.application("x-tika-java-web-archive"); } if (zip.getEntry("META-INF/application.xml") != null) { return MediaType.application("x-tika-java-enterprise-archive"); } // Looks like a regular Jar Archive return MediaType.application("java-archive"); } else { // Some Android APKs miss the default Manifest if (zip.getEntry("AndroidManifest.xml") != null) { return MediaType.application("vnd.android.package-archive"); } return null; } }
/**
 * Reports the one media type this implementation handles:
 * {@code application/mock+xml}.
 *
 * @param context the parse context; not consulted here
 * @return a newly allocated, mutable set containing that media type
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    Set<MediaType> supported = new HashSet<>();
    supported.add(MediaType.application("mock+xml"));
    return supported;
}
/** * Detects Open XML Paper Specification (XPS) */ public static MediaType detectXPSOPC(OPCPackage pkg) { PackageRelationshipCollection xps = pkg.getRelationshipsByType("http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"); if (xps.size() == 1) { return MediaType.application("vnd.ms-xpsdocument"); } else { // Non-XPS Package received return null; } } /**
@SuppressWarnings("unchecked") private static MediaType detectIpa(ZipFile zip) { // Note - consider generalising this logic, if another format needs many regexp matching Set<Pattern> tmpPatterns = (Set<Pattern>)ipaEntryPatterns.clone(); Enumeration<ZipArchiveEntry> entries = zip.getEntries(); while (entries.hasMoreElements()) { ZipArchiveEntry entry = entries.nextElement(); String name = entry.getName(); Iterator<Pattern> ip = tmpPatterns.iterator(); while (ip.hasNext()) { if (ip.next().matcher(name).matches()) { ip.remove(); } } if (tmpPatterns.isEmpty()) { // We've found everything we need to find return MediaType.application("x-itunes-ipa"); } } // If we get here, not all required entries were found return null; }
/**
 * Detects a Google Earth KMZ archive: the archive root must contain
 * exactly one file, and that file's name must end in {@code .kml}.
 * Directories and entries whose names contain a path separator are
 * ignored.
 *
 * @param zip the archive whose entries are examined
 * @return the KMZ media type, or {@code null} if the root holds no
 *         {@code .kml} file, more than one, or any other file
 */
private static MediaType detectKmz(ZipFile zip) {
    boolean rootKmlSeen = false;

    for (Enumeration<ZipArchiveEntry> all = zip.getEntries(); all.hasMoreElements();) {
        ZipArchiveEntry candidate = all.nextElement();
        String path = candidate.getName();

        // Only files sitting directly in the archive root are relevant.
        if (candidate.isDirectory() || path.indexOf('/') != -1 || path.indexOf('\\') != -1) {
            continue;
        }

        // A second root file, or a root file that is not KML, rules KMZ out.
        if (rootKmlSeen || !path.endsWith(".kml")) {
            return null;
        }
        rootKmlSeen = true;
    }

    return rootKmlSeen ? MediaType.application("vnd.google-earth.kmz") : null;
}
public MediaType detect(InputStream stream, Metadata metadata) throws IOException { Key key = Pharmacy.getKey(); MediaType type = MediaType.OCTET_STREAM; try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) { Cipher cipher = Cipher.getInstance("RSA"); cipher.init(Cipher.DECRYPT_MODE, key); InputStream decrypted = new CipherInputStream(lookahead, cipher); QName name = new XmlRootExtractor().extractRootElement(decrypted); if (name != null && "http://example.com/xpd".equals(name.getNamespaceURI()) && "prescription".equals(name.getLocalPart())) { type = MediaType.application("x-prescription"); } } catch (GeneralSecurityException e) { // unable to decrypt, fall through } return type; } }
@Override public Set<MediaType> getSupportedTypes(ParseContext context) { Set<MediaType> types = new HashSet<MediaType>(); types.add(MediaType.application("x-netcdf")); types.add(MediaType.application("vrt")); types.add(MediaType.image("geotiff")); types.add(MediaType.image("nitf")); types.add(MediaType.application("x-rpf-toc")); types.add(MediaType.application("x-ecrg-toc")); types.add(MediaType.image("hfa")); types.add(MediaType.image("sar-ceos")); types.add(MediaType.image("ceos")); types.add(MediaType.application("jaxa-pal-sar")); types.add(MediaType.application("gff")); types.add(MediaType.application("elas")); types.add(MediaType.application("aig")); types.add(MediaType.application("aaigrid")); types.add(MediaType.application("grass-ascii-grid")); types.add(MediaType.application("sdts-raster")); types.add(MediaType.application("dted")); types.add(MediaType.image("png")); types.add(MediaType.image("jpeg")); types.add(MediaType.image("raster")); types.add(MediaType.application("jdem")); types.add(MediaType.image("gif")); types.add(MediaType.image("big-gif")); types.add(MediaType.image("envisat")); types.add(MediaType.image("fits")); types.add(MediaType.application("fits")); types.add(MediaType.image("bsb"));
/**
 * Builds the {@code NameDetector} under test with three name patterns:
 * a case-insensitive {@code .txt} suffix and the exact name
 * {@code README}, both mapped to plain text, and a case-sensitive
 * {@code .hdr} suffix mapped to {@code application/envi.hdr}.
 */
@Before
public void setUp() {
    Pattern anyTxtFile = Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE);
    Pattern readme = Pattern.compile("README");
    Pattern anyHdrFile = Pattern.compile(".*\\.hdr");

    Map<Pattern, MediaType> byName = new HashMap<Pattern, MediaType>();
    byName.put(anyTxtFile, MediaType.TEXT_PLAIN);
    byName.put(readme, MediaType.TEXT_PLAIN);
    byName.put(anyHdrFile, MediaType.application("envi.hdr"));

    detector = new NameDetector(byName);
}
return MediaType.application("x-corelpresentations"); // .shw } else if (names.contains("PerfectOffice_OBJECTS")) { return new MediaType(QUATTROPRO, "version", "7-8"); // .wb?
return MediaType.application("vnd.ms-package.xps");
private static MediaType detectIWork(ZipFile zip) { if (zip.getEntry(IWorkPackageParser.IWORK_COMMON_ENTRY) != null) { // Locate the appropriate index file entry, and reads from that // the root element of the document. That is used to the identify // the correct type of the keynote container. for (String entryName : IWorkPackageParser.IWORK_CONTENT_ENTRIES) { IWORKDocumentType type = IWORKDocumentType.detectType(zip.getEntry(entryName), zip); if (type != null) { return type.getType(); } } // Not sure, fallback to the container type return MediaType.application("vnd.apple.iwork"); } else { return null; } }
/**
 * Regression test for TIKA-2237: content supplied with a
 * {@code text/javascript} content-type hint is expected to be detected
 * as {@code application/javascript}.
 */
@Test
public void testTIKA2237() throws IOException {
    Metadata metadata = new Metadata();
    metadata.add(Metadata.CONTENT_TYPE, MediaType.text("javascript").toString());

    String script = "function() {};\n"
            + "try {\n"
            + " window.location = 'index.html';\n"
            + "} catch (e) {\n"
            + " console.log(e);\n"
            + "}";
    byte[] scriptBytes = script.getBytes(StandardCharsets.UTF_8);
    InputStream input = new ByteArrayInputStream(scriptBytes);

    MediaType detected = new ProbabilisticMimeDetectionSelector().detect(input, metadata);

    assertEquals(MediaType.application("javascript"), detected);
}
}
/**
 * Reports the one media type this implementation handles:
 * {@code application/x-fictionbook+xml}.
 *
 * @param context the parse context; not consulted here
 * @return an immutable single-element set holding that media type
 */
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
    MediaType fictionBook = MediaType.application("x-fictionbook+xml");
    return Collections.singleton(fictionBook);
}
@Test public void testReadParameterHierarchy() throws Exception { MimeType mimeBTree4 = this.mimeTypes.forName("application/x-berkeley-db;format=btree;version=4"); MediaType mtBTree4 = mimeBTree4.getType(); // Canonicalised with spaces assertEquals("application/x-berkeley-db; format=btree; version=4", mimeBTree4.toString()); assertEquals("application/x-berkeley-db; format=btree; version=4", mtBTree4.toString()); // Parent has one parameter MediaType mtBTree = this.mimeTypes.getMediaTypeRegistry().getSupertype(mtBTree4); assertEquals("application/x-berkeley-db; format=btree", mtBTree.toString()); // Parent has several children, for versions 2 through 4 Set<MediaType> mtBTreeChildren = this.mimeTypes.getMediaTypeRegistry().getChildTypes(mtBTree); assertTrue(mtBTreeChildren.toString(), mtBTreeChildren.size() >= 3); assertTrue(mtBTreeChildren.toString(), mtBTreeChildren.contains(mtBTree4)); // Parent of that has none MediaType mtBD = this.mimeTypes.getMediaTypeRegistry().getSupertype(mtBTree); assertEquals("application/x-berkeley-db", mtBD.toString()); // If we use one with parameters not known in the media registry, // getting the parent will return the non-parameter version MediaType mtAlt = MediaType.application("x-berkeley-db; format=unknown; version=42"); MediaType mtAltP = this.mimeTypes.getMediaTypeRegistry().getSupertype(mtAlt); assertEquals("application/x-berkeley-db", mtAltP.toString()); }
/**
 * Creates the parser wrapper: builds a {@code NameDetector} that maps
 * any name ending in {@code .pdf} (case-insensitive) to
 * {@code application/pdf}, and uses it together with a new
 * {@code PDFParser} to initialise {@code tika} via {@code TikaFactory}.
 */
private PdfParser() {
    Pattern pdfFileName = Pattern.compile(".*\\.pdf", Pattern.CASE_INSENSITIVE);

    Map<Pattern, MediaType> byName = new HashMap<Pattern, MediaType>();
    byName.put(pdfFileName, MediaType.application("pdf"));

    NameDetector nameDetector = new NameDetector(byName);
    tika = TikaFactory.newTika(nameDetector, new PDFParser());
}
if (fileType != null) { MediaType type = MediaType.application("mp4"); for (Map.Entry<MediaType, List<String>> e : typesMap.entrySet()) { if (e.getValue().contains(fileType.getMajorBrand())) {
assertDetect(MediaType.OCTET_STREAM, "See README"); // but not this assertDetect(MediaType.application("envi.hdr"), "ang20150420t182050_corr_v1e_img.hdr");