/**
 * Creates a handler and assigns a freshly constructed {@link DefaultDetector}
 * to {@code detector}.
 */
AssetMimeHandler() {
    detector = new DefaultDetector();
}
return DETECTOR.detect(tikaIS, metadata).toString(); } finally { if (tikaIS != null) {
/**
 * Creates a model with the given layer sizes and parameters.
 *
 * <p>Allocates {@code Theta1} as {@code nHidden x (nInput + 1)} and
 * {@code Theta2} as {@code nOutput x (nHidden + 1)}, then hands
 * {@code nn_params} to {@code populateThetas}.
 *
 * @param nInput    value stored as the number of inputs
 * @param nHidden   value stored as the number of hidden units
 * @param nOutput   value stored as the number of outputs
 * @param nn_params parameters passed on to {@code populateThetas}
 */
public NNTrainedModel(final int nInput, final int nHidden, final int nOutput,
        final float[] nn_params) {
    // Record the layer sizes first; they are set before populateThetas runs.
    this.numOfInputs = nInput;
    this.numOfHidden = nHidden;
    this.numOfOutputs = nOutput;

    // Each matrix has one more column than the size of the layer feeding it.
    this.Theta1 = new float[nHidden][nInput + 1];
    this.Theta2 = new float[nOutput][nHidden + 1];

    populateThetas(nn_params);
}
/**
 * Counts "safe" ASCII characters: the result of {@code count(0x20, 128)}
 * plus the result of {@link #countSafeControl()}.
 *
 * @see #countControl()
 * @return count of safe ASCII characters
 */
public int countSafeAscii() {
    final int inAsciiRange = count(0x20, 128);
    final int safeControl = countSafeControl();
    return inAsciiRange + safeControl;
}
/**
 * Counts eight bit characters, i.e. bytes with their highest bit set.
 * Delegates to {@code count(0x80, 0x100)}.
 *
 * @return count of eight bit characters
 */
public int countEightBit() {
    final int highBitSet = count(0x80, 0x100);
    return highBitSet;
}
/**
 * Asks each detector returned by {@code getDetectors()} in turn and returns
 * the first non-null answer.
 *
 * @param input text document input stream, or <code>null</code>
 * @param metadata input metadata for the document
 * @return the detected Charset or null if no charset could be detected
 * @throws IOException if one of the consulted detectors throws it
 */
@Override
public Charset detect(InputStream input, Metadata metadata) throws IOException {
    Charset result = null;
    for (EncodingDetector candidate : getDetectors()) {
        result = candidate.detect(input, metadata);
        if (result != null) {
            // First detector with an answer wins; later ones are not consulted.
            break;
        }
    }
    return result;
}
/**
 * Returns the {@code detector} field, creating it on first use via
 * {@code MagicDetector.parse(mediaType, type, offset, value, mask)}.
 * The method is {@code synchronized}.
 *
 * @return the cached or newly parsed detector
 */
private synchronized MagicDetector getDetector() {
    if (detector != null) {
        return detector;
    }
    detector = MagicDetector.parse(mediaType, type, offset, value, mask);
    return detector;
}
/**
 * Creates a detector by passing the given path to {@code loadDefaultModels}.
 *
 * @param modelFile path handed to {@code loadDefaultModels}
 */
public NNExampleModelDetector(final Path modelFile) {
    loadDefaultModels(modelFile);
}
/**
 * Builds a new {@link NNTrainedModel} from the currently held
 * {@code numOfInputs}, {@code numOfHidden}, {@code numOfOutputs} and
 * {@code params}.
 *
 * @return a newly constructed model
 */
public NNTrainedModel build() {
    final NNTrainedModel model =
            new NNTrainedModel(numOfInputs, numOfHidden, numOfOutputs, params);
    return model;
}
}
/**
 * Creates a reader by delegating to the two-argument constructor with the
 * result of {@code detect(stream, metadata, detectors, handler)}.
 *
 * @param stream stream from which to read -- make sure that it supports mark!
 * @param metadata metadata passed to {@code detect}
 * @param detectors encoding detectors passed to {@code detect}
 * @param handler load error handler passed to {@code detect}
 * @throws IOException declared by this constructor
 * @throws TikaException declared by this constructor
 */
private AutoDetectReader(
        InputStream stream,
        Metadata metadata,
        List<EncodingDetector> detectors,
        LoadErrorHandler handler)
        throws IOException, TikaException {
    this(stream, detect(stream, metadata, detectors, handler));
}
/**
 * Creates a {@link DefaultEncodingDetector} from the given service loader.
 *
 * @param loader service loader passed to the {@code DefaultEncodingDetector} constructor
 * @return the newly created detector
 */
protected static CompositeEncodingDetector getDefaultEncodingDetector(ServiceLoader loader) {
    final CompositeEncodingDetector created = new DefaultEncodingDetector(loader);
    return created;
}
/**
 * Creates an auto-detecting parser instance using the specified set of parser.
 * This allows one to create a Tika configuration where only a subset of the
 * available parsers have their 3rd party jars included, as otherwise the
 * use of the default TikaConfig will throw various "ClassNotFound" exceptions.
 *
 * <p>Delegates to the detector-taking constructor with a new
 * {@link DefaultDetector}.
 *
 * @param parsers the parsers to use
 */
public AutoDetectParser(Parser... parsers) {
    this(new DefaultDetector(), parsers);
}
/**
 * Checks whether at least one byte was seen and that the bytes that
 * were seen were mostly plain text (i.e. &lt; 2% control, &gt; 90% ASCII range).
 *
 * <p>Control bytes reported by {@code countSafeControl()} are subtracted from
 * the control count and added to the ASCII count before the thresholds are
 * applied.
 *
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-688">TIKA-688</a>
 * @return <code>true</code> if the seen bytes were mostly safe ASCII,
 *         <code>false</code> otherwise
 */
public boolean isMostlyAscii() {
    int control = count(0, 0x20);
    int ascii = count(0x20, 128);
    int safe = countSafeControl();
    // Widen to long before scaling: with int arithmetic the products silently
    // wrap around for large counts, which could flip the result.
    long unsafeControl = (long) control - safe;
    long asciiLike = (long) ascii + safe;
    return total > 0
            && unsafeControl * 100L < total * 2L
            && asciiLike * 100L > total * 90L;
}
/**
 * Creates a detector by passing the given file to {@code loadDefaultModels}.
 *
 * @param modelFile file handed to {@code loadDefaultModels}
 */
public NNExampleModelDetector(final File modelFile) {
    loadDefaultModels(modelFile);
}
/**
 * Creates a parser whose {@code encodingDetector} is a new
 * {@link DefaultEncodingDetector} built with its no-argument constructor.
 */
public AbstractEncodingDetectorParser() {
    encodingDetector = new DefaultEncodingDetector();
}
@Override public void process(final InputStream stream) throws IOException { try (final InputStream in = new BufferedInputStream(stream)) { TikaInputStream tikaStream = TikaInputStream.get(in); Metadata metadata = new Metadata(); if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) { metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename); } // Get mime type MediaType mediatype = detector.detect(tikaStream, metadata); mimeTypeRef.set(mediatype.toString()); } } });
/**
 * Creates a {@link DefaultDetector} from the given MIME types and service loader.
 *
 * @param types  MIME types passed to the {@code DefaultDetector} constructor
 * @param loader service loader passed to the {@code DefaultDetector} constructor
 * @return the newly created detector
 */
protected static CompositeDetector getDefaultDetector(MimeTypes types, ServiceLoader loader) {
    final CompositeDetector created = new DefaultDetector(types, loader);
    return created;
}
/**
 * Creates a parser by delegating to the four-argument constructor, passing
 * {@code null} as the third argument and a new
 * {@link DefaultEncodingDetector} built from {@code loader} as the fourth.
 *
 * @param registry media type registry passed through to the delegate constructor
 * @param loader   service loader passed through and used to build the encoding detector
 */
public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
    this(
            registry,
            loader,
            null,
            new DefaultEncodingDetector(loader));
}