/**
 * Builds a TikaInputStream that reads from a file identified by a
 * {@link Path}. This constructor is private; instances are obtained through
 * the static factory methods.
 * <p>
 * The file is opened through a buffered stream, and the path, the file size
 * at construction time and a new {@link TemporaryResources} instance are
 * recorded on the new object.
 *
 * @param path the path to the file that contains the stream
 * @throws IOException if the file cannot be opened or its size cannot be read
 */
private TikaInputStream(Path path) throws IOException {
    super(new BufferedInputStream(Files.newInputStream(path)));
    this.length = Files.size(path);
    this.tmp = new TemporaryResources();
    this.path = path;
}
/**
 * Builds a TikaInputStream that reads from the given {@link File}. This
 * constructor is private; instances are obtained through the static factory
 * methods.
 * <p>
 * The file is opened through a buffered stream, and the file's path, its
 * length at construction time and a new {@link TemporaryResources} instance
 * are recorded on the new object.
 *
 * @param file the file that contains the stream
 * @throws FileNotFoundException if the file cannot be opened for reading
 * @deprecated use {@link #TikaInputStream(Path)}
 */
@Deprecated
private TikaInputStream(File file) throws FileNotFoundException {
    super(new BufferedInputStream(new FileInputStream(file)));
    this.length = file.length();
    this.tmp = new TemporaryResources();
    this.path = file.toPath();
}
/**
 * Returns a TikaInputStream backed by a factory that is able to hand out a
 * fresh {@link InputStream} for the same resource any number of times.
 * <p>
 * This is typically what you want for {@link Parser}s that have to re-read
 * the stream several times, when other forms of buffering (eg File) would
 * be slower than simply requesting a new stream each time.
 * <p>
 * A new {@link TemporaryResources} instance is created for the returned
 * stream and passed on to {@code get(InputStreamFactory, TemporaryResources)}.
 *
 * @param factory source of fresh input streams for the resource
 * @return a TikaInputStream instance
 * @throws IOException if the delegate factory method fails
 */
public static TikaInputStream get(InputStreamFactory factory) throws IOException {
    TemporaryResources resources = new TemporaryResources();
    return get(factory, resources);
}

/**
/** * for testing purposes; this method write the histogram vector to a file. * * @param histogram * @throws IOException */ private void writeHisto(final float[] histogram) throws IOException { Path histPath = new TemporaryResources().createTempFile(); try (Writer writer = Files.newBufferedWriter(histPath, UTF_8)) { for (float bin : histogram) { writer.write(String.valueOf(bin) + "\t"); // writer.write(i + "\t"); } writer.write("\r\n"); } }
/**
 * Wraps the given byte array in a TikaInputStream. The array length is
 * recorded under {@link Metadata#CONTENT_LENGTH} in the supplied metadata
 * and is also passed to the new stream as its length.
 * <p>
 * Note that you must always explicitly close the returned stream as in
 * some cases it may end up writing the given data to a temporary file.
 *
 * @param data     input data
 * @param metadata metadata instance that receives the content length
 * @return a TikaInputStream instance
 */
public static TikaInputStream get(byte[] data, Metadata metadata) {
    int size = data.length;
    metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(size));
    ByteArrayInputStream bytes = new ByteArrayInputStream(data);
    return new TikaInputStream(bytes, new TemporaryResources(), size);
}
static boolean hasPython() { // check if python is installed and it has the required dependencies for the rotation program to run boolean hasPython = false; TemporaryResources tmp = null; try { tmp = new TemporaryResources(); File importCheck = tmp.createTemporaryFile(); String prg = "import numpy, matplotlib, skimage, _tkinter"; OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(importCheck), Charset.forName("UTF-8")); out.write(prg); out.close(); Process p = Runtime.getRuntime().exec("python " + importCheck.getAbsolutePath()); if (p.waitFor() == 0) { hasPython = true; } } catch (Exception e) { } finally { IOUtils.closeQuietly(tmp); } return hasPython; }
/**
 * Casts or wraps the given stream to a TikaInputStream instance, so the
 * functionality of this class is available even when only a plain input
 * stream is at hand.
 * <p>
 * Prefer this method over the
 * {@link #get(InputStream, TemporaryResources)} alternative when you
 * <em>do</em> explicitly close the returned stream. The recommended
 * access pattern is:
 * <pre>
 * try (TikaInputStream stream = TikaInputStream.get(...)) {
 *     // process stream
 * }
 * </pre>
 * <p>
 * When the try-with-resources statement calls {@link #close()}, the given
 * stream is closed together with any other resources associated with the
 * returned TikaInputStream instance.
 *
 * @param stream normal input stream
 * @return a TikaInputStream instance
 */
public static TikaInputStream get(InputStream stream) {
    TemporaryResources resources = new TemporaryResources();
    return get(stream, resources);
}
/**
 * Parses an in-memory {@link Image} by rendering it to a temporary PNG file
 * and delegating to the stream-based {@code parse} overload.
 *
 * @param image    the image to parse
 * @param handler  receives the content produced by the delegate parse
 * @param metadata metadata passed through to the delegate parse
 * @param context  parse context passed through to the delegate parse
 * @throws IOException   if the temporary file cannot be written or read
 * @throws SAXException  if the delegate parse reports a SAX error
 * @throws TikaException if the delegate parse fails or the temporary
 *                       resources cannot be disposed
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
        // Copy the source pixels; without this the PNG written below is blank.
        java.awt.Graphics2D g2 = bImage.createGraphics();
        try {
            g2.drawImage(image, 0, 0, null);
        } finally {
            g2.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the writer before the file is re-opened for reading.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Streams are already closed here, so the temp file can be removed.
        tmp.dispose();
    }
}
/**
 * Wraps the given stream in a {@link TikaInputStream} tied to a fresh
 * {@link TemporaryResources} instance, hands it to the
 * {@code TikaInputStream}-based {@code parse} overload, and disposes the
 * temporary resources afterwards whether or not parsing succeeded.
 *
 * @param stream   the document stream
 * @param handler  receives the parsed content
 * @param metadata document metadata
 * @param context  parse context
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if parsing fails or the temporary resources cannot
 *                       be disposed
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources resources = new TemporaryResources();
    try {
        parse(TikaInputStream.get(stream, resources), handler, metadata, context);
    } finally {
        resources.dispose();
    }
}
/**
 * Spools the document to a file, runs {@link GrobidRESTParser} on that
 * file's absolute path, and then runs {@link PDFParser} on the same file.
 * The temporary resources and the file stream opened for the PDF parser are
 * released before this method returns.
 *
 * @param stream   the document stream
 * @param handler  receives the parsed content
 * @param metadata document metadata
 * @param context  parse context
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if parsing fails or the temporary resources cannot
 *                       be disposed
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
                  ParseContext context) throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        File tmpFile = tis.getFile();

        GrobidRESTParser grobidParser = new GrobidRESTParser();
        grobidParser.parse(tmpFile.getAbsolutePath(), handler, metadata, context);

        PDFParser parser = new PDFParser();
        // Close the second read of the file as soon as the PDF parse is done.
        try (InputStream pdfStream = new FileInputStream(tmpFile)) {
            parser.parse(pdfStream, handler, metadata, context);
        }
    } finally {
        // Previously never disposed, which left the spooled file behind.
        tmp.dispose();
    }
}
}
/**
 * Wraps the stream in a {@link TikaInputStream}, runs the configured
 * digester over it (when one is set), and then delegates to the superclass
 * parse with the same wrapped stream. The {@link TemporaryResources}
 * created for the wrapper is disposed once both steps are finished or one
 * of them has failed.
 *
 * @param stream   the document stream
 * @param handler  receives the parsed content
 * @param metadata document metadata; also passed to the digester
 * @param context  parse context
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if parsing fails or the temporary resources cannot
 *                       be disposed
 */
@Override
public void parse(InputStream stream, ContentHandler handler,
                  Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources resources = new TemporaryResources();
    TikaInputStream wrapped = TikaInputStream.get(stream, resources);
    try {
        if (digester != null) {
            digester.digest(wrapped, metadata, context);
        }
        super.parse(wrapped, handler, metadata, context);
    } finally {
        resources.dispose();
    }
}
}
/**
 * Runs every configured digester, in iteration order, over the same
 * {@link TikaInputStream} wrapper of the given stream.
 * <p>
 * The {@link TemporaryResources} backing the wrapper is disposed when all
 * digesters have run or one of them has failed; a {@link TikaException}
 * raised by the disposal is rethrown as an {@link IOExceptionWithCause}.
 *
 * @param is           the stream to digest
 * @param m            metadata passed to each digester
 * @param parseContext parse context passed to each digester
 * @throws IOException if a digester fails or the temporary resources cannot
 *                     be disposed
 */
@Override
public void digest(InputStream is, Metadata m, ParseContext parseContext)
        throws IOException {
    TemporaryResources resources = new TemporaryResources();
    TikaInputStream wrapped = TikaInputStream.get(is, resources);
    try {
        for (DigestingParser.Digester each : digesters) {
            each.digest(wrapped, m, parseContext);
        }
    } finally {
        try {
            resources.dispose();
        } catch (TikaException disposeFailure) {
            throw new IOExceptionWithCause(disposeFailure);
        }
    }
}
}
public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { TemporaryResources tmp = null; try { if (!TikaInputStream.isTikaInputStream(stream)) { tmp = new TemporaryResources(); stream = TikaInputStream.get(stream, tmp); } //AutoDetectReader can throw exceptions during //initialization. If we just created a //TemporaryResources, we need to make sure to close it. parseImpl(stream, handler, metadata, context); } finally { if (tmp != null) { tmp.close(); } } }
/**
 * Runs the configured external command and feeds the given document stream,
 * as a simple XHTML document, to the given SAX content handler.
 * Metadata is only extracted if {@link #setMetadataExtractionPatterns(Map)}
 * has been called to set patterns.
 * <p>
 * The stream is wrapped in a {@link TikaInputStream} tied to a
 * {@link TemporaryResources} instance that is disposed when the call ends.
 *
 * @param stream   the document stream
 * @param handler  SAX handler receiving the XHTML output
 * @param metadata document metadata
 * @param context  parse context
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if parsing fails or the temporary resources cannot
 *                       be disposed
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    TemporaryResources resources = new TemporaryResources();
    try {
        TikaInputStream wrapped = TikaInputStream.get(stream, resources);
        parse(wrapped, xhtml, metadata, resources);
    } finally {
        resources.dispose();
    }
}
/**
 * Verifies that a temp file created through {@link TemporaryResources}
 * exists while the resources are open and is gone once they are closed.
 */
@Test
public void testFileDeletion() throws IOException {
    Path created;
    try (TemporaryResources resources = new TemporaryResources()) {
        created = resources.createTempFile();
        boolean presentWhileOpen = Files.exists(created);
        assertTrue("Temp file should exist while TempResources is used",
                presentWhileOpen);
    }
    boolean goneAfterClose = Files.notExists(created);
    assertTrue("Temp file should not exist after TempResources is closed",
            goneAfterClose);
}
/**
 * Extracts WebP metadata from the document and emits an empty XHTML
 * document.
 * <p>
 * The stream is spooled to a file via {@link TikaInputStream#getFile()} so
 * that {@link ImageMetadataExtractor#parseWebP} can read it; the
 * {@link TemporaryResources} used for that is disposed before the XHTML
 * events are produced.
 *
 * @param stream   the document stream
 * @param handler  receives the (empty) XHTML document
 * @param metadata receives the extracted metadata
 * @param context  parse context (not used by this method)
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if extraction fails or the temporary resources
 *                       cannot be disposed
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources resources = new TemporaryResources();
    try {
        TikaInputStream wrapped = TikaInputStream.get(stream, resources);
        ImageMetadataExtractor extractor = new ImageMetadataExtractor(metadata);
        extractor.parseWebP(wrapped.getFile());
    } finally {
        resources.dispose();
    }

    // No text content: just open and close the document.
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.endDocument();
}
}
/**
 * Extracts TIFF metadata from the document and emits an empty XHTML
 * document.
 * <p>
 * {@link ImageMetadataExtractor#parseTiff} is run on the file behind the
 * stream, followed by {@link JempboxExtractor#parse} on the stream itself.
 * The {@link TemporaryResources} used for spooling is disposed before the
 * XHTML events are produced.
 *
 * @param stream   the document stream
 * @param handler  receives the (empty) XHTML document
 * @param metadata receives the extracted metadata
 * @param context  parse context (not used by this method)
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if extraction fails or the temporary resources
 *                       cannot be disposed
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources resources = new TemporaryResources();
    try {
        TikaInputStream wrapped = TikaInputStream.get(stream, resources);

        ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(wrapped.getFile());

        JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(wrapped);
    } finally {
        resources.dispose();
    }

    // No text content: just open and close the document.
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.endDocument();
}
/**
 * Extracts JPEG metadata from the document and emits an empty XHTML
 * document.
 * <p>
 * {@link ImageMetadataExtractor#parseJpeg} is run on the file behind the
 * stream, followed by {@link JempboxExtractor#parse} on the stream itself.
 * The {@link TemporaryResources} used for spooling is disposed before the
 * XHTML events are produced.
 *
 * @param stream   the document stream
 * @param handler  receives the (empty) XHTML document
 * @param metadata receives the extracted metadata
 * @param context  parse context (not used by this method)
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if extraction fails or the temporary resources
 *                       cannot be disposed
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources resources = new TemporaryResources();
    try {
        TikaInputStream wrapped = TikaInputStream.get(stream, resources);

        ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(wrapped.getFile());

        JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
        jempboxExtractor.parse(wrapped);
    } finally {
        resources.dispose();
    }

    // No text content: just open and close the document.
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.endDocument();
}
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { if (!ExternalParser.check("gdalinfo")) { return; } // first set up and run GDAL // process the command TemporaryResources tmp = new TemporaryResources(); TikaInputStream tis = TikaInputStream.get(stream, tmp); String runCommand = processCommand(tis); String output = execCommand(new String[]{runCommand}); // now extract the actual metadata params // from the GDAL output in the content stream // to do this, we need to literally process the output // from the invoked command b/c we can't read metadata and // output text from the handler in ExternalParser // at the same time, so for now, we can't use the // ExternalParser to do this and I've had to bring some of // that functionality directly into this class // TODO: investigate a way to do both using ExternalParser extractMetFromOutput(output, metadata); applyPatternsToOutput(output, metadata, getPatterns()); // make the content handler and provide output there // now that we have metadata processOutput(handler, metadata, output); }
/**
 * Opens the document as a geographic {@link DataStore}, reads its metadata
 * and passes it to {@code extract}. The content type in {@code metadata}
 * is set to {@code geoInfoType} before anything else happens.
 * <p>
 * A {@link TemporaryResources} instance is only created (and later
 * disposed) when the incoming stream is not already a
 * {@link TikaInputStream}. The data store is closed before the temporary
 * resources are disposed so the backing file is no longer held open.
 *
 * @param inputStream    the document stream
 * @param contentHandler receives the extracted content
 * @param metadata       receives the content type and extracted metadata
 * @param parseContext   parse context (not used by this method)
 * @throws IOException   if the document cannot be read
 * @throws SAXException  if the handler reports an error
 * @throws TikaException if the storage is unsupported, the data store
 *                       fails to open, read or close, or the temporary
 *                       resources cannot be disposed
 */
@Override
public void parse(InputStream inputStream, ContentHandler contentHandler,
                  Metadata metadata, ParseContext parseContext)
        throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, geoInfoType);
    DataStore dataStore = null;
    XHTMLContentHandler xhtmlContentHandler =
            new XHTMLContentHandler(contentHandler, metadata);

    TemporaryResources tmp =
            TikaInputStream.isTikaInputStream(inputStream) ? null : new TemporaryResources();
    try {
        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, tmp);
        File file = tikaInputStream.getFile();
        dataStore = DataStores.open(file);
        // "new" never yields null, so no null check is needed before extract
        DefaultMetadata defaultMetadata = new DefaultMetadata(dataStore.getMetadata());
        extract(xhtmlContentHandler, metadata, defaultMetadata);
    } catch (UnsupportedStorageException e) {
        throw new TikaException("UnsupportedStorageException", e);
    } catch (DataStoreException e) {
        throw new TikaException("DataStoreException", e);
    } finally {
        try {
            // Release the store first; it keeps the spooled file open.
            if (dataStore != null) {
                dataStore.close();
            }
        } catch (DataStoreException e) {
            throw new TikaException("DataStoreException", e);
        } finally {
            if (tmp != null) {
                tmp.dispose();
            }
        }
    }
}