/**
 * Emits an empty XHTML document to the given handler.
 *
 * <p>The document is opened and immediately closed; neither {@code stream}
 * nor {@code context} is read by this method.
 *
 * @param stream   input stream (not read here)
 * @param handler  SAX handler that receives the document events
 * @param metadata metadata handed to the XHTML handler
 * @param context  parse context (not used here)
 * @throws SAXException if the handler rejects a document event
 */
public void parse(
        InputStream stream,
        ContentHandler handler,
        Metadata metadata,
        ParseContext context) throws SAXException {
    XHTMLContentHandler emptyDocument = new XHTMLContentHandler(handler, metadata);
    emptyDocument.startDocument();
    emptyDocument.endDocument();
}
} // closes an enclosing scope whose opening lies outside this span
/**
 * Lists the entries of a deployment area as XHTML links.
 *
 * <p>The whole content of {@code is} is read as a UTF-8 string and used as a
 * directory path. Every entry of that directory whose name does not start
 * with {@code "current"}, and for which {@code isSymlink} returns
 * {@code false}, is written as an {@code a} element whose {@code href} is the
 * entry's URL and whose text is the entry's name.
 *
 * @param is       stream whose content is the path of the directory to list
 * @param handler  SAX handler that receives the XHTML events
 * @param metadata metadata handed to the XHTML handler
 * @param context  parse context (not used here)
 * @throws IOException   if the stream cannot be read or an entry cannot be
 *                       turned into a URL
 * @throws SAXException  if the handler rejects an event
 * @throws TikaException if the path cannot be listed
 */
public void parse(InputStream is, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    File deployArea = new File(IOUtils.toString(is, UTF_8));
    File[] versions = deployArea.listFiles(new FileFilter() {
        public boolean accept(File pathname) {
            return !pathname.getName().startsWith("current");
        }
    });
    if (versions == null) {
        // File.listFiles returns null when the path is not a directory or an
        // I/O error occurs; fail with a descriptive error instead of an NPE.
        throw new TikaException("Unable to list deploy area: " + deployArea);
    }

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    for (File v : versions) {
        if (isSymlink(v)) {
            continue;
        }
        xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm());
        xhtml.characters(v.getName());
        xhtml.endElement("a");
    }
    // Close the document that startDocument() opened.
    xhtml.endDocument();
}
} // closes an enclosing scope whose opening lies outside this span
/**
 * Writes {@code output} to the handler as a single XHTML paragraph.
 *
 * <p>The text is encoded to UTF-8 bytes and decoded again through a reader,
 * then forwarded in chunks of up to 1024 characters. The document is always
 * ended, even when writing the paragraph fails.
 *
 * @param handler  SAX handler that receives the XHTML events
 * @param metadata metadata handed to the XHTML handler
 * @param output   text to emit
 * @throws SAXException if the handler rejects an event
 * @throws IOException  if reading the re-encoded text fails
 */
private void processOutput(ContentHandler handler, Metadata metadata, String output)
        throws SAXException, IOException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    InputStream encoded = new ByteArrayInputStream(output.getBytes(UTF_8));
    try (Reader text = new InputStreamReader(encoded, UTF_8)) {
        xhtml.startDocument();
        xhtml.startElement("p");

        char[] chunk = new char[1024];
        int length;
        while ((length = text.read(chunk)) != -1) {
            xhtml.characters(chunk, 0, length);
        }

        xhtml.endElement("p");
    } finally {
        xhtml.endDocument();
    }
}
/** * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getXHTML(ContentHandler, Metadata, ParseContext) */ public void getXHTML( ContentHandler handler, Metadata metadata, ParseContext context) throws SAXException, XmlException, IOException, TikaException { XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); buildXHTML(xhtml); // Now do any embedded parts handleEmbeddedParts(handler, metadata); // thumbnail handleThumbnail(handler); xhtml.endDocument(); }
/**
 * Extracts metadata from the stream by opening it as an in-memory
 * {@code NetcdfFile}, then emits an empty XHTML document.
 *
 * <p>The stream is fully buffered into memory first. The resource name from
 * {@code metadata} is used as the file name, falling back to an empty string
 * when it is absent.
 *
 * @param stream   input to buffer and open
 * @param handler  SAX handler that receives the (empty) XHTML document
 * @param metadata source of the resource name; also passed to
 *                 {@code unravelStringMet}
 * @param context  parse context (not used here)
 * @throws IOException   if buffering the stream fails
 * @throws SAXException  if the handler rejects a document event
 * @throws TikaException if opening, reading or closing the file fails
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    IOUtils.copy(stream, os);

    String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
    if (name == null) {
        name = "";
    }

    try {
        NetcdfFile ncFile = NetcdfFile.openInMemory(name, os.toByteArray());
        try {
            unravelStringMet(ncFile, null, metadata);
        } finally {
            // Release the file; the original never closed it.
            ncFile.close();
        }
    } catch (IOException e) {
        throw new TikaException("HDF parse error", e);
    }

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.endDocument();
}
public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We only do metadata, for now XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); // What kind is it? byte[] first4 = new byte[4]; IOUtils.readFully(stream, first4); if (first4[0] == (byte)'M' && first4[1] == (byte)'Z') { parsePE(xhtml, metadata, stream, first4); } else if (first4[0] == (byte)0x7f && first4[1] == (byte)'E' && first4[2] == (byte)'L' && first4[3] == (byte)'F') { parseELF(xhtml, metadata, stream, first4); } // Finish everything xhtml.endDocument(); }
// Opens an XHTML document on the supplied handler, builds a child handler that
// wraps the XHTML handler in a BodyContentHandler and then an
// EmbeddedContentHandler, and closes the document.
// NOTE(review): childHandler is not used within this visible span; presumably
// the code that consumes it lies outside this excerpt — confirm.
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); ContentHandler childHandler = new EmbeddedContentHandler( new BodyContentHandler(xhtml)); xhtml.endDocument();
/**
 * Extracts text from the stream with a {@code QPWTextExtractor}.
 *
 * <p>When {@code metadata} carries no content type yet, it is set to
 * {@code QP_9}; an existing value is left untouched.
 *
 * @param stream   input handed to the extractor
 * @param handler  SAX handler that receives the XHTML events
 * @param metadata metadata to default and to pass to the extractor
 * @param context  parse context (not used here)
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    boolean contentTypeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
    if (contentTypeMissing) {
        metadata.set(Metadata.CONTENT_TYPE, QP_9.toString());
    }

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    new QPWTextExtractor().extract(stream, document, metadata);
    document.endDocument();
}
} // closes an enclosing scope whose opening lies outside this span
// Records charsetName as the content encoding in the metadata, then emits an
// XHTML document that is opened and immediately closed (no content is written
// within this visible span).
metadata.set(Metadata.CONTENT_ENCODING, charsetName); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.endDocument();
public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { HashMap<String,String> properties = this.loadProperties(stream); this.setMetadata(metadata, properties); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); // TODO: put body content here xhtml.startElement("p"); String body = clean(properties.get("body")); if (body != null) xhtml.characters(body); xhtml.endElement("p"); xhtml.endDocument(); }
/**
 * Adds every entry of this parser's own metadata map to {@code metadata} and
 * emits {@code xmlText}, when it is not {@code null}, as the document's
 * character content.
 *
 * <p>The stream and the context are not read by this method.
 *
 * @param stream   input stream (not read here)
 * @param handler  SAX handler that receives the XHTML events
 * @param metadata metadata to which the stored entries are added
 * @param context  parse context (not used here)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    for (Entry<String, String> stored : this.metadata.entrySet()) {
        String key = stored.getKey();
        String value = stored.getValue();
        metadata.add(key, value);
    }

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    if (xmlText != null) {
        char[] chars = xmlText.toCharArray();
        document.characters(chars, 0, xmlText.length());
    }
    document.endDocument();
}
/**
 * Emits the fixed text {@code "Hello, World!"} as the document content.
 *
 * <p>A single {@code read()} is issued on the stream and its result is
 * discarded. The content type in {@code metadata} is set to
 * {@code "text/plain"}.
 *
 * @param stream   input on which one read is issued
 * @param handler  SAX handler that receives the XHTML events
 * @param metadata metadata whose content type is overwritten
 * @param context  parse context (not used here)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    stream.read(); // result intentionally ignored, as in the original
    metadata.set(Metadata.CONTENT_TYPE, "text/plain");

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();

    char[] greeting = "Hello, World!".toCharArray();
    document.characters(greeting, 0, greeting.length);

    document.endDocument();
}
/**
 * Runs an {@code XFAExtractor} over the XFA bytes of the document's AcroForm
 * and writes the result to the handler as an XHTML document.
 *
 * <p>The catalog, AcroForm and XFA are dereferenced without null checks. An
 * {@code XMLStreamException} from the extractor is rethrown as a
 * {@code TikaException}.
 *
 * @param pdDocument document whose AcroForm XFA is extracted
 * @param handler    SAX handler that receives the XHTML events
 * @param metadata   metadata passed to the extractor
 * @param context    parse context passed to the extractor
 * @throws TikaException if the extractor reports an XML error
 */
private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler,
                           Metadata metadata, ParseContext context)
        throws SAXException, IOException, TikaException {
    XFAExtractor extractor = new XFAExtractor();
    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);

    document.startDocument();
    try (InputStream xfaStream = new ByteArrayInputStream(
            pdDocument.getDocumentCatalog().getAcroForm().getXFA().getBytes())) {
        extractor.extract(xfaStream, document, metadata, context);
    } catch (XMLStreamException xmlError) {
        throw new TikaException("XML error in XFA", xmlError);
    }
    document.endDocument();
}
/**
 * Extracts WebP image metadata from the stream and emits an empty XHTML
 * document.
 *
 * <p>The stream is wrapped in a {@code TikaInputStream} and its backing file
 * is handed to {@code ImageMetadataExtractor.parseWebP}. The temporary
 * resources are disposed whether or not extraction succeeds.
 *
 * @param stream   image input
 * @param handler  SAX handler that receives the (empty) XHTML document
 * @param metadata metadata populated by the extractor
 * @param context  parse context (not used here)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources temporary = new TemporaryResources();
    try {
        TikaInputStream tikaStream = TikaInputStream.get(stream, temporary);
        ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseWebP(tikaStream.getFile());
    } finally {
        temporary.dispose();
    }

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
} // closes an enclosing scope whose opening lies outside this span
throws IOException, SAXException, TikaException { XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); byte[] asciiNameBytes = new byte[ASCII_CHUNK_LENGTH]; IOUtils.readFully(stream, asciiNameBytes); throw new TikaException("Ascii name length should be the same as the unicode length"); xhtml.endDocument();
acceptedObjects.add(object); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("ol", "id", xhtmlStartVal); count = 0; xhtml.endDocument(); } else { LOG.warn("NO objects");
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); parseAssay(xhtml, metadata, context); xhtml.endDocument(); } finally { if (tmp != null) {
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { //Try to parse TSD file try (RereadableInputStream ris = new RereadableInputStream(stream, 2048, true, true)) { Metadata TSDAndEmbeddedMetadata = new Metadata(); List<TSDMetas> tsdMetasList = this.extractMetas(ris); this.buildMetas(tsdMetasList, metadata != null && metadata.size() > 0 ? TSDAndEmbeddedMetadata : metadata); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); ris.rewind(); //Try to parse embedded file in TSD file this.parseTSDContent(ris, handler, TSDAndEmbeddedMetadata, context); xhtml.endDocument(); } }
/**
 * Extracts JPEG image metadata from the stream and emits an empty XHTML
 * document.
 *
 * <p>The stream is wrapped in a {@code TikaInputStream}; its backing file is
 * handed to {@code ImageMetadataExtractor.parseJpeg}, and the stream itself
 * is then handed to {@code JempboxExtractor.parse}. The temporary resources
 * are disposed whether or not extraction succeeds.
 *
 * @param stream   image input
 * @param handler  SAX handler that receives the (empty) XHTML document
 * @param metadata metadata populated by the extractors
 * @param context  parse context (not used here)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources temporary = new TemporaryResources();
    try {
        TikaInputStream tikaStream = TikaInputStream.get(stream, temporary);

        ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseJpeg(tikaStream.getFile());

        JempboxExtractor xmpExtractor = new JempboxExtractor(metadata);
        xmpExtractor.parse(tikaStream);
    } finally {
        temporary.dispose();
    }

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}
/**
 * Extracts TIFF image metadata from the stream and emits an empty XHTML
 * document.
 *
 * <p>The stream is wrapped in a {@code TikaInputStream}; its backing file is
 * handed to {@code ImageMetadataExtractor.parseTiff}, and the stream itself
 * is then handed to {@code JempboxExtractor.parse}. The temporary resources
 * are disposed whether or not extraction succeeds.
 *
 * @param stream   image input
 * @param handler  SAX handler that receives the (empty) XHTML document
 * @param metadata metadata populated by the extractors
 * @param context  parse context (not used here)
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources temporary = new TemporaryResources();
    try {
        TikaInputStream tikaStream = TikaInputStream.get(stream, temporary);

        ImageMetadataExtractor imageExtractor = new ImageMetadataExtractor(metadata);
        imageExtractor.parseTiff(tikaStream.getFile());

        JempboxExtractor xmpExtractor = new JempboxExtractor(metadata);
        xmpExtractor.parse(tikaStream);
    } finally {
        temporary.dispose();
    }

    XHTMLContentHandler document = new XHTMLContentHandler(handler, metadata);
    document.startDocument();
    document.endDocument();
}