// Obtain the stream that has to remain usable afterwards.
InputStream is = getStream();

// Wrap it before handing it out; the callee only ever sees the wrapper.
CloseShieldInputStream csis = new CloseShieldInputStream(is);

// call the bad function that does things it shouldn't
badFunction(csis);

// happiness follows: do something with the original input stream
is.read();
/**
 * Builds the fixture: a three-byte in-memory stream whose {@code close()}
 * only records that it was called, and a {@code CloseShieldInputStream}
 * wrapped around it. The {@code closed} flag starts out {@code false}.
 */
@Before
public void setUp() {
    closed = false;
    data = new byte[] { 'x', 'y', 'z' };
    original = new ByteArrayInputStream(data) {
        @Override
        public void close() {
            // Record the call instead of closing anything.
            closed = true;
        }
    };
    shielded = new CloseShieldInputStream(original);
}
/**
 * SAX-parses {@code stream} into {@code handler}.
 *
 * <p>The parser receives a {@code CloseShieldInputStream} wrapper rather than
 * {@code stream} itself, and the handler is decorated with an
 * {@code OfflineContentHandler}. {@code metadata} is not used here.
 *
 * @param stream   the document to parse
 * @param handler  receiver of the SAX events
 * @param metadata unused by this method
 * @param context  parse context forwarded to {@code XMLReaderUtils.parseSAX}
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    InputStream shielded = new CloseShieldInputStream(stream);
    OfflineContentHandler offline = new OfflineContentHandler(handler);
    XMLReaderUtils.parseSAX(shielded, offline, context);
}
/**
 * Parses one package part with a freshly created {@code XMLReader} and
 * appends the extracted text to {@code buffer}.
 *
 * <p>A {@code ParserConfigurationException} while creating the reader is
 * logged as a warning and the part is skipped; I/O and SAX failures
 * propagate to the caller.
 *
 * @param packagePart     the part whose XML is read
 * @param xwpfListManager not used by this method
 * @param buffer          destination for the extracted text
 */
private void handlePart(PackagePart packagePart,
                        XWPFListManager xwpfListManager, StringBuilder buffer)
        throws IOException, SAXException {
    Map<String, String> linkTargets = loadHyperlinkRelationships(packagePart);
    try (InputStream partStream = packagePart.getInputStream()) {
        XMLReader xmlReader = SAXHelper.newXMLReader();

        OOXMLWordAndPowerPointTextHandler textHandler =
                new OOXMLWordAndPowerPointTextHandler(
                        new XWPFToTextContentHandler(buffer), linkTargets);
        xmlReader.setContentHandler(textHandler);

        // The reader gets a wrapper; try-with-resources owns the real stream.
        InputSource source = new InputSource(new CloseShieldInputStream(partStream));
        xmlReader.parse(source);
    } catch (ParserConfigurationException e) {
        LOG.warn("Can't configure XMLReader", e);
    }
}
public static void parseInvestigation(InputStream stream, XHTMLContentHandler handler, Metadata metadata, ParseContext context, String studyFileName) throws IOException, TikaException, SAXException { TikaConfig tikaConfig = context.get(TikaConfig.class); if (tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); } // Automatically detect the character encoding try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(stream), metadata, tikaConfig.getEncodingDetector())) { extractMetadata(reader, metadata, studyFileName); } }
new CloseShieldInputStream(stream), new OfflineContentHandler( new XPSPageContentHandler(xhtml, embeddedImages)
private void handleDocumentRef(String docRef) throws SAXException { //docRef is a path to a FixedDocumentSequence document, // e.g. /Documents/1/FixedDoc.fdoc //relative root is /Documents/1 ..need this Pages... String relativeRoot = null; int i = docRef.lastIndexOf("/"); if (i > 0) { relativeRoot = docRef.substring(0, i); } else { relativeRoot = ""; } String zipPath = (docRef.startsWith("/") ? docRef.substring(1) : docRef); if (pkg instanceof ZipPackage) { try (InputStream stream = getZipStream(zipPath, pkg)) { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( new PageContentPartHandler(relativeRoot, xhtml))), context); } catch (IOException | TikaException e) { throw new SAXException(new TikaException("IOException trying to read: " + docRef)); } } else { throw new SAXException(new TikaException("Package must be ZipPackage")); } }
/**
 * SAX-parses {@code stream} through the handler chain
 * {@code OfflineContentHandler} -> {@code NSNormalizerContentHandler} ->
 * {@code OpenDocumentElementMappingContentHandler} (built with
 * {@code MAPPINGS}) -> {@code handler}.
 *
 * @param stream   the XML to parse; the parser is given a
 *                 {@code CloseShieldInputStream} wrapper around it
 * @param handler  final receiver of the mapped events
 * @param metadata not used by this method
 * @param context  parse context forwarded to {@code XMLReaderUtils.parseSAX}
 */
void parseInternal(
        InputStream stream, final ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    DefaultHandler elementMapper =
            new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);

    InputStream shielded = new CloseShieldInputStream(stream);
    OfflineContentHandler offline =
            new OfflineContentHandler(new NSNormalizerContentHandler(elementMapper));

    XMLReaderUtils.parseSAX(shielded, offline, context);
}
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Only outputting the MIME type as metadata metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE); // The following code was taken from the TXTParser // Automatically detect the character encoding TikaConfig tikaConfig = context.get(TikaConfig.class); if (tikaConfig == null) { tikaConfig = TikaConfig.getDefaultConfig(); } try (AutoDetectReader reader = new AutoDetectReader( new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))){ Charset charset = reader.getCharset(); // deprecated, see TIKA-431 metadata.set(Metadata.CONTENT_ENCODING, charset.name()); xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); readLines(reader, metadata); xhtml.endDocument(); } catch (IOException | TikaException e) { LOG.error("Error reading input data stream.", e); } }
throws IOException, SAXException, TikaException { try (AutoDetectReader reader = new AutoDetectReader( new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) { Charset charset = reader.getCharset(); String mediaType = metadata.get(Metadata.CONTENT_TYPE);
/**
 * Unwraps the signed content of a PKCS7/CMS signed-data stream and hands it
 * to the {@code Parser} registered in {@code context}
 * ({@code EmptyParser.INSTANCE} when none is registered).
 *
 * @param stream   the signed data; read through a
 *                 {@code CloseShieldInputStream} wrapper
 * @param handler  passed through to the delegate parser
 * @param metadata passed through to the delegate parser
 * @param context  source of the delegate parser; also passed through to it
 * @throws TikaException if the digest calculator provider cannot be built,
 *                       if the CMS data cannot be parsed, or if the
 *                       signature carries no signed content (detached)
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    try {
        JcaDigestCalculatorProviderBuilder providerBuilder =
                new JcaDigestCalculatorProviderBuilder().setProvider("BC");
        DigestCalculatorProvider digests = providerBuilder.build();

        CMSSignedDataParser signedData =
                new CMSSignedDataParser(digests, new CloseShieldInputStream(stream));
        try {
            CMSTypedStream signedContent = signedData.getSignedContent();
            if (signedContent == null) {
                throw new TikaException(
                        "cannot parse detached pkcs7 signature (no signed data to parse)");
            }
            try (InputStream payload = signedContent.getContentStream()) {
                Parser embeddedParser = context.get(Parser.class, EmptyParser.INSTANCE);
                embeddedParser.parse(payload, handler, metadata, context);
            }
        } finally {
            // Always release the CMS parser, whether or not parsing succeeded.
            signedData.close();
        }
    } catch (OperatorCreationException e) {
        throw new TikaException("Unable to create DigestCalculatorProvider", e);
    } catch (CMSException e) {
        throw new TikaException("Unable to parse pkcs7 signed data", e);
    }
}
/**
 * Opens the part that {@code packageRelationship} points to and SAX-parses
 * it with a {@code FixedDocSeqHandler} writing to {@code xhtml}.
 *
 * @param packageRelationship relationship identifying the part to read
 * @param xhtml               handler given to the {@code FixedDocSeqHandler}
 */
private void handleDocuments(PackageRelationship packageRelationship,
                             XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {
    try (InputStream partStream = pkg.getPart(packageRelationship).getInputStream()) {
        // The parser gets a wrapper; try-with-resources owns the real stream.
        InputStream shielded = new CloseShieldInputStream(partStream);
        OfflineContentHandler offline = new OfflineContentHandler(
                new EmbeddedContentHandler(new FixedDocSeqHandler(xhtml)));
        XMLReaderUtils.parseSAX(shielded, offline, context);
    }
}
tikaConfig = TikaConfig.getDefaultConfig(); try (AutoDetectReader reader = new AutoDetectReader(new CloseShieldInputStream(tis), metadata, tikaConfig.getEncodingDetector()); CSVParser csvParser = new CSVParser(reader, CSVFormat.TDF)) {
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { //set OfficeParserConfig if the user hasn't specified one configure(context); final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); try { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( new Word2006MLDocHandler(xhtml, metadata, context))), context); } catch (SAXException e) { throw new TikaException("XML parse error", e); } xhtml.endDocument(); } }
/**
 * Sets the content type, then SAX-parses {@code stream} into an XHTML
 * document written to {@code handler}.
 *
 * <p>The handler returned by {@code getContentHandler} writes through a
 * {@code TaggedContentHandler} around the XHTML handler. When the parse fails
 * with a {@code SAXException}, {@code throwIfCauseOf} is consulted first and
 * otherwise the failure is rethrown as a {@code TikaException}.
 * {@code endDocument()} is always called.
 *
 * @param stream   the XML input; the parser is given a
 *                 {@code CloseShieldInputStream} wrapper around it
 * @param handler  receiver of the XHTML events
 * @param metadata document metadata
 * @param context  parse context
 */
@Override
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    setContentType(metadata);

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    TaggedContentHandler tagged = new TaggedContentHandler(xhtml);
    try {
        InputStream shielded = new CloseShieldInputStream(stream);
        OfflineContentHandler offline = new OfflineContentHandler(
                new EmbeddedContentHandler(
                        getContentHandler(tagged, metadata, context)));
        XMLReaderUtils.parseSAX(shielded, offline, context);
    } catch (SAXException saxFailure) {
        tagged.throwIfCauseOf(saxFailure);
        throw new TikaException("XML parse error", saxFailure);
    } finally {
        xhtml.endDocument();
    }
}
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { EmbeddedDocumentExtractor ex = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); short numEntries = readThroughNumEntries(stream); long bytesRead = 26; List<FieldInfo> fieldInfoList = getSortedFieldInfoList(stream, numEntries); bytesRead += 12*numEntries; Metadata embeddedMetadata = new Metadata(); bytesRead = processFieldEntries(stream, fieldInfoList, embeddedMetadata, bytesRead); FieldInfo contentFieldInfo = getContentFieldInfo(fieldInfoList); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); if (contentFieldInfo != null) { long diff = contentFieldInfo.offset-bytesRead; IOUtils.skipFully(stream, diff); if (ex.shouldParseEmbedded(embeddedMetadata)) { // TODO: we should probably add a readlimiting wrapper around this // stream to ensure that not more than contentFieldInfo.length bytes // are read ex.parseEmbedded(new CloseShieldInputStream(stream), xhtml, embeddedMetadata, false); } } xhtml.endDocument(); }
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // TODO Auto-generated method stub final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); xhtml.startElement("p"); TaggedContentHandler tagged = new TaggedContentHandler(handler); try { XMLReaderUtils.parseSAX( new CloseShieldInputStream(stream), new OfflineContentHandler(new EmbeddedContentHandler( getContentHandler(tagged, metadata, context))), context); } catch (SAXException e) { tagged.throwIfCauseOf(e); throw new TikaException("XML parse error", e); } finally { xhtml.endElement("p"); xhtml.endDocument(); } }
/**
 * SAX-parses one package part and writes its body content to {@code xhtml}.
 *
 * <p>A {@code TikaException} or {@code IOException} raised while opening or
 * parsing the part is not rethrown; its stack trace is added to the metadata
 * under {@code TikaCoreProperties.TIKA_META_EXCEPTION_WARNING}. A
 * {@code SAXException} propagates.
 *
 * @param packagePart the part to read
 * @param styles      style information for the body part handler
 * @param listManager list information for the body part handler
 * @param xhtml       receiver of the generated XHTML
 */
private void handlePart(PackagePart packagePart, XWPFStylesShim styles,
                        XWPFListManager listManager, XHTMLContentHandler xhtml)
        throws IOException, SAXException {
    Map<String, String> linkedRelationships =
            loadLinkedRelationships(packagePart, true, metadata);

    try (InputStream partStream = packagePart.getInputStream()) {
        // The parser gets a wrapper; try-with-resources owns the real stream.
        InputStream shielded = new CloseShieldInputStream(partStream);

        OOXMLWordAndPowerPointTextHandler textHandler =
                new OOXMLWordAndPowerPointTextHandler(
                        new OOXMLTikaBodyPartHandler(xhtml, styles, listManager, config),
                        linkedRelationships,
                        config.getIncludeShapeBasedContent(),
                        config.getConcatenatePhoneticRuns());
        OfflineContentHandler offline =
                new OfflineContentHandler(new EmbeddedContentHandler(textHandler));

        XMLReaderUtils.parseSAX(shielded, offline, context);
    } catch (TikaException | IOException e) {
        metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING,
                ExceptionUtils.getStackTrace(e));
    }
}
/**
 * SAX-parses {@code stream} inside an XHTML document whose body is a single
 * {@code <p>} element, defaulting the content type to
 * {@code application/xml} when the metadata carries none.
 *
 * <p>The handler returned by {@code getContentHandler} writes through a
 * {@code TaggedContentHandler} around {@code handler} itself. When the parse
 * fails with a {@code SAXException}, {@code throwIfCauseOf} is consulted
 * first and otherwise the failure is rethrown as a {@code TikaException}.
 * The {@code <p>} element and the document are always closed.
 *
 * @param stream   the XML input; the parser is given a
 *                 {@code CloseShieldInputStream} wrapper around it
 * @param handler  receiver of the SAX events
 * @param metadata document metadata; may receive the default content type
 * @param context  parse context
 */
public void parse(
        InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    boolean contentTypeMissing = metadata.get(Metadata.CONTENT_TYPE) == null;
    if (contentTypeMissing) {
        metadata.set(Metadata.CONTENT_TYPE, "application/xml");
    }

    final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.startElement("p");

    TaggedContentHandler tagged = new TaggedContentHandler(handler);
    try {
        InputStream shielded = new CloseShieldInputStream(stream);
        OfflineContentHandler offline = new OfflineContentHandler(
                new EmbeddedContentHandler(
                        getContentHandler(tagged, metadata, context)));
        XMLReaderUtils.parseSAX(shielded, offline, context);
    } catch (SAXException saxFailure) {
        tagged.throwIfCauseOf(saxFailure);
        throw new TikaException("XML parse error", saxFailure);
    } finally {
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
new CloseShieldInputStream(stream), new OfflineContentHandler(new XSLFCommentAuthorHandler()), context);