/**
 * Should this thread use event based extractors if available?
 * Checks the all-threads one first, then thread specific.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getPreferEventExtractor()}, to which
 *         this method delegates
 */
public static boolean getPreferEventExtractor() {
    return OLE2ExtractorFactory.getPreferEventExtractor();
}
/**
 * Builds a text extractor that reads from the supplied document.
 *
 * @param document The POIDocument this extractor will work on.
 */
public POIOLE2TextExtractor(POIDocument document) {
    this.document = document;

    // Register the document so that underlying resources, such as open
    // files, are cleaned up when the user calls #close()
    this.setFilesystem(document);
}
/**
 * Should this thread prefer event based over usermodel based extractors?
 * Will only be used if the All Threads setting is null.
 *
 * @param preferEventExtractors the thread-level preference, passed
 *        unchanged to
 *        {@code OLE2ExtractorFactory.setThreadPrefersEventExtractors(boolean)}
 */
public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
    OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
}
DirectoryEntry root = ext.getRoot(); if(root == null) { throw new IllegalStateException("The extractor didn't know which POIFS it came from!"); Class<?> cls = getScratchpadClass(); try { Method m = cls.getDeclaredMethod( e.add(createExtractor((DirectoryNode) dir )); e.add(createExtractor(stream)); } catch (Exception xe) {
@SuppressWarnings("unchecked") public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException { Class<?> cls = getOOXMLClass(); if (cls != null) { // Use Reflection to get us the full OOXML-enabled version try { Method m = cls.getDeclaredMethod("createExtractor", InputStream.class); return (T)m.invoke(null, input); } catch (IllegalArgumentException iae) { throw iae; } catch (Exception e) { throw new IllegalArgumentException("Error creating Extractor for InputStream", e); } } else { // Best hope it's OLE2.... return createExtractor(new POIFSFileSystem(input)); } }
if (getPreferEventExtractor()) { return new EventBasedExcelExtractor(poifsDir); Class<?> cls = getScratchpadClass(); try { Method m = cls.getDeclaredMethod("createExtractor", DirectoryNode.class);
/**
 * Command line entry point: prints the metadata text and the body text of
 * every file named on the command line, separated by divider lines, followed
 * by a character-count summary per file.
 *
 * @param args one or more file names; with none, usage is printed to stderr
 *        and the JVM exits with status 1
 * @throws Exception if creating an extractor or reading text fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Use:");
        System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
        System.exit(1);
    }

    for (int i = 0; i < args.length; i++) {
        System.out.println(DIVIDER);

        final File file = new File(args[i]);
        System.out.println(file);

        try (POITextExtractor textExtractor = ExtractorFactory.createExtractor(file)) {
            final POITextExtractor metaExtractor = textExtractor.getMetadataTextExtractor();

            // Metadata section
            System.out.println(" " + DIVIDER);
            final String metaText = metaExtractor.getText();
            System.out.println(metaText);

            // Body text section
            System.out.println(" " + DIVIDER);
            final String bodyText = textExtractor.getText();
            System.out.println(bodyText);

            // Summary
            System.out.println(DIVIDER);
            final String summary = "Had " + metaText.length() + " characters of metadata and "
                    + bodyText.length() + " characters of text";
            System.out.println(summary);
        }
    }
}
// closes the enclosing class, whose header is outside this chunk
}
/**
 * Creates an extractor for the given filesystem by delegating to the
 * {@code createExtractor} overload that accepts {@code fs.getRoot()}.
 *
 * @param fs the filesystem whose root is passed on
 * @param <T> the extractor type expected by the caller; the cast to
 *        {@code T} is unchecked
 * @return whatever the delegate returns, cast to {@code T}
 * @throws IOException as declared; raised by the delegate
 */
@SuppressWarnings("unchecked")
public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
    return (T)createExtractor(fs.getRoot());
}
DirectoryEntry root = ext.getRoot(); if (root == null) { throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
/**
 * Should all threads prefer event based over usermodel based extractors?
 * (usermodel extractors tend to be more accurate, but use more memory)
 * Default is to use the thread level setting, which defaults to false.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getAllThreadsPreferEventExtractors()};
 *         presumably {@code null} when no all-threads setting exists
 *         (confirm against OLE2ExtractorFactory)
 */
public static Boolean getAllThreadsPreferEventExtractors() {
    return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
}
/**
 * Should this thread prefer event based over usermodel based extractors?
 * (usermodel extractors tend to be more accurate, but use more memory)
 * Default is false.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getThreadPrefersEventExtractors()},
 *         to which this method delegates
 */
public static boolean getThreadPrefersEventExtractors() {
    return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
}
/**
 * Should all threads prefer event based over usermodel based extractors?
 * If set, will take preference over the Thread level setting.
 *
 * @param preferEventExtractors the all-threads preference, passed unchanged
 *        to
 *        {@code OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(Boolean)}
 */
public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
    OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
}
@Override public void close() throws IOException { // e.g. XSSFEventBaseExcelExtractor passes a null-document if(_document != null) { @SuppressWarnings("resource") OPCPackage pkg = _document.getPackage(); if(pkg != null) { // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor! pkg.revert(); } } super.close(); }
@SuppressWarnings("unchecked") public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException { POIFSFileSystem fs = null; try { fs = new POIFSFileSystem(f); if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { return (T)createEncryptedOOXMLExtractor(fs); } POITextExtractor extractor = createExtractor(fs); extractor.setFilesystem(fs); return (T)extractor; } catch (OfficeXmlFileException e) { // ensure file-handle release IOUtils.closeQuietly(fs); OPCPackage pkg = OPCPackage.open(f.toString(), PackageAccess.READ); T t = (T)createExtractor(pkg); t.setFilesystem(pkg); return t; } catch (NotOLE2FileException ne) { // ensure file-handle release IOUtils.closeQuietly(fs); throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR // ensure file-handle release IOUtils.closeQuietly(fs); throw e; } }
/**
 * Should this thread use event based extractors if available?
 * Checks the all-threads one first, then thread specific.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getPreferEventExtractor()}, to which
 *         this method delegates
 */
public static boolean getPreferEventExtractor() {
    return OLE2ExtractorFactory.getPreferEventExtractor();
}
@SuppressWarnings("unchecked") public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException { // First, check for OOXML for (String entryName : poifsDir.getEntryNames()) { if (entryName.equals("Package")) { OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package")); return (T)createExtractor(pkg); } } // If not, ask the OLE2 code to check, with Scratchpad if possible return (T)OLE2ExtractorFactory.createExtractor(poifsDir); }
/**
 * Creates an extractor from a whole filesystem: delegates to the constructor
 * that takes {@code fs.getRoot()}, then registers the filesystem through
 * {@code super.setFilesystem(fs)}.
 *
 * @param fs the filesystem to read from
 */
public EventBasedExcelExtractor(POIFSFileSystem fs) {
    this(fs.getRoot());
    // NOTE(review): presumably registered so the filesystem is released when
    // the extractor is closed; confirm against the superclass
    super.setFilesystem(fs);
}
/**
 * Should this thread prefer event based over usermodel based extractors?
 * Will only be used if the All Threads setting is null.
 *
 * @param preferEventExtractors the thread-level preference, passed
 *        unchanged to
 *        {@code OLE2ExtractorFactory.setThreadPrefersEventExtractors(boolean)}
 */
public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
    OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
}
/**
 * Should all threads prefer event based over usermodel based extractors?
 * (usermodel extractors tend to be more accurate, but use more memory)
 * Default is to use the thread level setting, which defaults to false.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getAllThreadsPreferEventExtractors()};
 *         presumably {@code null} when no all-threads setting exists
 *         (confirm against OLE2ExtractorFactory)
 */
public static Boolean getAllThreadsPreferEventExtractors() {
    return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
}
/**
 * Should this thread prefer event based over usermodel based extractors?
 * (usermodel extractors tend to be more accurate, but use more memory)
 * Default is false.
 *
 * @return the value reported by
 *         {@code OLE2ExtractorFactory.getThreadPrefersEventExtractors()},
 *         to which this method delegates
 */
public static boolean getThreadPrefersEventExtractors() {
    return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
}