/**
 * Parses a single worksheet's XML stream and forwards its contents to the
 * supplied sheet contents handler.
 *
 * <p>The stream is parsed through {@code XMLReaderUtils.parseSAX} using an
 * {@code XSSFSheetXMLHandler} wrapped in an
 * {@code XSSFSheetInterestingPartsCapturer}. After a successful parse the
 * stream is closed and, if the capturer reported sheet protection, the
 * {@code TikaCoreProperties.PROTECTED} metadata entry is set to
 * {@code "true"}. The stream is not closed by this method when parsing
 * fails.
 *
 * @param sheetContentsExtractor receives the sheet's content callbacks
 * @param comments               comments table handed to the sheet XML handler
 * @param styles                 styles table handed to the sheet XML handler
 * @param strings                shared strings table handed to the sheet XML handler
 * @param sheetInputStream       XML stream of the sheet; closed on success
 * @throws IOException      if reading or closing the stream fails
 * @throws SAXException     if the SAX parse fails
 * @throws RuntimeException if a {@code TikaException} is raised while
 *                          parsing; the original exception is attached as the cause
 */
public void processSheet(
        SheetContentsHandler sheetContentsExtractor,
        CommentsTable comments,
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        InputStream sheetInputStream) throws IOException, SAXException {
    try {
        XSSFSheetInterestingPartsCapturer handler =
                new XSSFSheetInterestingPartsCapturer(
                        new XSSFSheetXMLHandler(
                                styles, comments, strings, sheetContentsExtractor, formatter, false));
        XMLReaderUtils.parseSAX(sheetInputStream, handler, parseContext);
        sheetInputStream.close();
        if (handler.hasProtection) {
            metadata.set(TikaCoreProperties.PROTECTED, "true");
        }
    } catch (TikaException e) {
        // Attach the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
/**
 * Translates "Buona Sera!!!" to target language "en" without naming a
 * source language and compares the result with "Good Evening!!!".
 *
 * <p>A null result fails the test. The text comparison goes through
 * {@code assumeTrue} (presumably JUnit's {@code Assume}, in which case a
 * mismatch skips the test rather than failing it — verify against the
 * static imports). Any {@code TikaException} or {@code IOException} is
 * printed and turned into a test failure.
 */
@Test
public void test3UNDEFINED_IT_EN_Translation() {
    final String source = "Buona Sera!!!";
    final String expected = "Good Evening!!!";
    try {
        final String translated = translator.translate(source, "en");
        assertNotNull("Text not translated", translated);
        assumeTrue(expected.equals(translated));
    } catch (TikaException | IOException e) {
        // Both failure types are reported the same way.
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Translates "Hello World!!!" from source language "en" to target language
 * "es" and compares the result with "Hola Mundo!!!".
 *
 * <p>A null result fails the test. The text comparison goes through
 * {@code assumeTrue} (presumably JUnit's {@code Assume}, in which case a
 * mismatch skips the test rather than failing it — verify against the
 * static imports). Any {@code TikaException} or {@code IOException} is
 * printed and turned into a test failure.
 */
@Test
public void test1EN_ES_Translation() {
    final String source = "Hello World!!!";
    final String expected = "Hola Mundo!!!";
    try {
        final String translated = translator.translate(source, "en", "es");
        assertNotNull("Text not translated", translated);
        assumeTrue(expected.equals(translated));
    } catch (TikaException | IOException e) {
        // Both failure types are reported the same way.
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Translates "Guten Tag!!!" to target language "es" without naming a
 * source language and compares the result with "Buen Día!!!".
 *
 * <p>A null result fails the test. The text comparison goes through
 * {@code assumeTrue} (presumably JUnit's {@code Assume}, in which case a
 * mismatch skips the test rather than failing it — verify against the
 * static imports). Any {@code TikaException} or {@code IOException} is
 * printed and turned into a test failure.
 */
@Test
public void test2UNDEFINED_DE_ES_Translation() {
    final String source = "Guten Tag!!!";
    final String expected = "Buen Día!!!";
    try {
        final String translated = translator.translate(source, "es");
        assertNotNull("Text not translated", translated);
        assumeTrue(expected.equals(translated));
    } catch (TikaException | IOException e) {
        // Both failure types are reported the same way.
        e.printStackTrace();
        fail(e.getMessage());
    }
}
/**
 * Fetches the resource at the given URL and returns its text as extracted
 * by {@code tika.parseToString}, with every newline and tab replaced by a
 * single space.
 *
 * <p>The URL is first passed through {@code addHttp}. Failures are logged
 * at SEVERE level and are not propagated.
 *
 * @param url     address of the page to fetch
 * @param timeout read timeout handed to
 *                {@link java.net.URLConnection#setReadTimeout(int)}
 *                (milliseconds); a value of zero or less falls back to
 *                {@code DEFAULT_TIMEOUT}
 * @return the extracted text, or {@code null} if the URL is malformed, the
 *         connection or read fails, or Tika cannot parse the content
 */
public String fetchPage(final String url, final int timeout) {
    String fetchURL = addHttp(url);
    log.info("fetch url " + fetchURL);

    // The caller's timeout used to be ignored in favour of DEFAULT_TIMEOUT.
    // Non-positive values keep the old default: setReadTimeout rejects
    // negatives and treats 0 as "wait forever".
    final int readTimeout = timeout > 0 ? timeout : DEFAULT_TIMEOUT;

    String pageContent = null;
    try {
        URLConnection connection = new URL(fetchURL).openConnection();
        connection.setReadTimeout(readTimeout);
        pageContent = tika.parseToString(connection.getInputStream())
                .replace('\n', ' ')
                .replace('\t', ' ');
    } catch (IOException | TikaException e) {
        // MalformedURLException is an IOException, so it is covered here too.
        log.severe(e.getMessage() + "\n" + e);
    }
    return pageContent;
}
/**
 * Parses a single worksheet's XML stream and forwards its contents to the
 * supplied sheet contents handler.
 *
 * <p>An {@code XMLReader} obtained from {@code parseContext} parses the
 * stream with an {@code XSSFSheetXMLHandler} wrapped in an
 * {@code XSSFSheetInterestingPartsCapturer}. After a successful parse the
 * stream is closed and, if the capturer reported sheet protection, the
 * {@code TikaMetadataKeys.PROTECTED} metadata entry is set to
 * {@code "true"}. The stream is not closed by this method when parsing
 * fails.
 *
 * @param sheetContentsExtractor receives the sheet's content callbacks
 * @param comments               comments table handed to the sheet XML handler
 * @param styles                 styles table handed to the sheet XML handler
 * @param strings                shared strings table handed to the sheet XML handler
 * @param sheetInputStream       XML stream of the sheet; closed on success
 * @throws IOException      if reading or closing the stream fails
 * @throws SAXException     if the SAX parse fails
 * @throws RuntimeException if a {@code TikaException} is raised inside the
 *                          try block; the original exception is attached as the cause
 */
public void processSheet(
        SheetContentsHandler sheetContentsExtractor,
        CommentsTable comments,
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        InputStream sheetInputStream) throws IOException, SAXException {
    InputSource sheetSource = new InputSource(sheetInputStream);
    try {
        XMLReader sheetParser = parseContext.getXMLReader();
        XSSFSheetInterestingPartsCapturer handler =
                new XSSFSheetInterestingPartsCapturer(
                        new XSSFSheetXMLHandler(
                                styles, comments, strings, sheetContentsExtractor, formatter, false));
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        sheetInputStream.close();
        if (handler.hasProtection) {
            metadata.set(TikaMetadataKeys.PROTECTED, "true");
        }
    } catch (TikaException e) {
        // Attach the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
/**
 * Parses a single worksheet's XML stream and forwards its contents to the
 * supplied sheet contents handler.
 *
 * <p>The stream is parsed through {@code XMLReaderUtils.parseSAX} using an
 * {@code XSSFSheetXMLHandler} wrapped in an
 * {@code XSSFSheetInterestingPartsCapturer}. After a successful parse the
 * stream is closed and, if the capturer reported sheet protection, the
 * {@code TikaMetadataKeys.PROTECTED} metadata entry is set to
 * {@code "true"}. The stream is not closed by this method when parsing
 * fails.
 *
 * @param sheetContentsExtractor receives the sheet's content callbacks
 * @param comments               comments table handed to the sheet XML handler
 * @param styles                 styles table handed to the sheet XML handler
 * @param strings                shared strings table handed to the sheet XML handler
 * @param sheetInputStream       XML stream of the sheet; closed on success
 * @throws IOException      if reading or closing the stream fails
 * @throws SAXException     if the SAX parse fails
 * @throws RuntimeException if a {@code TikaException} is raised while
 *                          parsing; the original exception is attached as the cause
 */
public void processSheet(
        SheetContentsHandler sheetContentsExtractor,
        CommentsTable comments,
        StylesTable styles,
        ReadOnlySharedStringsTable strings,
        InputStream sheetInputStream) throws IOException, SAXException {
    try {
        XSSFSheetInterestingPartsCapturer handler =
                new XSSFSheetInterestingPartsCapturer(
                        new XSSFSheetXMLHandler(
                                styles, comments, strings, sheetContentsExtractor, formatter, false));
        XMLReaderUtils.parseSAX(sheetInputStream, handler, parseContext);
        sheetInputStream.close();
        if (handler.hasProtection) {
            metadata.set(TikaMetadataKeys.PROTECTED, "true");
        }
    } catch (TikaException e) {
        // Attach the original exception as the cause so its stack trace is not lost.
        throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
    }
}
/**
 * Extracts metadata from a product's file by running it through Tika's
 * {@code AutoDetectParser} and converting the resulting Tika metadata with
 * {@code transform}.
 *
 * <p>The file is resolved with {@code getProductFile}. Parsed content
 * events go to a no-op {@code DefaultHandler}; only the metadata is used.
 *
 * @param product the product whose file is parsed
 * @return the result of {@code transform} applied to the Tika metadata
 * @throws MetExtractionException if the file cannot be found, read or
 *                                closed, if Tika cannot parse it, or if SAX
 *                                event processing fails
 */
private Metadata getMetadataFromTika(Product product) throws MetExtractionException {
    try {
        File file = getProductFile(product);
        // try-with-resources: the stream was previously never closed, and
        // Parser.parse leaves closing the stream to the caller.
        try (FileInputStream inputStream = new FileInputStream(file)) {
            org.apache.tika.metadata.Metadata tikaMetadata =
                    new org.apache.tika.metadata.Metadata();
            Parser parser = new AutoDetectParser();
            parser.parse(inputStream, new DefaultHandler(), tikaMetadata, new ParseContext());
            return transform(tikaMetadata);
        }
    } catch (FileNotFoundException e) {
        throw new MetExtractionException(
                "Unable to find file: Reason: " + e.getMessage());
    } catch (TikaException e) {
        throw new MetExtractionException(
                "Unable to parse the document: Reason: " + e.getMessage());
    } catch (SAXException e) {
        throw new MetExtractionException(
                " Unable to process the SAX events : Reason: " + e.getMessage());
    } catch (IOException e) {
        // Also covers a failure while closing the stream.
        throw new MetExtractionException(
                "Unable to read the document stream: Reason: " + e.getMessage());
    }
}
/**
 * Extracts metadata from a product's file by running it through Tika's
 * {@code AutoDetectParser} and converting the resulting Tika metadata with
 * {@code transform}.
 *
 * <p>The file is resolved with {@code getProductFile}. Parsed content
 * events go to a no-op {@code DefaultHandler}; only the metadata is used.
 *
 * @param product the product whose file is parsed
 * @return the result of {@code transform} applied to the Tika metadata
 * @throws MetExtractionException if the file cannot be found, read or
 *                                closed, if Tika cannot parse it, or if SAX
 *                                event processing fails
 */
private Metadata getMetadataFromTika(Product product) throws MetExtractionException {
    try {
        File file = getProductFile(product);
        // try-with-resources: the stream was previously never closed, and
        // Parser.parse leaves closing the stream to the caller.
        try (FileInputStream inputStream = new FileInputStream(file)) {
            org.apache.tika.metadata.Metadata tikaMetadata =
                    new org.apache.tika.metadata.Metadata();
            Parser parser = new AutoDetectParser();
            parser.parse(inputStream, new DefaultHandler(), tikaMetadata, new ParseContext());
            return transform(tikaMetadata);
        }
    } catch (FileNotFoundException e) {
        throw new MetExtractionException(
                "Unable to find file: Reason: " + e.getMessage());
    } catch (TikaException e) {
        throw new MetExtractionException(
                "Unable to parse the document: Reason: " + e.getMessage());
    } catch (SAXException e) {
        throw new MetExtractionException(
                " Unable to process the SAX events : Reason: " + e.getMessage());
    } catch (IOException e) {
        // Also covers a failure while closing the stream.
        throw new MetExtractionException(
                "Unable to read the document stream: Reason: " + e.getMessage());
    }
}
// Rethrow the exception unchanged when its message contains "bomb".
// NOTE(review): getMessage() may return null, which would raise a NullPointerException here — confirm e always carries a message.
if (e.getMessage().indexOf("bomb") >= 0) { throw e;
// Rethrow the exception unchanged when its message contains "bomb".
// NOTE(review): getMessage() may return null, which would raise a NullPointerException here — confirm e always carries a message.
if (e.getMessage().indexOf("bomb") >= 0) { throw e;
// Rethrow the exception unchanged when its message contains "bomb".
// NOTE(review): getMessage() may return null, which would raise a NullPointerException here — confirm e always carries a message.
if (e.getMessage().indexOf("bomb") >= 0) { throw e;
// Rethrow the exception unchanged when its message contains "bomb".
// NOTE(review): getMessage() may return null, which would raise a NullPointerException here — confirm e always carries a message.
if (e.getMessage().indexOf("bomb") >= 0) { throw e;
// Wrap the caught exception in a DocumentReadException that reuses its message; e is passed as the second argument (presumably the cause — confirm against the constructor).
throw new DocumentReadException(e.getMessage(), e);
// Wrap the caught exception in a DocumentReadException that reuses its message; e is passed as the second argument (presumably the cause — confirm against the constructor).
throw new DocumentReadException(e.getMessage(), e);
// Wrap the caught exception in a DocumentReadException that reuses its message; e is passed as the second argument (presumably the cause — confirm against the constructor).
throw new DocumentReadException(e.getMessage(), e);