if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, handler, metadata, context); } else if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString()); OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } else { if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString());
if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { doOCROnCurrentPage();
/**
 * Compares this configuration with another object, property by property.
 *
 * <p>The other object must be a {@code PDFParserConfig}; every property checked
 * below must match for the two to be considered equal. Properties that are
 * compared with {@code equals} (tolerances, OCR strategy, OCR image format
 * name, access checker) are compared null-safely, so an unset value on either
 * side yields a {@code false}/{@code true} answer instead of a
 * {@link NullPointerException}.
 *
 * @param o the object to compare against; may be {@code null}
 * @return {@code true} if {@code o} is a {@code PDFParserConfig} whose compared
 *         properties all match this instance, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    // instanceof is false for null, so this also rejects a null argument.
    if (!(o instanceof PDFParserConfig)) {
        return false;
    }
    PDFParserConfig config = (PDFParserConfig) o;

    if (getEnableAutoSpace() != config.getEnableAutoSpace()) {
        return false;
    }
    if (getSuppressDuplicateOverlappingText() != config.getSuppressDuplicateOverlappingText()) {
        return false;
    }
    if (getExtractAnnotationText() != config.getExtractAnnotationText()) {
        return false;
    }
    if (getSortByPosition() != config.getSortByPosition()) {
        return false;
    }
    if (getExtractAcroFormContent() != config.getExtractAcroFormContent()) {
        return false;
    }
    if (getExtractBookmarksText() != config.getExtractBookmarksText()) {
        return false;
    }
    if (getExtractInlineImages() != config.getExtractInlineImages()) {
        return false;
    }
    if (getExtractUniqueInlineImagesOnly() != config.getExtractUniqueInlineImagesOnly()) {
        return false;
    }
    if (getIfXFAExtractOnlyXFA() != config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (getOcrDPI() != config.getOcrDPI()) {
        return false;
    }
    if (isCatchIntermediateIOExceptions() != config.isCatchIntermediateIOExceptions()) {
        return false;
    }
    // Null-safe: the original dereferenced these getters directly and would
    // throw if this instance's value was null.
    // NOTE(review): presumably the tolerances may be unset (null) — confirm against the field declarations.
    if (!java.util.Objects.equals(getAverageCharTolerance(), config.getAverageCharTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getSpacingTolerance(), config.getSpacingTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrStrategy(), config.getOcrStrategy())) {
        return false;
    }
    if (getOcrImageType() != config.getOcrImageType()) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrImageFormatName(), config.getOcrImageFormatName())) {
        return false;
    }
    if (getExtractActions() != config.getExtractActions()) {
        return false;
    }
    if (!java.util.Objects.equals(getAccessChecker(), config.getAccessChecker())) {
        return false;
    }
    return getMaxMainMemoryBytes() == config.getMaxMainMemoryBytes();
}
if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { doOCROnCurrentPage();
if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { doOCROnCurrentPage();
if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, handler, metadata, context); } else if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString()); OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } else { if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString());
if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, handler, metadata, context); } else if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString()); OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig); } else { if (localConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { metadata.add("X-Parsed-By", TesseractOCRParser.class.toString());
void doOCROnCurrentPage() throws IOException, TikaException, SAXException { if (config.getOcrStrategy().equals(NO_OCR)) { return;
void doOCROnCurrentPage() throws IOException, TikaException, SAXException { if (config.getOcrStrategy().equals(NO_OCR)) { return;
/**
 * Determines whether another object is a {@code PDFParserConfig} carrying the
 * same settings as this one.
 *
 * <p>Each property listed in the body is compared in turn and the first
 * mismatch short-circuits to {@code false}. Object-valued properties
 * (tolerances, OCR strategy, OCR image format name, access checker) go through
 * a null-safe comparison so that a {@code null} value on this instance cannot
 * raise a {@link NullPointerException} from {@code equals}.
 *
 * @param o the candidate object; may be {@code null}
 * @return {@code true} when {@code o} is a {@code PDFParserConfig} and all
 *         compared properties match, otherwise {@code false}
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    // A null argument fails the instanceof test and is therefore unequal.
    if (!(o instanceof PDFParserConfig)) {
        return false;
    }
    PDFParserConfig config = (PDFParserConfig) o;

    if (getEnableAutoSpace() != config.getEnableAutoSpace()) {
        return false;
    }
    if (getSuppressDuplicateOverlappingText() != config.getSuppressDuplicateOverlappingText()) {
        return false;
    }
    if (getExtractAnnotationText() != config.getExtractAnnotationText()) {
        return false;
    }
    if (getSortByPosition() != config.getSortByPosition()) {
        return false;
    }
    if (getExtractAcroFormContent() != config.getExtractAcroFormContent()) {
        return false;
    }
    if (getExtractBookmarksText() != config.getExtractBookmarksText()) {
        return false;
    }
    if (getExtractInlineImages() != config.getExtractInlineImages()) {
        return false;
    }
    if (getExtractUniqueInlineImagesOnly() != config.getExtractUniqueInlineImagesOnly()) {
        return false;
    }
    if (getIfXFAExtractOnlyXFA() != config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (getOcrDPI() != config.getOcrDPI()) {
        return false;
    }
    if (isCatchIntermediateIOExceptions() != config.isCatchIntermediateIOExceptions()) {
        return false;
    }
    // The original called .equals directly on these getter results, which
    // throws when this instance's value is null; compare null-safely instead.
    // NOTE(review): presumably some of these may legitimately be unset — confirm.
    if (!java.util.Objects.equals(getAverageCharTolerance(), config.getAverageCharTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getSpacingTolerance(), config.getSpacingTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrStrategy(), config.getOcrStrategy())) {
        return false;
    }
    if (getOcrImageType() != config.getOcrImageType()) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrImageFormatName(), config.getOcrImageFormatName())) {
        return false;
    }
    if (getExtractActions() != config.getExtractActions()) {
        return false;
    }
    if (!java.util.Objects.equals(getAccessChecker(), config.getAccessChecker())) {
        return false;
    }
    return getMaxMainMemoryBytes() == config.getMaxMainMemoryBytes();
}
/**
 * Checks whether the given object is a {@code PDFParserConfig} whose settings
 * match this instance.
 *
 * <p>Properties are compared one at a time, returning {@code false} at the
 * first difference; the access checker is compared last. Object-valued
 * properties (tolerances, OCR strategy, OCR image format name, access checker)
 * are compared null-safely so that an unset value cannot cause
 * {@code equals} to throw a {@link NullPointerException}.
 *
 * @param o the object to test; may be {@code null}
 * @return {@code true} if {@code o} is a {@code PDFParserConfig} and every
 *         compared property is equal, {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    // instanceof also filters out a null argument.
    if (!(o instanceof PDFParserConfig)) {
        return false;
    }
    PDFParserConfig config = (PDFParserConfig) o;

    if (getEnableAutoSpace() != config.getEnableAutoSpace()) {
        return false;
    }
    if (getSuppressDuplicateOverlappingText() != config.getSuppressDuplicateOverlappingText()) {
        return false;
    }
    if (getExtractAnnotationText() != config.getExtractAnnotationText()) {
        return false;
    }
    if (getSortByPosition() != config.getSortByPosition()) {
        return false;
    }
    if (getExtractAcroFormContent() != config.getExtractAcroFormContent()) {
        return false;
    }
    if (getExtractInlineImages() != config.getExtractInlineImages()) {
        return false;
    }
    if (getExtractUniqueInlineImagesOnly() != config.getExtractUniqueInlineImagesOnly()) {
        return false;
    }
    if (getIfXFAExtractOnlyXFA() != config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (getOcrDPI() != config.getOcrDPI()) {
        return false;
    }
    if (isCatchIntermediateIOExceptions() != config.isCatchIntermediateIOExceptions()) {
        return false;
    }
    // Null-safe replacements for the original direct .equals calls, which
    // would throw if the value on this instance was null.
    // NOTE(review): presumably the tolerances can be unset — confirm against the field declarations.
    if (!java.util.Objects.equals(getAverageCharTolerance(), config.getAverageCharTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getSpacingTolerance(), config.getSpacingTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrStrategy(), config.getOcrStrategy())) {
        return false;
    }
    if (getOcrImageType() != config.getOcrImageType()) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrImageFormatName(), config.getOcrImageFormatName())) {
        return false;
    }
    if (getExtractActions() != config.getExtractActions()) {
        return false;
    }
    return java.util.Objects.equals(getAccessChecker(), config.getAccessChecker());
}
void doOCROnCurrentPage() throws IOException, TikaException, SAXException { if (config.getOcrStrategy().equals(NO_OCR)) { return;