/**
 * Converts a textual setting into the matching {@code OCR_STRATEGY}.
 * <p>
 * The comparison ignores case (the input is lower-cased with
 * {@link Locale#ROOT}). {@code "no_ocr"} and {@code "ocr_only"} must match
 * the whole string, whereas any string that merely contains
 * {@code "ocr_and_text"} selects {@code OCR_AND_TEXT_EXTRACTION}.
 *
 * @param s the text to interpret; {@code null} yields {@code NO_OCR}
 * @return the strategy selected by {@code s}
 * @throws IllegalArgumentException if {@code s} matches no strategy; the
 *         message lists every available strategy
 */
private static OCR_STRATEGY parse(String s) {
    if (s == null) {
        return NO_OCR;
    }

    // Normalise once; every comparison below is case-insensitive.
    final String normalized = s.toLowerCase(Locale.ROOT);
    if ("no_ocr".equals(normalized)) {
        return NO_OCR;
    }
    if ("ocr_only".equals(normalized)) {
        return OCR_ONLY;
    }
    // Deliberately a substring test, unlike the exact matches above.
    if (normalized.contains("ocr_and_text")) {
        return OCR_AND_TEXT_EXTRACTION;
    }

    // Nothing matched: report the offending value and the legal choices.
    StringBuilder message = new StringBuilder("I regret that I don't recognize '");
    message.append(s).append("' as an OCR_STRATEGY. I only recognize:");
    String separator = "";
    for (OCR_STRATEGY candidate : OCR_STRATEGY.values()) {
        message.append(separator).append(candidate.toString());
        separator = ", ";
    }
    throw new IllegalArgumentException(message.toString());
}
}
if (shouldHandleXFAOnly(pdfDocument, localConfig)) { handleXFAOnly(pdfDocument, handler, metadata, context); } else if (localConfig.getOcrStrategy().equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_ONLY)) { metadata.add("X-Parsed-By", "org.apache.tika.parser.ocr.TesseractOCRParser"); if (localConfig.getOcrStrategy().equals(PDFPureJavaParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) { metadata.add("X-Parsed-By", "org.apache.tika.parser.ocr.TesseractOCRParser");
/**
 * Indicates whether another object is a {@code PDFPureJavaParserConfig}
 * carrying the same settings as this one.
 * <p>
 * The settings compared are: auto-space, duplicate-overlapping-text
 * suppression, annotation-text extraction, sort-by-position, AcroForm
 * content extraction, inline-image extraction, unique-inline-images-only,
 * XFA-only extraction, OCR DPI, catching of intermediate IOExceptions,
 * average character tolerance, spacing tolerance, OCR strategy, OCR image
 * type, OCR image format name, action extraction and the access checker.
 * <p>
 * Reference-typed settings are compared null-safely: two {@code null}
 * values are considered equal and a {@code null} never causes a
 * {@link NullPointerException}.
 *
 * @param o the object to compare with; may be {@code null}
 * @return {@code true} if {@code o} is a {@code PDFPureJavaParserConfig}
 *         whose compared settings all match this instance's
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof PDFPureJavaParserConfig)) {
        return false;
    }
    PDFPureJavaParserConfig config = (PDFPureJavaParserConfig) o;

    if (getEnableAutoSpace() != config.getEnableAutoSpace()) {
        return false;
    }
    if (getSuppressDuplicateOverlappingText() != config.getSuppressDuplicateOverlappingText()) {
        return false;
    }
    if (getExtractAnnotationText() != config.getExtractAnnotationText()) {
        return false;
    }
    if (getSortByPosition() != config.getSortByPosition()) {
        return false;
    }
    if (getExtractAcroFormContent() != config.getExtractAcroFormContent()) {
        return false;
    }
    if (getExtractInlineImages() != config.getExtractInlineImages()) {
        return false;
    }
    if (getExtractUniqueInlineImagesOnly() != config.getExtractUniqueInlineImagesOnly()) {
        return false;
    }
    if (getIfXFAExtractOnlyXFA() != config.getIfXFAExtractOnlyXFA()) {
        return false;
    }
    if (getOcrDPI() != config.getOcrDPI()) {
        return false;
    }
    if (isCatchIntermediateIOExceptions() != config.isCatchIntermediateIOExceptions()) {
        return false;
    }
    // java.util.Objects is spelled out in full so that this method does not
    // depend on an import being present. Objects.equals tolerates a null on
    // either side, where a direct x.equals(y) would throw.
    if (!java.util.Objects.equals(getAverageCharTolerance(), config.getAverageCharTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getSpacingTolerance(), config.getSpacingTolerance())) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrStrategy(), config.getOcrStrategy())) {
        return false;
    }
    // Identity comparison is kept as written.
    // NOTE(review): presumably the image type is an enum, for which identity
    // and equals() agree - confirm against the getter's declared type.
    if (getOcrImageType() != config.getOcrImageType()) {
        return false;
    }
    if (!java.util.Objects.equals(getOcrImageFormatName(), config.getOcrImageFormatName())) {
        return false;
    }
    if (getExtractActions() != config.getExtractActions()) {
        return false;
    }
    return java.util.Objects.equals(getAccessChecker(), config.getAccessChecker());
}
/**
 * Computes a hash code from the settings that {@link #equals(Object)}
 * compares, folding them together with the conventional multiplier 31.
 * <p>
 * Every setting is read through its getter, and reference-typed settings
 * are hashed null-safely (a {@code null} contributes 0), so this method
 * does not throw when such a setting is unset.
 *
 * @return the combined hash of this configuration's settings
 */
@Override
public int hashCode() {
    int result = (getEnableAutoSpace() ? 1 : 0);
    result = 31 * result + (getSuppressDuplicateOverlappingText() ? 1 : 0);
    result = 31 * result + (getExtractAnnotationText() ? 1 : 0);
    result = 31 * result + (getSortByPosition() ? 1 : 0);
    result = 31 * result + (getExtractAcroFormContent() ? 1 : 0);
    result = 31 * result + (getExtractInlineImages() ? 1 : 0);
    result = 31 * result + (getExtractUniqueInlineImagesOnly() ? 1 : 0);
    // java.util.Objects is spelled out in full so that this method does not
    // depend on an import being present. Objects.hashCode(x) equals
    // x.hashCode() for non-null x and is 0 for null.
    result = 31 * result + java.util.Objects.hashCode(getAverageCharTolerance());
    result = 31 * result + java.util.Objects.hashCode(getSpacingTolerance());
    result = 31 * result + (getIfXFAExtractOnlyXFA() ? 1 : 0);
    // Read via the getter, because equals() compares getOcrStrategy(); the
    // two methods must look at the same value to stay consistent.
    result = 31 * result + java.util.Objects.hashCode(getOcrStrategy());
    result = 31 * result + getOcrDPI();
    result = 31 * result + java.util.Objects.hashCode(getOcrImageType());
    result = 31 * result + java.util.Objects.hashCode(getOcrImageFormatName());
    result = 31 * result + java.util.Objects.hashCode(getAccessChecker());
    result = 31 * result + (isCatchIntermediateIOExceptions() ? 1 : 0);
    result = 31 * result + (getExtractActions() ? 1 : 0);
    return result;
}