/**
 * Runs the extract-by-outline task over every source listed in {@code parameters}.
 * <p>
 * For each source the document is opened, the page destinations for the requested outline level
 * (and title regex) are retrieved through a {@code SamboxOutlineLevelsHandler}, and the extraction
 * is delegated to a {@code PageDestinationsLevelPdfExtractor}. The document opened for a source is
 * closed before the next source is processed.
 *
 * @param parameters supplies the source list, outline level, title regex, the "include page after"
 *                   flag and the output
 * @throws TaskException propagated from the steps above, e.g. from the extractor
 */
@Override
public void execute(ExtractByOutlineParameters parameters) throws TaskException {
    for (PdfSource<?> source : parameters.getSourceList()) {
        LOG.debug("Opening {} ", source);
        document = source.open(documentLoader).getUnderlyingPDDocument();
        try {
            LOG.debug("Retrieving outline information for level {} and match regex {}", parameters.getLevel(),
                    parameters.getMatchingTitleRegEx());
            OutlineExtractPageDestinations pagesDestination = new SamboxOutlineLevelsHandler(document,
                    parameters.getMatchingTitleRegEx()).getExtractPageDestinations(parameters.getLevel(),
                            parameters.isIncludePageAfter());

            LOG.debug("Starting extraction by outline, level {} and match regex {}", parameters.getLevel(),
                    parameters.getMatchingTitleRegEx());
            new PageDestinationsLevelPdfExtractor(document, parameters, pagesDestination, source)
                    .extract(executionContext());
        } finally {
            // Close in finally so the document of this source is released even when the outline
            // lookup or the extraction throws; previously the close was skipped on failure.
            // NOTE(review): relies on nullSafeCloseQuietly not throwing, as its name suggests - confirm.
            nullSafeCloseQuietly(document);
        }
    }
    LOG.debug("Extraction completed and outputs written to {}", parameters.getOutput());
}
/**
 * Creates the baseline parameters used by the tests: compressed PDF 1.6 output, a single source
 * registered under the name {@code file1.pdf}, a file-number/bookmark-name output prefix and
 * overwriting of existing outputs.
 *
 * @param level      outline level passed to the parameters constructor
 * @param sourceFile location of the input handed to {@code customInput}
 * @param regEx      value set as the matching title regular expression
 * @return the configured parameters
 */
private ExtractByOutlineParameters setUpParameters(int level, String sourceFile, String regEx) {
    ExtractByOutlineParameters configured = new ExtractByOutlineParameters(level);

    // what to match
    configured.setMatchingTitleRegEx(regEx);

    // how to write
    configured.setCompress(true);
    configured.setVersion(PdfVersion.VERSION_1_6);

    // what to read
    configured.addSource(customInput(sourceFile, "file1.pdf"));

    // where and how to name the results
    configured.setOutputPrefix("[FILENUMBER]_[BOOKMARK_NAME_STRICT]");
    configured.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);

    return configured;
}
/**
 * Compares this instance with {@code other}.
 * <p>
 * Two instances are equal when the superclass considers them equal and the optimization policy,
 * discard-outline flag, level, matching title regex and include-page-after flag all match.
 *
 * @param other the object to compare with
 * @return {@code true} if {@code other} is the same instance or an equal
 *         {@code ExtractByOutlineParameters}, {@code false} otherwise
 */
@Override
public boolean equals(Object other) {
    if (this == other) {
        return true;
    }
    if (other instanceof ExtractByOutlineParameters) {
        ExtractByOutlineParameters that = (ExtractByOutlineParameters) other;
        EqualsBuilder comparison = new EqualsBuilder();
        comparison.appendSuper(super.equals(other));
        comparison.append(optimizationPolicy, that.optimizationPolicy);
        comparison.append(discardOutline, that.discardOutline);
        comparison.append(level, that.getLevel());
        comparison.append(matchingTitleRegEx, that.getMatchingTitleRegEx());
        comparison.append(includePageAfter, that.isIncludePageAfter());
        return comparison.isEquals();
    }
    return false;
}
} // closes the enclosing type declared outside this excerpt
/**
 * Extracts pages from the document and passes the results to the configured output.
 * <p>
 * Steps visible here: a multiple-output writer is created from the existing-output policy and the
 * execution context; a {@code PagesExtractor} is opened on the document in a try-with-resources;
 * an {@code OptimizationRuler} decides whether optimization applies; a temporary buffer is created
 * and an output name is generated from the output prefix, page, source name, file counter and
 * bookmark title; version and compression are set on the extractor, which is optimized when the
 * ruler said so, saved to the temporary buffer (honouring the discard-outline flag) and reset;
 * finally the output accepts the writer.
 * <p>
 * NOTE(review): the text below looks like an elided excerpt - {@code page} and {@code section}
 * have no visible declaration and the braces are not balanced - so the control flow around these
 * statements (loops, counters, closing of the optimize branch) cannot be confirmed from here.
 *
 * @param executionContext context handed to the output writer factory
 * @throws TaskException declared by the method; the throwing statements are not identifiable here
 */
public void extract(TaskExecutionContext executionContext) throws TaskException { int outputDocumentsCounter = 0; this.outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext); try (PagesExtractor extractor = new PagesExtractor(document)) { boolean optimize = new OptimizationRuler(parameters.getOptimizationPolicy()).apply(document); File tmpFile = createTemporaryBuffer(parameters.getOutput()); LOG.debug("Created output temporary buffer {}", tmpFile); String outName = nameGenerator(parameters.getOutputPrefix()) .generate(nameRequest().page(page).originalName(source.getName()) .fileNumber(outputDocumentsCounter).bookmark(section.title)); extractor.setVersion(parameters.getVersion()); extractor.setCompress(parameters.isCompress()); if (optimize) { extractor.optimize(); extractor.save(tmpFile, parameters.discardOutline()); extractor.reset(); LOG.debug("Ending extracting {}", section.title); parameters.getOutput().accept(outputWriter);
/**
 * Converts the command line arguments of the extract-by-bookmarks task into task parameters.
 * <p>
 * The bookmark level is passed to the parameters constructor; the matching regular expression is
 * copied only when the arguments report one; the remaining settings are filled in by the
 * {@code populate*} helpers, in the order listed below.
 *
 * @param taskCliArguments parsed command line arguments
 * @return the populated parameters
 */
@Override
public ExtractByOutlineParameters toTaskParameters(ExtractByBookmarksTaskCliArguments taskCliArguments) {
    ExtractByOutlineParameters result = new ExtractByOutlineParameters(taskCliArguments.getBookmarkLevel());

    // optional: only set when a regex was supplied
    if (taskCliArguments.isMatchingRegEx()) {
        result.setMatchingTitleRegEx(taskCliArguments.getMatchingRegEx());
    }

    populateAbstractParameters(result, taskCliArguments);
    populateSourceParameters(result, taskCliArguments);
    populateOutputTaskParameters(result, taskCliArguments);
    populateOutputPrefix(result, taskCliArguments);
    populateOptimizableOutputParameters(result, taskCliArguments);
    populateDiscardableOutlineParameters(result, taskCliArguments);

    return result;
}
} // closes the enclosing type declared outside this excerpt
/**
 * The value given to {@code --matchingRegEx} must end up, unchanged, as the matching title regex.
 */
@Test
public void matchingRegExp_Specified() {
    ExtractByOutlineParameters parsed = defaultCommandLine()
            .with("--matchingRegEx", "[Chapter*]")
            .invokeSejdaConsole();

    assertEquals("[Chapter*]", parsed.getMatchingTitleRegEx());
}
/**
 * The value given to {@code -l} must be reported as the outline level.
 */
@Test
public void bookmarksLevel_Specified() {
    ExtractByOutlineParameters parsed = defaultCommandLine()
            .with("-l", "3")
            .invokeSejdaConsole();

    assertEquals(3, parsed.getLevel());
}
/**
 * With the default command line, the outline must not be discarded.
 */
@Test
public void dontDiscardOutline() {
    ExtractByOutlineParameters parsed = defaultCommandLine().invokeSejdaConsole();

    assertFalse(parsed.discardOutline());
}
} // closes the enclosing type declared outside this excerpt
/**
 * {@code -z no} must map to {@link OptimizationPolicy#NO}.
 */
@Test
public void optimizedNo() {
    ExtractByOutlineParameters parsed = defaultCommandLine()
            .with("-z", "no")
            .invokeSejdaConsole();

    assertEquals(OptimizationPolicy.NO, parsed.getOptimizationPolicy());
}
/**
 * Runs the task on two sources (a second one registered as {@code file2.pdf}) with a prefix that
 * starts with the source base name, then checks that both "Invoking Maven" outputs are present
 * and that 36 outputs were written in total.
 *
 * @throws IOException declared for the test context helpers
 */
@Test
public void testBatchFilesWithConflictingOutputFiles() throws IOException {
    ExtractByOutlineParameters batch = setUpParameters(2);
    batch.addSource(customInput("pdf/extract_by_outline_sample.pdf", "file2.pdf"));
    batch.setOutputPrefix("[BASENAME]_[FILENUMBER]_[BOOKMARK_NAME_STRICT]");
    testContext.directoryOutputTo(batch);

    execute(batch);

    testContext.assertOutputContainsFilenames(
            "file1_1_Invoking Maven.pdf",
            "file2_1_Invoking Maven.pdf");
    testContext.assertOutputSize(36);
}
/**
 * Extracts pages from the document and passes the results to the configured output.
 * <p>
 * Steps visible here: a multiple-output writer is created from the existing-output policy and the
 * execution context; a {@code PagesExtractor} is opened on the document in a try-with-resources;
 * an {@code OptimizationRuler} decides whether optimization applies; a temporary buffer is created
 * and an output name is generated from the output prefix, page, source name, file counter and
 * bookmark title; version and compression are set on the extractor, which is optimized when the
 * ruler said so, saved to the temporary buffer (honouring the discard-outline flag) and reset;
 * finally the output accepts the writer.
 * <p>
 * NOTE(review): the text below looks like an elided excerpt - {@code page} and {@code section}
 * have no visible declaration and the braces are not balanced - so the control flow around these
 * statements (loops, counters, closing of the optimize branch) cannot be confirmed from here.
 *
 * @param executionContext context handed to the output writer factory
 * @throws TaskException declared by the method; the throwing statements are not identifiable here
 */
public void extract(TaskExecutionContext executionContext) throws TaskException { int outputDocumentsCounter = 0; this.outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext); try (PagesExtractor extractor = new PagesExtractor(document)) { boolean optimize = new OptimizationRuler(parameters.getOptimizationPolicy()).apply(document); File tmpFile = createTemporaryBuffer(parameters.getOutput()); LOG.debug("Created output temporary buffer {}", tmpFile); String outName = nameGenerator(parameters.getOutputPrefix()) .generate(nameRequest().page(page).originalName(source.getName()) .fileNumber(outputDocumentsCounter).bookmark(section.title)); extractor.setVersion(parameters.getVersion()); extractor.setCompress(parameters.isCompress()); if (optimize) { extractor.optimize(); extractor.save(tmpFile, parameters.discardOutline()); extractor.reset(); LOG.debug("Ending extracting {}", section.title); parameters.getOutput().accept(outputWriter);
/**
 * Compares this instance with {@code other}.
 * <p>
 * Two instances are equal when the superclass considers them equal and the optimization policy,
 * discard-outline flag, level, matching title regex and include-page-after flag all match.
 *
 * @param other the object to compare with
 * @return {@code true} if {@code other} is the same instance or an equal
 *         {@code ExtractByOutlineParameters}, {@code false} otherwise
 */
@Override
public boolean equals(Object other) {
    if (this == other) {
        return true;
    }
    if (other instanceof ExtractByOutlineParameters) {
        ExtractByOutlineParameters that = (ExtractByOutlineParameters) other;
        EqualsBuilder comparison = new EqualsBuilder();
        comparison.appendSuper(super.equals(other));
        comparison.append(optimizationPolicy, that.optimizationPolicy);
        comparison.append(discardOutline, that.discardOutline);
        comparison.append(level, that.getLevel());
        comparison.append(matchingTitleRegEx, that.getMatchingTitleRegEx());
        comparison.append(includePageAfter, that.isIncludePageAfter());
        return comparison.isEquals();
    }
    return false;
}
} // closes the enclosing type declared outside this excerpt
/**
 * Passing the {@code --discardOutline} flag must turn outline discarding on.
 */
@Test
public void discardOutline() {
    ExtractByOutlineParameters parsed = defaultCommandLine()
            .withFlag("--discardOutline")
            .invokeSejdaConsole();

    assertTrue(parsed.discardOutline());
}
/**
 * {@code -z yes} must map to {@link OptimizationPolicy#YES}.
 */
@Test
public void optimizedYes() {
    ExtractByOutlineParameters parsed = defaultCommandLine()
            .with("-z", "yes")
            .invokeSejdaConsole();

    assertEquals(OptimizationPolicy.YES, parsed.getOptimizationPolicy());
}
/**
 * Runs the extract-by-outline task over every source listed in {@code parameters}.
 * <p>
 * For each source the document is opened, the page destinations for the requested outline level
 * (and title regex) are retrieved through a {@code SamboxOutlineLevelsHandler}, and the extraction
 * is delegated to a {@code PageDestinationsLevelPdfExtractor}. The document opened for a source is
 * closed before the next source is processed.
 *
 * @param parameters supplies the source list, outline level, title regex, the "include page after"
 *                   flag and the output
 * @throws TaskException propagated from the steps above, e.g. from the extractor
 */
@Override
public void execute(ExtractByOutlineParameters parameters) throws TaskException {
    for (PdfSource<?> source : parameters.getSourceList()) {
        LOG.debug("Opening {} ", source);
        document = source.open(documentLoader).getUnderlyingPDDocument();
        try {
            LOG.debug("Retrieving outline information for level {} and match regex {}", parameters.getLevel(),
                    parameters.getMatchingTitleRegEx());
            OutlineExtractPageDestinations pagesDestination = new SamboxOutlineLevelsHandler(document,
                    parameters.getMatchingTitleRegEx()).getExtractPageDestinations(parameters.getLevel(),
                            parameters.isIncludePageAfter());

            LOG.debug("Starting extraction by outline, level {} and match regex {}", parameters.getLevel(),
                    parameters.getMatchingTitleRegEx());
            new PageDestinationsLevelPdfExtractor(document, parameters, pagesDestination, source)
                    .extract(executionContext());
        } finally {
            // Close in finally so the document of this source is released even when the outline
            // lookup or the extraction throws; previously the close was skipped on failure.
            // NOTE(review): relies on nullSafeCloseQuietly not throwing, as its name suggests - confirm.
            nullSafeCloseQuietly(document);
        }
    }
    LOG.debug("Extraction completed and outputs written to {}", parameters.getOutput());
}