/**
 * Creates an extractor for the presentation held in the given package.
 * The package is wrapped in a new {@link XSLFSlideShow}, which is then
 * handed to the constructor that accepts one.
 *
 * @param container the package to read the presentation from
 * @throws XmlException declared; propagated from building the {@link XSLFSlideShow}
 * @throws OpenXML4JException declared; propagated from building the {@link XSLFSlideShow}
 * @throws IOException declared; propagated from building the {@link XSLFSlideShow}
 */
public XSLFPowerPointExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { this(new XSLFSlideShow(container)); }
/**
 * Returns the low level slide object from
 * the supplied slide reference.
 *
 * @param slide the slide reference whose part should be parsed
 * @return the {@code CTSlide} taken from the parsed slide document
 * @throws IOException declared; propagated from resolving or reading the slide part
 * @throws XmlException declared; propagated from resolving the part or parsing its XML
 */
@Internal
public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
    PackagePart slidePart = getSlidePart(slide);
    // Release the part's stream as soon as parsing finishes (or fails)
    // rather than leaving it open.
    try (java.io.InputStream stream = slidePart.getInputStream()) {
        SldDocument slideDoc = SldDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
        return slideDoc.getSld();
    }
}
/**
 * Opens the presentation stored in the given package.
 * <p>
 * If the core part carries the theme manager content type the package is
 * rebased first. The presentation document is then parsed from the core
 * part, and for every referenced slide the parts reachable through its
 * OLE-object and package-object relationships are collected as embedded
 * parts. OLE-object relationships with an external target are skipped.
 *
 * @param container the package holding the presentation
 * @throws OpenXML4JException declared; propagated from the package operations
 * @throws IOException declared; propagated from reading the core part
 * @throws XmlException declared; propagated from parsing the presentation XML
 */
public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException {
    super(container);

    if (getCorePart().getContentType().equals(XSLFRelation.THEME_MANAGER.getContentType())) {
        rebase(getPackage());
    }

    // Close the core part's stream once the document has been parsed
    // instead of leaving it open.
    try (java.io.InputStream stream = getCorePart().getInputStream()) {
        presentationDoc = PresentationDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
    }

    embedds = new LinkedList<>();
    // The core part does not change while the slides are walked, so look it up once.
    PackagePart corePart = getCorePart();
    for (CTSlideIdListEntry ctSlide : getSlideReferences().getSldIdArray()) {
        PackagePart slidePart = corePart.getRelatedPart(corePart.getRelationship(ctSlide.getId2()));

        for (PackageRelationship rel : slidePart.getRelationshipsByType(OLE_OBJECT_REL_TYPE)) {
            if (TargetMode.EXTERNAL == rel.getTargetMode()) {
                // An external target is not resolved to a part here
                continue;
            }
            // TODO: Add this reference to each slide as well
            embedds.add(slidePart.getRelatedPart(rel));
        }

        for (PackageRelationship rel : slidePart.getRelationshipsByType(PACK_OBJECT_REL_TYPE)) {
            embedds.add(slidePart.getRelatedPart(rel));
        }
    }
}
public XSLFSlideShow(String file) throws OpenXML4JException, IOException, XmlException {
XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); parts.add(document.getPackagePart()); HANDOUT_MASTER}) { try { PackageRelationshipCollection prc = document.getPackagePart().getRelationshipsByType(rel); for (int i = 0; i < prc.size(); i++) { PackagePart pp = document.getPackagePart().getRelatedPart(prc.getRelationship(i)); if (pp != null) { parts.add(pp);
@Override public void parseContent(final MultivaluedMap<String, String> parameters, final Path filePath, final String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception { final XSLFSlideShow pptSlideShow = new XSLFSlideShow(filePath.toAbsolutePath().toString()); final XMLSlideShow slideshow = new XMLSlideShow(pptSlideShow.getPackage()); final ParserFieldsBuilder metas = resultBuilder.metas(); metas.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault)); // Extract metadata try (XSLFPowerPointExtractor poiExtractor = new XSLFPowerPointExtractor(slideshow)) { final CoreProperties info = poiExtractor.getCoreProperties(); if (info != null) { metas.add(TITLE, info.getTitle()); metas.add(CREATOR, info.getCreator()); metas.add(SUBJECT, info.getSubject()); metas.add(DESCRIPTION, info.getDescription()); metas.add(KEYWORDS, info.getKeywords()); metas.add(CREATION_DATE, info.getCreated()); metas.add(MODIFICATION_DATE, info.getModified()); } } extractSides(slideshow, resultBuilder); }
/**
 * Creates an extractor from a low level slide show. The slide show's
 * package is wrapped in a new {@link XMLSlideShow}, which is then handed
 * to the constructor that accepts one.
 *
 * @param slideShow the low level slide show whose package is read
 */
public XSLFPowerPointExtractor(XSLFSlideShow slideShow) { this(new XMLSlideShow(slideShow.getPackage())); }
/**
 * Gives access to the presentation's list of slide references.
 * The list is what defines the slide ordering and is the starting
 * point for reaching the individual slides. When the presentation
 * has no such list yet, an empty one is installed first.
 *
 * @return the slide id list of the presentation
 */
@Internal
public CTSlideIdList getSlideReferences() {
    if (getPresentation().isSetSldIdLst()) {
        return getPresentation().getSldIdLst();
    }

    // Nothing set yet: install an empty list and hand back what the
    // presentation now holds.
    getPresentation().setSldIdLst(CTSlideIdList.Factory.newInstance());
    return getPresentation().getSldIdLst();
}
/**
 * Resolves the package part that backs the given slide reference by
 * following the core part's relationship with the reference's id.
 *
 * @param slide the slide reference to resolve
 * @return the part the relationship points to
 * @throws IOException declared by the signature
 * @throws XmlException wrapping an {@code InvalidFormatException} raised during the lookup
 */
public PackagePart getSlidePart(CTSlideIdListEntry slide) throws IOException, XmlException {
    try {
        PackagePart presentationPart = getCorePart();
        PackageRelationship slideRel = presentationPart.getRelationship(slide.getId2());
        return presentationPart.getRelatedPart(slideRel);
    } catch (InvalidFormatException invalidFormat) {
        throw new XmlException(invalidFormat);
    }
}
/**
/**
 * Returns the low level slide master object from
 * the supplied slide master reference.
 *
 * @param master the slide master reference whose part should be parsed
 * @return the {@code CTSlideMaster} taken from the parsed master document
 * @throws IOException declared; propagated from resolving or reading the master part
 * @throws XmlException declared; propagated from resolving the part or parsing its XML
 */
@Internal
public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
    PackagePart masterPart = getSlideMasterPart(master);
    // Release the part's stream as soon as parsing finishes (or fails)
    // rather than leaving it open.
    try (java.io.InputStream stream = masterPart.getInputStream()) {
        SldMasterDocument masterDoc = SldMasterDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
        return masterDoc.getSldMaster();
    }
}
/**
 * Returns the low level notes object for the given
 * slide, as found from the supplied slide reference.
 *
 * @param slide the slide reference whose notes are wanted
 * @return the {@code CTNotesSlide} taken from the parsed notes document,
 *         or {@code null} when no notes part is found for the slide
 * @throws IOException declared; propagated from resolving or reading the notes part
 * @throws XmlException declared; propagated from resolving the part or parsing its XML
 */
@Internal
public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
    PackagePart notesPart = getNodesPart(slide);
    if (notesPart == null) {
        return null;
    }

    // Release the part's stream as soon as parsing finishes (or fails)
    // rather than leaving it open.
    try (java.io.InputStream stream = notesPart.getInputStream()) {
        NotesDocument notesDoc = NotesDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
        return notesDoc.getNotes();
    }
}
XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); parts.add(document.getPackagePart()); HANDOUT_MASTER}) { try { PackageRelationshipCollection prc = document.getPackagePart().getRelationshipsByType(rel); for (int i = 0; i < prc.size(); i++) { PackagePart pp = document.getPackagePart().getRelatedPart(prc.getRelationship(i)); if (pp != null) { parts.add(pp);
/**
 * Creates an extractor from a low level slide show. The slide show's
 * package is wrapped in a new {@link XMLSlideShow}, which is then handed
 * to the constructor that accepts one.
 *
 * @param slideShow the low level slide show whose package is read
 */
public XSLFPowerPointExtractor(XSLFSlideShow slideShow) { this(new XMLSlideShow(slideShow.getPackage())); }
/**
 * Gives access to the presentation's list of slide master references,
 * which is the starting point for reaching the slide masters themselves.
 * The value is handed back exactly as the presentation reports it; no
 * list is created here when none is set.
 *
 * @return the slide master id list as held by the presentation
 */
@Internal
public CTSlideMasterIdList getSlideMasterReferences() {
    CTSlideMasterIdList masterIds = getPresentation().getSldMasterIdLst();
    return masterIds;
}
/**
 * Resolves the package part that backs the given slide master reference
 * by following the core part's relationship with the reference's id.
 *
 * @param master the slide master reference to resolve
 * @return the part the relationship points to
 * @throws IOException declared by the signature
 * @throws XmlException wrapping an {@code InvalidFormatException} raised during the lookup
 */
public PackagePart getSlideMasterPart(CTSlideMasterIdListEntry master) throws IOException, XmlException {
    try {
        PackagePart presentationPart = getCorePart();
        PackageRelationship masterRel = presentationPart.getRelationship(master.getId2());
        return presentationPart.getRelatedPart(masterRel);
    } catch (InvalidFormatException invalidFormat) {
        throw new XmlException(invalidFormat);
    }
}
/**
/**
 * Returns the low level slide master object from
 * the supplied slide master reference.
 *
 * @param master the slide master reference whose part should be parsed
 * @return the {@code CTSlideMaster} taken from the parsed master document
 * @throws IOException declared; propagated from resolving or reading the master part
 * @throws XmlException declared; propagated from resolving the part or parsing its XML
 */
@Internal
public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
    PackagePart masterPart = getSlideMasterPart(master);
    // Release the part's stream as soon as parsing finishes (or fails)
    // rather than leaving it open.
    try (java.io.InputStream stream = masterPart.getInputStream()) {
        SldMasterDocument masterDoc = SldMasterDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
        return masterDoc.getSldMaster();
    }
}
/**
 * Returns the low level notes object for the given
 * slide, as found from the supplied slide reference.
 *
 * @param slide the slide reference whose notes are wanted
 * @return the {@code CTNotesSlide} taken from the parsed notes document,
 *         or {@code null} when no notes part is found for the slide
 * @throws IOException declared; propagated from resolving or reading the notes part
 * @throws XmlException declared; propagated from resolving the part or parsing its XML
 */
@Internal
public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
    PackagePart notesPart = getNodesPart(slide);
    if (notesPart == null) {
        return null;
    }

    // Release the part's stream as soon as parsing finishes (or fails)
    // rather than leaving it open.
    try (java.io.InputStream stream = notesPart.getInputStream()) {
        NotesDocument notesDoc = NotesDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
        return notesDoc.getNotes();
    }
}
XSLFSlideShow document = null; try { document = new XSLFSlideShow(extractor.getPackage()); } catch (Exception e) { throw new TikaException(e.getMessage()); // Shouldn't happen CTSlideIdList ctSlideIdList = document.getSlideReferences(); if (ctSlideIdList != null) { for (int i = 0; i < ctSlideIdList.sizeOfSldIdArray(); i++) { slidePart = document.getSlidePart(ctSlide); } catch (IOException e) { throw new TikaException("Broken OOXML file", e); parts.add(document.getPackagePart()); HANDOUT_MASTER}) { try { PackageRelationshipCollection prc = document.getPackagePart().getRelationshipsByType(rel); for (int i = 0; i < prc.size(); i++) { PackagePart pp = document.getPackagePart().getRelatedPart(prc.getRelationship(i)); if (pp != null) { parts.add(pp);
/**
 * Command line entry point: prints the text extracted from the
 * presentation file named by the first argument. With no argument a
 * usage message is written to standard error and the JVM exits with
 * status 1.
 *
 * @param args command line arguments; {@code args[0]} is the file to read
 * @throws Exception propagated from opening or reading the presentation
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Use:");
        System.err.println(" XSLFPowerPointExtractor <filename.pptx>");
        System.exit(1);
    }

    // try-with-resources closes the extractor even when getText() throws
    try (XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(new XSLFSlideShow(args[0]))) {
        System.out.println(extractor.getText());
    }
}
/**
 * Opens the presentation stored in the given package.
 * <p>
 * If the core part carries the theme manager content type the package is
 * rebased first. The presentation document is then parsed from the core
 * part, and for every referenced slide the parts reachable through its
 * OLE-object and package-object relationships are collected as embedded
 * parts. OLE-object relationships with an external target are skipped.
 *
 * @param container the package holding the presentation
 * @throws OpenXML4JException declared; propagated from the package operations
 * @throws IOException declared; propagated from reading the core part
 * @throws XmlException declared; propagated from parsing the presentation XML
 */
public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException {
    super(container);

    if (getCorePart().getContentType().equals(XSLFRelation.THEME_MANAGER.getContentType())) {
        rebase(getPackage());
    }

    // Close the core part's stream once the document has been parsed
    // instead of leaving it open.
    try (java.io.InputStream stream = getCorePart().getInputStream()) {
        presentationDoc = PresentationDocument.Factory.parse(stream, DEFAULT_XML_OPTIONS);
    }

    embedds = new LinkedList<>();
    // The core part does not change while the slides are walked, so look it up once.
    PackagePart corePart = getCorePart();
    for (CTSlideIdListEntry ctSlide : getSlideReferences().getSldIdArray()) {
        PackagePart slidePart = corePart.getRelatedPart(corePart.getRelationship(ctSlide.getId2()));

        for (PackageRelationship rel : slidePart.getRelationshipsByType(OLE_OBJECT_REL_TYPE)) {
            if (TargetMode.EXTERNAL == rel.getTargetMode()) {
                // An external target is not resolved to a part here
                continue;
            }
            // TODO: Add this reference to each slide as well
            embedds.add(slidePart.getRelatedPart(rel));
        }

        for (PackageRelationship rel : slidePart.getRelationshipsByType(PACK_OBJECT_REL_TYPE)) {
            embedds.add(slidePart.getRelatedPart(rel));
        }
    }
}
public XSLFSlideShow(String file) throws OpenXML4JException, IOException, XmlException {
/** * Gets the PackagePart of the notes for the * given slide, or null if there isn't one. */ public PackagePart getNodesPart(CTSlideIdListEntry parentSlide) throws IOException, XmlException { PackageRelationshipCollection notes; PackagePart slidePart = getSlidePart(parentSlide); try { notes = slidePart.getRelationshipsByType(XSLFRelation.NOTES.getRelation()); } catch(InvalidFormatException e) { throw new IllegalStateException(e); } if(notes.size() == 0) { // No notes for this slide return null; } if(notes.size() > 1) { throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size()); } try { return slidePart.getRelatedPart(notes.getRelationship(0)); } catch(InvalidFormatException e) { throw new IllegalStateException(e); } } /**