/**
 * Prints the page count, the entries of the document information dictionary
 * and, when the catalog carries one, the XMP metadata stream to System.out.
 *
 * @param document The document to get the metadata from.
 *
 * @throws IOException If there is an error getting the page count.
 */
public void printMetadata( PDDocument document ) throws IOException
{
    PDDocumentInformation info = document.getDocumentInformation();
    PDDocumentCatalog cat = document.getDocumentCatalog();
    PDMetadata metadata = cat.getMetadata();
    System.out.println( "Page Count=" + document.getNumberOfPages() );
    System.out.println( "Title=" + info.getTitle() );
    System.out.println( "Author=" + info.getAuthor() );
    System.out.println( "Subject=" + info.getSubject() );
    System.out.println( "Keywords=" + info.getKeywords() );
    System.out.println( "Creator=" + info.getCreator() );
    System.out.println( "Producer=" + info.getProducer() );
    System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
    System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
    System.out.println( "Trapped=" + info.getTrapped() );
    // The metadata stream is optional, so the catalog may return null here.
    if( metadata != null )
    {
        // XMP packets embedded in PDF files are UTF-8 encoded (XMP specification, part 3);
        // decoding them as ISO-8859-1 turns every non-ASCII character into mojibake.
        String string = new String( metadata.toByteArray(), "UTF-8" );
        System.out.println( "Metadata=" + string );
    }
}
String subject = dico.getSubject(); if (subject != null)
/**
 * Passes the title, subject, author, creator and producer of the given
 * document information to {@code display}, in that order, each with its label.
 *
 * @param information the document information whose entries are shown
 */
private static void showDocumentInformation(PDDocumentInformation information) {
    final String title = information.getTitle();
    display("Title:", title);

    final String subject = information.getSubject();
    display("Subject:", subject);

    final String author = information.getAuthor();
    display("Author:", author);

    final String creator = information.getCreator();
    display("Creator:", creator);

    final String producer = information.getProducer();
    display("Producer:", producer);
}
/**
 * Returns the subject reported by the {@code info} field.
 *
 * @return whatever {@code info.getSubject()} yields
 */
@Override
public String getSubject() {
    final String subject = this.info.getSubject();
    return subject;
}
/**
 * Returns the subject reported by the {@code info} field.
 *
 * @return {@code info.getSubject()}, or {@code null} when {@code info} itself is {@code null}
 */
@Override
public String getSubject() {
    return (info == null) ? null : info.getSubject();
}
// Pass the modification date, producer, subject, title and trapped entries of `info`
// to addTextField, one call per entry, each under its own field name on `document`.
// NOTE(review): addTextField is defined outside this excerpt; how it treats null values is not visible here.
addTextField(document, "ModificationDate", info.getModificationDate()); addTextField(document, "Producer", info.getProducer()); addTextField(document, "Subject", info.getSubject()); addTextField(document, "Title", info.getTitle()); addTextField(document, "Trapped", info.getTrapped());
// Populate dcSchema (presumably the Dublin Core schema; confirm against its declaration):
// the title comes from the document information, the creator is the fixed string "PDFBox",
// and the document-information subject is stored as the schema's description.
dcSchema.setTitle( info.getTitle() ); dcSchema.addCreator( "PDFBox" ); dcSchema.setDescription( info.getSubject() );
// NOTE(review): extractMultilingualItems is defined outside this excerpt; presumably it fills
// the DESCRIPTION property of `metadata` from dcSchema (confirm).
// The document-information subject is then added under three keys:
// PDF.DOC_INFO_SUBJECT, TikaCoreProperties.SUBJECT and OfficeOpenXMLCore.SUBJECT.
extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject()); addMetadata(metadata, TikaCoreProperties.SUBJECT, info.getSubject()); addMetadata(metadata, OfficeOpenXMLCore.SUBJECT, info.getSubject());
/**
 * Prints the page count, the entries of the document information dictionary
 * and, when the catalog carries one, the XMP metadata stream to System.out.
 *
 * @param document The document to get the metadata from.
 *
 * @throws IOException If there is an error getting the page count.
 */
public void printMetadata( PDDocument document ) throws IOException
{
    PDDocumentInformation info = document.getDocumentInformation();
    PDDocumentCatalog cat = document.getDocumentCatalog();
    PDMetadata metadata = cat.getMetadata();
    System.out.println( "Page Count=" + document.getNumberOfPages() );
    System.out.println( "Title=" + info.getTitle() );
    System.out.println( "Author=" + info.getAuthor() );
    System.out.println( "Subject=" + info.getSubject() );
    System.out.println( "Keywords=" + info.getKeywords() );
    System.out.println( "Creator=" + info.getCreator() );
    System.out.println( "Producer=" + info.getProducer() );
    System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
    System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
    System.out.println( "Trapped=" + info.getTrapped() );
    // The metadata stream is optional, so the catalog may return null here.
    if( metadata != null )
    {
        // XMP packets embedded in PDF files are UTF-8 encoded (XMP specification, part 3);
        // decoding them as ISO-8859-1 turns every non-ASCII character into mojibake.
        String string = new String( metadata.toByteArray(), "UTF-8" );
        System.out.println( "Metadata=" + string );
    }
}
String subject = dico.getSubject(); if (subject != null)
String subject = dico.getSubject(); if (subject != null)
/**
 * Passes the title, subject, author, creator and producer of the given
 * document information to {@code display}, in that order, each with its label.
 *
 * @param information the document information whose entries are shown
 */
private static void showDocumentInformation(PDDocumentInformation information) {
    final String title = information.getTitle();
    display("Title:", title);

    final String subject = information.getSubject();
    display("Subject:", subject);

    final String author = information.getAuthor();
    display("Author:", author);

    final String creator = information.getCreator();
    display("Creator:", creator);

    final String producer = information.getProducer();
    display("Producer:", producer);
}
/**
 * Adds the title, subject, author, creator and producer of the document's
 * information object to {@code metadata}. Nothing is added when the document
 * returns no information object.
 *
 * @param document the PDF document to read the information object from
 * @param metadata the target that receives the five entries
 */
void processDocumentInformation(PDDocument document, Metadata metadata) {
    final PDDocumentInformation info = document.getDocumentInformation();
    if (info != null) {
        metadata.add("title", info.getTitle());
        metadata.add("subject", info.getSubject());
        metadata.add("author", info.getAuthor());
        metadata.add("creator", info.getCreator());
        metadata.add("producer", info.getProducer());
    }
}
// Pass the modification date, producer, subject, title and trapped entries of `info`
// to addTextField, one call per entry, each under its own field name on `document`.
// NOTE(review): addTextField is defined outside this excerpt; how it treats null values is not visible here.
addTextField(document, "ModificationDate", info.getModificationDate()); addTextField(document, "Producer", info.getProducer()); addTextField(document, "Subject", info.getSubject()); addTextField(document, "Title", info.getTitle()); addTextField(document, "Trapped", info.getTrapped());
value = docinfo.getSubject(); if (value != null) { itemService
// Populate dcSchema (presumably the Dublin Core schema; confirm against its declaration):
// the title comes from the document information, the creator is the fixed string "PDFBox",
// and the document-information subject is stored as the schema's description.
dcSchema.setTitle( info.getTitle() ); dcSchema.addCreator( "PDFBox" ); dcSchema.setDescription( info.getSubject() );
// Take the subject and title from docInfo, each routed through checkNotNull, and the
// current access permission from pdfDoc.
// NOTE(review): checkNotNull is not defined in this excerpt; whether it throws on null
// or substitutes a default value must be confirmed at its definition.
subject = checkNotNull(docInfo.getSubject()); title = checkNotNull(docInfo.getTitle()); permissions = pdfDoc.getCurrentAccessPermission();
/**
 * Fills {@code metas} with metadata read from the PDF: the MIME type (first
 * entry of {@code DEFAULT_MIMETYPES}), the document-information entries when
 * an information object is available, the page count, and the catalog
 * language when a catalog is available.
 *
 * @param pdf   the PDF document to read from
 * @param metas the builder that receives the extracted fields
 */
private void extractMetaData(final PDDocument pdf, final ParserFieldsBuilder metas) {
    metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

    // Document-information entries are only added when the information object exists.
    final PDDocumentInformation docInfo = pdf.getDocumentInformation();
    if (docInfo != null) {
        metas.add(TITLE, docInfo.getTitle());
        metas.add(SUBJECT, docInfo.getSubject());
        metas.add(AUTHOR, docInfo.getAuthor());
        metas.add(PRODUCER, docInfo.getProducer());
        metas.add(KEYWORDS, docInfo.getKeywords());
        metas.add(CREATION_DATE, docInfo.getCreationDate());
        metas.add(MODIFICATION_DATE, docInfo.getModificationDate());
    }

    metas.add(NUMBER_OF_PAGES, pdf.getNumberOfPages());

    // The language is read from the catalog, which is null-checked as well.
    final PDDocumentCatalog docCatalog = pdf.getDocumentCatalog();
    if (docCatalog != null) {
        metas.add(LANGUAGE, docCatalog.getLanguage());
    }
}
// NOTE(review): extractMultilingualItems is defined outside this excerpt; presumably it fills
// the DESCRIPTION property of `metadata` from dcSchema (confirm).
// The document-information subject is then added under PDF.DOC_INFO_SUBJECT and
// TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, and the trapped entry under both
// the literal key "trapped" and PDF.DOC_INFO_TRAPPED.
extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject()); addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped());
// Copy the modification date, producer, subject and title from `metadata` into the
// correspondingly named variables (their declarations are outside this excerpt).
modificationDate = metadata.getModificationDate(); producer = metadata.getProducer(); subject = metadata.getSubject(); title = metadata.getTitle();