for (String val : metadata.getValues(key)) { if (dataBuilder.length() > 1) { dataBuilder.append(", ");
/**
 * Prints every value stored under each of the given metadata names.
 * Each value is written on its own line in the form {@code name: value}.
 *
 * @param names metadata names to look up, processed in array order
 */
public void outputMetadata(String[] names) {
    for (int idx = 0; idx < names.length; idx++) {
        String key = names[idx];
        for (String entry : metadata.getValues(key)) {
            writer.println(key + ": " + entry);
        }
    }
}
/**
 * Counts all values held by a metadata object, summed over every name.
 *
 * @param m metadata to inspect; may be {@code null}
 * @return total number of values, or {@code 0} when {@code m} is {@code null}
 */
int countMetadataValues(Metadata m) {
    int total = 0;
    if (m != null) {
        for (String key : m.names()) {
            total += m.getValues(key).length;
        }
    }
    return total;
}
/**
 * Splits {@code string} on {@code ';'} and adds each piece to {@code property},
 * skipping pieces that are already stored under the property or that repeat
 * within the input. Pieces are compared exactly as split (no trimming).
 *
 * @param metadata metadata object to update
 * @param property property that receives the values
 * @param string   semicolon-delimited values; {@code null} is a no-op
 */
public static void addMulti(Metadata metadata, Property property, String string) {
    if (string == null) {
        return;
    }
    Set<String> known = new HashSet<>();
    String[] existing = metadata.getValues(property);
    if (existing != null) {
        for (String value : existing) {
            known.add(value);
        }
    }
    for (String piece : string.split(";")) {
        // Set.add returns true only for values not seen before.
        if (known.add(piece)) {
            metadata.add(property, piece);
        }
    }
}
/**
 * Writes the metadata as CSV, one row per metadata name: the first column is
 * the name and the remaining columns are its values.
 * <p>
 * The CSV writer (and with it {@code outputStream}) is closed when this method
 * finishes, including when writing fails part-way, so the writer is no longer
 * left open on an exception.
 *
 * @param metadata     metadata to serialize
 * @param outputStream destination stream; written as UTF-8 and closed on return
 * @throws IOException if closing the writer fails
 */
public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException {
    try (CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, UTF_8))) {
        for (String name : metadata.names()) {
            String[] values = metadata.getValues(name);
            // Row layout: [name, value0, value1, ...]
            String[] row = new String[values.length + 1];
            row[0] = name;
            System.arraycopy(values, 0, row, 1, values.length);
            writer.writeNext(row);
        }
    }
}
/**
 * Adds a value to the target property when that property permits multiple
 * values; otherwise defers to the superclass implementation.
 * <p>
 * In the multi-value case, a null or empty value is stored as {@code ""} only
 * when {@code allowEmptyValues} is set, and a value already present under
 * {@code name} is added again only when {@code allowDuplicateValues} is set.
 *
 * @param value the value to add; may be {@code null}
 */
@Override
protected void addMetadata(String value) {
    LOG.trace("adding {}={}", name, value);

    if (targetProperty == null || !targetProperty.isMultiValuePermitted()) {
        super.addMetadata(value);
        return;
    }

    boolean hasText = value != null && value.length() > 0;
    if (!hasText && !allowEmptyValues) {
        return;
    }

    // Reaching here without text means empty values are allowed: normalize to "".
    String effective = hasText ? value : "";

    String[] previous = metadata.getValues(name);
    boolean alreadyPresent = previous != null && Arrays.asList(previous).contains(effective);
    if (!alreadyPresent || allowDuplicateValues) {
        metadata.add(targetProperty, effective);
    }
}
} // end of enclosing type (opened outside this view)
@Override @SuppressWarnings("resource") public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { CSVWriter writer = new CSVWriter(new OutputStreamWriter(entityStream, UTF_8)); for (String name : metadata.names()) { String[] values = metadata.getValues(name); ArrayList<String> list = new ArrayList<String>(values.length + 1); list.add(name); list.addAll(Arrays.asList(values)); writer.writeNext(list.toArray(values)); } // Don't close, just flush the stream writer.flush(); } }
/**
 * Parses the file's metadata and indexes it as a single document in which
 * every metadata value becomes a stored text field named after its key.
 * <p>
 * The document is added to the index once, after all fields have been
 * collected. Previously it was added inside the per-key loop, which submitted
 * the same, still-growing document once per metadata key. As before, nothing
 * is indexed when the parse yields no metadata names.
 *
 * @param file the file whose metadata should be indexed
 * @throws Exception if reading, parsing or indexing fails
 */
public void indexContentSpecificMet(File file) throws Exception {
    Metadata met = new Metadata();
    try (InputStream is = new FileInputStream(file)) {
        tika.parse(is, met);
        String[] keys = met.names();
        Document document = new Document();
        for (String key : keys) {
            for (String val : met.getValues(key)) {
                document.add(new TextField(key, val, Store.YES));
            }
        }
        // One document per file; skip the write when there is nothing to index.
        if (keys.length > 0) {
            writer.addDocument(document);
        }
    }
}
/**
 * Appends one metadata object to the JSON output as an object inside an
 * enclosing array, which is opened on the first call.
 * <p>
 * Names are emitted in sorted order. A name with exactly one value is written
 * as a scalar; any other count is written as a JSON array.
 *
 * @param metadata the metadata to write
 * @throws IOException if the underlying JSON writer fails
 */
public void add(Metadata metadata) throws IOException {
    if (!hasStartedArray) {
        jsonWriter.beginArray();
        hasStartedArray = true;
    }

    String[] sortedKeys = metadata.names();
    Arrays.sort(sortedKeys);

    jsonWriter.beginObject();
    for (String key : sortedKeys) {
        jsonWriter.name(key);
        String[] vals = metadata.getValues(key);
        if (vals.length != 1) {
            jsonWriter.beginArray();
            for (int idx = 0; idx < vals.length; idx++) {
                jsonWriter.value(vals[idx]);
            }
            jsonWriter.endArray();
        } else {
            jsonWriter.value(vals[0]);
        }
    }
    jsonWriter.endObject();
}
/**
 * Dumps every name/value pair of the metadata to standard output,
 * one pair per line in the form {@code name : value}.
 *
 * @param metadata the metadata to print
 */
public static void debug(Metadata metadata) {
    String[] keys = metadata.names();
    for (String key : keys) {
        String[] vals = metadata.getValues(key);
        for (String val : vals) {
            System.out.println(key + " : " + val);
        }
    }
}
} // end of enclosing type (opened outside this view)
/**
 * Dumps every name/value pair of each metadata object in the list to standard
 * output, prefixing each line with the object's zero-based position:
 * {@code index: name : value}.
 *
 * @param list the metadata objects to print
 */
public static void debug(List<Metadata> list) {
    int position = 0;
    for (Metadata entry : list) {
        for (String key : entry.names()) {
            String prefix = position + ": " + key + " : ";
            for (String val : entry.getValues(key)) {
                System.out.println(prefix + val);
            }
        }
        position++;
    }
}
/**
 * Parses a directory listing from standard input and prints the number of
 * {@code Filename} values found plus the {@code NumExecutables} metadata entry.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, SAXException, TikaException {
    DirListParser dirListParser = new DirListParser();
    Metadata collected = new Metadata();
    dirListParser.parse(System.in, new BodyContentHandler(), collected);

    int fileCount = collected.getValues("Filename").length;
    System.out.println("Num files: " + fileCount);

    String executableCount = collected.get("NumExecutables");
    System.out.println("Num executables: " + executableCount);
}
/**
 * Emits one {@code div} per assay file listed in the metadata under
 * {@code studyAssayFileNameField}. Each section gets an {@code h3} heading
 * {@code "ASSAY <file name>"} followed by the output of
 * {@link ISATabUtils#parseAssay}.
 * <p>
 * Each assay file is opened relative to {@code this.location} and is closed
 * even when parsing fails, so the stream no longer leaks on an exception.
 *
 * @param xhtml    handler that receives the generated XHTML
 * @param metadata metadata holding the assay file names; also passed to the assay parser
 * @param context  parse context forwarded to the assay parser
 */
private void parseAssay(XHTMLContentHandler xhtml, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    for (String assayFileName : metadata.getValues(studyAssayFileNameField)) {
        xhtml.startElement("div");
        xhtml.element("h3", "ASSAY " + assayFileName);
        try (InputStream stream = TikaInputStream.get(new File(this.location + assayFileName))) {
            ISATabUtils.parseAssay(stream, xhtml, metadata, context);
        }
        xhtml.endElement("div");
    }
}
} // end of enclosing type (opened outside this view)
/**
 * Renders the metadata, and the handler's content when present, as UTF-8 bytes.
 * <p>
 * Every metadata value is written as a {@code name : value} line. Unless the
 * handler is exactly a {@link DefaultHandler}, a {@code CONTENT:} section with
 * the handler's string form follows.
 *
 * @param contentHandler handler whose {@code toString()} supplies the content
 * @param metadata       metadata to list
 * @return the rendered text encoded as UTF-8
 */
private byte[] toString(ContentHandler contentHandler, Metadata metadata) {
    StringBuilder out = new StringBuilder();
    for (String key : metadata.names()) {
        for (String val : metadata.getValues(key)) {
            out.append(key);
            out.append(" : ");
            out.append(val);
            out.append("\n");
        }
    }

    boolean isPlainDefaultHandler = contentHandler.getClass().equals(DefaultHandler.class);
    if (!isPlainDefaultHandler) {
        out.append("\n")
           .append("CONTENT: ")
           .append(contentHandler.toString())
           .append("\n\n");
    }
    return out.toString().getBytes(StandardCharsets.UTF_8);
}
} // end of enclosing type (opened outside this view)
public static void process(Path path) throws Exception { Parser parser = new AutoDetectParser(); Metadata metadata = new Metadata(); // The PhoneExtractingContentHandler will examine any characters for phone numbers before passing them // to the underlying Handler. PhoneExtractingContentHandler handler = new PhoneExtractingContentHandler(new BodyContentHandler(), metadata); try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) { parser.parse(stream, handler, metadata, new ParseContext()); } String[] numbers = metadata.getValues("phonenumbers"); Collections.addAll(phoneNumbers, numbers); } }
public static void process(Path path) throws Exception { Parser parser = new AutoDetectParser(); Metadata metadata = new Metadata(); // The StandardsExtractingContentHandler will examine any characters for // standard references before passing them // to the underlying Handler. StandardsExtractingContentHandler handler = new StandardsExtractingContentHandler(new BodyContentHandler(-1), metadata); handler.setThreshold(0.75); try (InputStream stream = new BufferedInputStream(Files.newInputStream(path))) { parser.parse(stream, handler, metadata, new ParseContext()); } String[] references = metadata.getValues(StandardsExtractingContentHandler.STANDARD_REFERENCES); Collections.addAll(standardReferences, references); } }
/**
 * Copies every name and value of {@code m} into a newly created Metadata object.
 * Single-valued names are copied with {@code set}; multi-valued names are
 * copied value by value with {@code add}.
 *
 * @param m the metadata to copy
 * @return a new Metadata holding the same names and values
 */
public static Metadata cloneMetadata(Metadata m) {
    Metadata copy = new Metadata();
    for (String key : m.names()) {
        if (m.isMultiValued(key)) {
            for (String val : m.getValues(key)) {
                copy.add(key, val);
            }
        } else {
            copy.set(key, m.get(key));
        }
    }
    return copy;
}
/**
 * Loads the JSON extract with each ExtractReader list-handling mode and checks
 * how many metadata objects come back and which content each one holds.
 */
@Test
public void testBasic() throws Exception {
    // Default reader: both entries are expected, each with its own content.
    ExtractReader extractReader = new ExtractReader();
    List<Metadata> metadataList = extractReader.loadExtract(testJsonFile);
    assertEquals(2, metadataList.size());
    Metadata container = metadataList.get(0);
    Metadata embedded = metadataList.get(1);
    assertEquals(1,
            container.getValues(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).length);
    assertEquals(1,
            embedded.getValues(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).length);
    assertContains("fox",
            container.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
    assertContains("attachment",
            embedded.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));

    // FIRST_ONLY: a single entry is expected, without the attachment text.
    extractReader = new ExtractReader(ExtractReader.ALTER_METADATA_LIST.FIRST_ONLY);
    metadataList = extractReader.loadExtract(testJsonFile);
    assertEquals(1, metadataList.size());
    Metadata firstOnly = metadataList.get(0);
    assertEquals(1,
            firstOnly.getValues(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).length);
    assertContains("fox",
            firstOnly.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
    assertNotContained("attachment",
            firstOnly.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));

    // CONCATENATE_CONTENT_INTO_FIRST: a single entry is expected, holding both texts.
    extractReader = new ExtractReader(
            ExtractReader.ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST);
    metadataList = extractReader.loadExtract(testJsonFile);
    assertEquals(1, metadataList.size());
    Metadata concatenated = metadataList.get(0);
    assertEquals(1,
            concatenated.getValues(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).length);
    assertContains("fox",
            concatenated.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
    assertContains("attachment",
            concatenated.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
}
/**
 * Test for the <code>set(String, String)</code> method: a set replaces any
 * existing value, and a later add appends to the value that was set.
 */
@Test
public void testSet() {
    Metadata meta = new Metadata();

    // Nothing stored yet.
    String[] initial = meta.getValues(CONTENTTYPE);
    assertEquals(0, initial.length);

    // First set stores a single value.
    meta.set(CONTENTTYPE, "value1");
    String[] afterFirstSet = meta.getValues(CONTENTTYPE);
    assertEquals(1, afterFirstSet.length);
    assertEquals("value1", afterFirstSet[0]);

    // Second set replaces rather than appends.
    meta.set(CONTENTTYPE, "value2");
    String[] afterSecondSet = meta.getValues(CONTENTTYPE);
    assertEquals(1, afterSecondSet.length);
    assertEquals("value2", afterSecondSet[0]);

    // Set followed by add yields two values in insertion order.
    meta.set(CONTENTTYPE, "new value 1");
    meta.add("contenttype", "new value 2");
    String[] afterAdd = meta.getValues(CONTENTTYPE);
    assertEquals(2, afterAdd.length);
    assertEquals("new value 1", afterAdd[0]);
    assertEquals("new value 2", afterAdd[1]);
}
@Test public void testTextBasic() throws IOException { ExtractReader extractReader = new ExtractReader(); List<Metadata> metadataList = extractReader.loadExtract(testTxtFile); assertEquals(1, metadataList.size()); Metadata m = metadataList.get(0); assertEquals(1, m.getValues(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT).length); assertEquals("the quick brown fox fox fox jumped over the lazy lazy dog\n", m.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT)); //test that the mime is inferred from the file extension assertEquals("application/msword", m.get(Metadata.CONTENT_TYPE)); }