/**
 * Look up the document annotation of the supplied JCas.
 *
 * <p>The lookup is delegated to {@code UimaSupport.getDocumentAnnotation(JCas)}.
 *
 * @param jCas the JCas to read the document annotation from
 * @return the document annotation obtained from {@code UimaSupport}
 */
protected DocumentAnnotation getDocumentAnnotation(JCas jCas) {
  final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  return documentAnnotation;
}
/**
 * Look up the document annotation of the supplied JCas.
 *
 * <p>The lookup is delegated to {@code UimaSupport.getDocumentAnnotation(JCas)}.
 *
 * @param jCas the JCas to read the document annotation from
 * @return the document annotation obtained from {@code UimaSupport}
 */
protected DocumentAnnotation getDocumentAnnotation(JCas jCas) {
  final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  return documentAnnotation;
}
/**
 * Look up the document annotation of the supplied JCas.
 *
 * <p>The lookup is delegated to {@code UimaSupport.getDocumentAnnotation(JCas)}.
 *
 * @param jCas the JCas to read the document annotation from
 * @return the document annotation obtained from {@code UimaSupport}
 */
protected DocumentAnnotation getDocumentAnnotation(JCas jCas) {
  final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  return documentAnnotation;
}
/**
 * Look up the document annotation of the supplied JCas.
 *
 * <p>The lookup is delegated to {@code UimaSupport.getDocumentAnnotation(JCas)}.
 *
 * @param jCas the JCas to read the document annotation from
 * @return the document annotation obtained from {@code UimaSupport}
 */
protected DocumentAnnotation getDocumentAnnotation(JCas jCas) {
  final DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  return documentAnnotation;
}
/**
 * Returns the base filename from DocumentAnnotation source URI in the given JCas.
 *
 * <p>The basename is the main part of the filename, without extension or enclosing paths, e.g.
 * for path '/some/directory/SomeFile.txt' this method will return 'SomeFile'.
 *
 * @param jCas the {@link JCas} from which to get the document annotation.
 * @return the base name of the document source URI, i.e. the file name with both the enclosing
 *     path and the extension removed
 */
public static String getDocumentSourceBaseName(final JCas jCas) {
  DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  String sourceUri = documentAnnotation.getSourceUri();
  // getBaseName drops the directory part AND the extension; getName would keep the extension
  // ('SomeFile.txt'), which contradicts the documented contract of this method.
  return FilenameUtils.getBaseName(sourceUri);
}
}
/**
 * Derive the external id of the document held in the given JCas.
 *
 * <p>The id is produced by {@code ConsumerUtils.getExternalId} from the document annotation and
 * the {@code contentHashAsId} setting read from {@code options}.
 *
 * @param jCas the JCas whose document annotation is used
 * @return the external id returned by {@code ConsumerUtils.getExternalId}
 */
private String getDocumentId(JCas jCas) {
  final boolean contentHashAsId = options.isContentHashAsId();
  return ConsumerUtils.getExternalId(UimaSupport.getDocumentAnnotation(jCas), contentHashAsId);
}
@Override public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException { DocumentAnnotation doc = UimaSupport.getDocumentAnnotation(jCas); doc.setSourceUri(source); doc.setTimestamp(System.currentTimeMillis()); // Add metadata item to capture which content extractor was used addMetadata(jCas, METADATA_KEY_CONTENT_EXTRACTOR, this.getClass().getName()); }
@Override public void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException { DocumentAnnotation doc = UimaSupport.getDocumentAnnotation(jCas); doc.setSourceUri(source); doc.setTimestamp(System.currentTimeMillis()); // Add metadata item to capture which content extractor was used addMetadata(jCas, METADATA_KEY_CONTENT_EXTRACTOR, this.getClass().getName()); }
/**
 * Fetch the history associated with the document in the given JCas.
 *
 * <p>The history is requested from {@code history} under a key of the form {@code
 * <pipelineName>:<document hash>}.
 *
 * @param jCas the target document
 * @return the history returned by {@code history.getHistory} for that key
 */
public DocumentHistory getDocumentHistory(JCas jCas) {
  final String documentHash = getDocumentAnnotation(jCas).getHash();
  final String historyKey = pipelineName + ":" + documentHash;
  return history.getHistory(historyKey);
}
/**
 * Fetch the history associated with the document in the given JCas.
 *
 * <p>The history is requested from {@code history} under a key of the form {@code
 * <pipelineName>:<document hash>}.
 *
 * @param jCas the target document
 * @return the history returned by {@code history.getHistory} for that key
 */
public DocumentHistory getDocumentHistory(JCas jCas) {
  final String documentHash = getDocumentAnnotation(jCas).getHash();
  final String historyKey = pipelineName + ":" + documentHash;
  return history.getHistory(historyKey);
}
/**
 * Derive the external id of the document held in the given JCas.
 *
 * <p>The id is produced by {@code ConsumerUtils.getExternalId} from the document annotation and
 * the {@code contentHashAsId} setting read from {@code options}.
 *
 * @param jCas the JCas whose document annotation is used
 * @return the external id returned by {@code ConsumerUtils.getExternalId}
 */
private String getDocumentId(JCas jCas) {
  final boolean contentHashAsId = options.isContentHashAsId();
  return ConsumerUtils.getExternalId(UimaSupport.getDocumentAnnotation(jCas), contentHashAsId);
}
/** * Serialise the JCas to a JSON map * * @param jCas to serialise * @return a JSON map representation * @throws IOException if the serialisation cannot be performed */ public Map<String, Object> serialise(final JCas jCas) { final Map<String, Object> output = new HashMap<>(); final DocumentAnnotation da = UimaSupport.getDocumentAnnotation(jCas); // Content and language output.put(JsonJCas.DOCUMENT_TEXT, jCas.getDocumentText()); if (!Strings.isNullOrEmpty(jCas.getDocumentLanguage())) { output.put(JsonJCas.DOCUMENT_LANGUAGE, jCas.getDocumentLanguage()); } // Document Annotation final Map<String, Object> documentAnnotation = serialiseDocumentAnnotation(da); output.put(JsonJCas.DOCUMENT_ANNOTATION, documentAnnotation); // Output all annotations final List<Map<String, Object>> annotationList = serialiseAnnotations(jCas); output.put(JsonJCas.ANNOTATIONS, annotationList); return output; }
/**
 * Clear the id mappings when the given JCas holds a different document from the one last seen.
 *
 * <p>Documents are compared by the hash of their document annotation. If no document has been
 * seen yet, or the hash differs from the remembered one, the mappings are cleared and the new
 * hash is remembered.
 *
 * @param jCas the JCas currently being processed
 * @return true if the mappings were reset, false if the document is the same as last time
 */
public boolean resetIfNewJCas(final JCas jCas) {
  final String documentId = UimaSupport.getDocumentAnnotation(jCas).getHash();

  // Same document as on the previous call: keep the existing mappings
  if (currentDocumentId != null && currentDocumentId.equals(documentId)) {
    return false;
  }

  getMonitor().debug("Reset id cache for document {}", documentId);
  clearMappings();
  currentDocumentId = documentId;
  return true;
}
/**
 * Clear the id mappings when the given JCas holds a different document from the one last seen.
 *
 * <p>Documents are compared by the hash of their document annotation. If no document has been
 * seen yet, or the hash differs from the remembered one, the mappings are cleared and the new
 * hash is remembered.
 *
 * @param jCas the JCas currently being processed
 * @return true if the mappings were reset, false if the document is the same as last time
 */
public boolean resetIfNewJCas(final JCas jCas) {
  final String documentId = UimaSupport.getDocumentAnnotation(jCas).getHash();

  // Same document as on the previous call: keep the existing mappings
  if (currentDocumentId != null && currentDocumentId.equals(documentId)) {
    return false;
  }

  getMonitor().debug("Reset id cache for document {}", documentId);
  clearMappings();
  currentDocumentId = documentId;
  return true;
}
/**
 * Populate the JCas from a MUC entry.
 *
 * <p>The language is set to "en", the document text is taken from the entry, and the entry id is
 * stored as the source URI on the document annotation.
 *
 * @param entry the entry supplying the text and id
 * @param jCas the JCas to populate
 */
@Override
protected void apply(MucEntry entry, JCas jCas) {
  final String language = "en";
  jCas.setDocumentLanguage(language);

  final String text = entry.getText();
  jCas.setDocumentText(text);

  // The entry id doubles as the document's source URI
  UimaSupport.getDocumentAnnotation(jCas).setSourceUri(entry.getId());
}
/**
 * Populate the JCas from a MUC entry.
 *
 * <p>The language is set to "en", the document text is taken from the entry, and the entry id is
 * stored as the source URI on the document annotation.
 *
 * @param entry the entry supplying the text and id
 * @param jCas the JCas to populate
 */
@Override
protected void apply(MucEntry entry, JCas jCas) {
  final String language = "en";
  jCas.setDocumentLanguage(language);

  final String text = entry.getText();
  jCas.setDocumentText(text);

  // The entry id doubles as the document's source URI
  UimaSupport.getDocumentAnnotation(jCas).setSourceUri(entry.getId());
}
/** Checks that {@code UimaSupport.getDocumentAnnotation} yields a non-null result for the JCas. */
@Test
public void testGetDocumentAnnotation() {
  final Object documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);
  assertNotNull(documentAnnotation);
}
/**
 * Assert that the given variable map mirrors the document annotation of the JCas.
 *
 * <p>Checks the document-level fields, the first published id, the "test" metadata values, the
 * absence of releasability and content, and the external id.
 *
 * @param jCas the JCas holding the expected document annotation
 * @param variables the map of values under test
 */
@SuppressWarnings("unchecked")
private void assertMetadata(JCas jCas, Map<String, Object> variables) {
  DocumentAnnotation annotation = UimaSupport.getDocumentAnnotation(jCas);
  String expectedExternalId = ConsumerUtils.getExternalId(annotation, false);

  // Simple document-level fields
  assertEquals(annotation.getDocType(), variables.get(FIELD_DOCUMENT_TYPE));
  assertEquals(annotation.getSourceUri(), variables.get(FIELD_DOCUMENT_SOURCE));
  assertEquals(annotation.getLanguage(), variables.get(FIELD_DOCUMENT_LANGUAGE));
  assertEquals(new Date(annotation.getTimestamp()), variables.get(FIELD_DOCUMENT_TIMESTAMP));
  assertEquals(
      annotation.getDocumentClassification(), variables.get(FIELD_DOCUMENT_CLASSIFICATION));
  assertEquals(
      UimaTypesUtils.toList(annotation.getDocumentCaveats()),
      variables.get(FIELD_DOCUMENT_CAVEATS));
  assertFalse(variables.containsKey(FIELD_DOCUMENT_RELEASABILITY));

  // First published id
  List<Map<String, String>> publishedIds =
      (List<Map<String, String>>) variables.get(FIELD_PUBLISHEDIDS);
  Map<String, String> firstPublishedId = publishedIds.get(0);
  assertEquals("12", firstPublishedId.get(FIELD_PUBLISHEDIDS_ID));
  assertEquals("test", firstPublishedId.get(FIELD_PUBLISHEDIDS_TYPE));

  // Metadata values stored under the "test" key
  Map<String, Collection<Object>> metadata =
      (Map<String, Collection<Object>>) variables.get(FIELD_METADATA);
  assertTrue(metadata.get("test").contains("1"));
  assertTrue(metadata.get("test").contains("2"));
  assertEquals(2, metadata.get("test").size());

  assertNull(variables.get(FIELD_CONTENT));
  assertEquals(expectedExternalId, variables.get("externalId"));
}
/** Process a single body part */ private boolean processBody(JCas jCas, Body body, String sourceUri) throws IOException { if (body instanceof TextBody) { // Process plain text body processTextBody(jCas, (TextBody) body); // Add fields from parent for (Field f : body.getParent().getHeader().getFields()) { addMetadata(jCas, f.getName(), f.getBody()); } // Set up document annotation - this is done by the content extractor in other cases DocumentAnnotation doc = UimaSupport.getDocumentAnnotation(jCas); doc.setSourceUri(sourceUri); doc.setTimestamp(System.currentTimeMillis()); } else if (body instanceof BinaryBody) { processBinaryBody(jCas, (BinaryBody) body, sourceUri); } else if (body instanceof Multipart) { // Multipart message, so recurse Multipart mp = (Multipart) body; return processMultipart(jCas, mp, sourceUri); } else { // No body processed return false; } return true; }
/**
 * Checks the external id for both id strategies: with the content-hash flag set to true for the
 * text "Hello World", and with the flag set to false after a source URI has been assigned.
 */
@Test
public void testExternalId() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  jCas.setDocumentText("Hello World");
  DocumentAnnotation documentAnnotation = UimaSupport.getDocumentAnnotation(jCas);

  // Flag true: id requested with content hash as id
  String contentHashId = ConsumerUtils.getExternalId(documentAnnotation, true);
  assertEquals("a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e", contentHashId);

  // Flag false: id requested once the source URI is set
  documentAnnotation.setSourceUri("http://www.example.com/test.html");
  String sourceBasedId = ConsumerUtils.getExternalId(documentAnnotation, false);
  assertEquals("b2e870534ee6fc1abc14feac22dcfd0b268460ac4205d9c3f68a000aab685f4f", sourceBasedId);
}