/**
 * Builds a map representation of the document annotation, keyed by the {@code JsonJCas.DA_*}
 * constants.
 *
 * <p>Caveats and releasability are always present in the result: when the annotation returns
 * {@code null} for either of them, an empty {@code String[]} is stored instead.
 *
 * @param da the document annotation to read from
 * @return a new map holding the document type, language, source URI, classification, caveats and
 *     releasability
 */
private Map<String, Object> serialiseDocumentAnnotation(final DocumentAnnotation da) {
  final Map<String, Object> serialised = new HashMap<>();

  // Scalar values are stored exactly as the annotation returns them (possibly null).
  serialised.put(JsonJCas.DA_DOCUMENT_TYPE, da.getDocType());
  serialised.put(JsonJCas.DA_LANGUAGE, da.getLanguage());
  serialised.put(JsonJCas.DA_SOURCE_URI, da.getSourceUri());
  serialised.put(JsonJCas.DA_CLASSIFICATION, da.getDocumentClassification());

  // A missing caveats array is written as an empty array rather than left out.
  final String[] caveatValues;
  if (da.getDocumentCaveats() == null) {
    caveatValues = new String[0];
  } else {
    caveatValues = da.getDocumentCaveats().toArray();
  }
  serialised.put(JsonJCas.DA_CAVEATS, caveatValues);

  // Same treatment for releasability.
  final String[] releasabilityValues;
  if (da.getDocumentReleasability() == null) {
    releasabilityValues = new String[0];
  } else {
    releasabilityValues = da.getDocumentReleasability().toArray();
  }
  serialised.put(JsonJCas.DA_RELEASABILITY, releasabilityValues);

  return serialised;
}
/**
 * Copies the fields of the document annotation into the supplied properties map, using the
 * {@code AnalysisConstants} keys.
 *
 * <p>Caveats and releasability are converted with {@code UimaTypesUtils.toList}; the timestamp
 * is wrapped in a {@link Date}.
 *
 * @param properties the map to add the entries to
 * @param da the document annotation to read from
 */
private void addDocumentAnnotationToProperties(
    final Map<String, Object> properties, final DocumentAnnotation da) {
  // Entries are added in the same sequence as before in case the caller's map is ordered.
  properties.put(AnalysisConstants.DOCUMENT_TYPE, da.getDocType());
  properties.put(AnalysisConstants.CAVEATS, UimaTypesUtils.toList(da.getDocumentCaveats()));
  properties.put(AnalysisConstants.CLASSIFICATION, da.getDocumentClassification());
  properties.put(
      AnalysisConstants.RELEASABILITY, UimaTypesUtils.toList(da.getDocumentReleasability()));
  properties.put(AnalysisConstants.LANGUAGE, da.getLanguage());
  properties.put(AnalysisConstants.HASH, da.getHash());
  properties.put(AnalysisConstants.SOURCE, da.getSourceUri());

  // The annotation's timestamp is stored as a Date rather than the raw value.
  final Date timestamp = new Date(da.getTimestamp());
  properties.put(AnalysisConstants.TIMESTAMP, timestamp);
}
/**
 * Create a map containing information from the DocumentAnnotation object.
 *
 * <p>The "dateAccessed" entry is always present. The "sourceUri", "docType" and "classification"
 * entries are only added when the corresponding value is neither null nor empty, and the
 * "caveats" and "releasability" entries are only added when the corresponding array is non-null
 * and has at least one element.
 *
 * <p>The classification is upper-cased with {@code Locale.ROOT} so that the stored value does
 * not depend on the JVM's default locale (for example, a Turkish default locale would otherwise
 * map "i" to a dotted capital "İ").
 *
 * @param da the document annotation to read from
 * @return a new map holding the populated entries
 */
public static Map<String, Object> createDocumentAnnotationMap(DocumentAnnotation da) {
  Map<String, Object> map = new HashMap<>();

  if (!Strings.isNullOrEmpty(da.getSourceUri())) {
    map.put("sourceUri", da.getSourceUri());
  }

  map.put("dateAccessed", da.getTimestamp());

  if (!Strings.isNullOrEmpty(da.getDocType())) {
    map.put("docType", da.getDocType());
  }

  if (!Strings.isNullOrEmpty(da.getDocumentClassification())) {
    // Locale-independent upper-casing; fully qualified to avoid needing a new import.
    map.put(
        "classification", da.getDocumentClassification().toUpperCase(java.util.Locale.ROOT));
  }

  if (da.getDocumentCaveats() != null) {
    String[] caveats = da.getDocumentCaveats().toArray();
    // Empty arrays are omitted rather than stored.
    if (caveats.length > 0) {
      map.put("caveats", caveats);
    }
  }

  if (da.getDocumentReleasability() != null) {
    String[] rels = da.getDocumentReleasability().toArray();
    if (rels.length > 0) {
      map.put("releasability", rels);
    }
  }

  return map;
}
document.put("classification", da.getDocumentClassification().toUpperCase()); if (da.getDocumentCaveats() != null) { String[] caveats = da.getDocumentCaveats().toArray(); if (caveats.length > 0) { document.put("caveats", caveats);
/**
 * Binds the document's fields to {@code insertDocStatement}, executes it and returns the key
 * obtained from {@code getKey}.
 *
 * @param jCas the CAS supplying the document annotation, text and language
 * @return the key reported for the inserted document; never {@code null}
 * @throws SQLException if binding parameters or executing the statement fails
 * @throws BaleenException if no key is returned for the insert
 */
private Integer executeDocInsert(JCas jCas) throws SQLException, BaleenException {
  DocumentAnnotation annotation = getDocumentAnnotation(jCas);
  String externalId = ConsumerUtils.getExternalId(annotation, contentHashAsId);

  // Start from a clean slate so no parameter from a previous execution leaks through.
  insertDocStatement.clearParameters();

  // Parameters 1-7: scalar columns.
  insertDocStatement.setString(1, externalId);
  insertDocStatement.setString(2, annotation.getDocType());
  insertDocStatement.setString(3, annotation.getSourceUri());
  insertDocStatement.setString(4, jCas.getDocumentText());
  insertDocStatement.setString(5, jCas.getDocumentLanguage());
  insertDocStatement.setTimestamp(6, new Timestamp(annotation.getTimestamp()));
  insertDocStatement.setString(7, annotation.getDocumentClassification());

  // Parameters 8-9: array columns built via createVarcharArray.
  insertDocStatement.setArray(
      8, createVarcharArray(postgresResource.getConnection(), annotation.getDocumentCaveats()));
  insertDocStatement.setArray(
      9,
      createVarcharArray(
          postgresResource.getConnection(), annotation.getDocumentReleasability()));

  insertDocStatement.executeUpdate();

  Integer generatedKey = getKey(insertDocStatement);
  if (generatedKey != null) {
    return generatedKey;
  }
  throw new BaleenException("No document key returned");
}
variables, FIELD_DOCUMENT_CAVEATS, UimaTypesUtils.toList(da.getDocumentCaveats())); setIfListValue( variables,
variables, FIELD_DOCUMENT_CAVEATS, UimaTypesUtils.toList(da.getDocumentCaveats())); setIfListValue( variables,
head, "document.caveats", String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats()))); appendMeta( head,
/**
 * Asserts that the supplied variables match the document annotation of the given CAS, plus the
 * fixed published-id and metadata values this test expects.
 *
 * @param jCas the CAS whose document annotation provides the expected values
 * @param variables the map under test
 */
@SuppressWarnings("unchecked")
private void assertMetadata(JCas jCas, Map<String, Object> variables) {
  DocumentAnnotation annotation = UimaSupport.getDocumentAnnotation(jCas);
  String expectedExternalId = ConsumerUtils.getExternalId(annotation, false);

  // Fields taken directly from the document annotation.
  assertEquals(annotation.getDocType(), variables.get(FIELD_DOCUMENT_TYPE));
  assertEquals(annotation.getSourceUri(), variables.get(FIELD_DOCUMENT_SOURCE));
  assertEquals(annotation.getLanguage(), variables.get(FIELD_DOCUMENT_LANGUAGE));
  assertEquals(new Date(annotation.getTimestamp()), variables.get(FIELD_DOCUMENT_TIMESTAMP));
  assertEquals(
      annotation.getDocumentClassification(), variables.get(FIELD_DOCUMENT_CLASSIFICATION));
  assertEquals(
      UimaTypesUtils.toList(annotation.getDocumentCaveats()),
      variables.get(FIELD_DOCUMENT_CAVEATS));
  // Releasability is expected to be absent altogether.
  assertFalse(variables.containsKey(FIELD_DOCUMENT_RELEASABILITY));

  // First published id.
  List<Map<String, String>> publishedIds =
      (List<Map<String, String>>) variables.get(FIELD_PUBLISHEDIDS);
  Map<String, String> firstPublishedId = publishedIds.get(0);
  assertEquals("12", firstPublishedId.get(FIELD_PUBLISHEDIDS_ID));
  assertEquals("test", firstPublishedId.get(FIELD_PUBLISHEDIDS_TYPE));

  // Metadata values stored under the "test" key.
  Map<String, Collection<Object>> metadata =
      (Map<String, Collection<Object>>) variables.get(FIELD_METADATA);
  Collection<Object> testValues = metadata.get("test");
  assertTrue(testValues.contains("1"));
  assertTrue(testValues.contains("2"));
  assertEquals(2, testValues.size());

  assertNull(variables.get(FIELD_CONTENT));
  assertEquals(expectedExternalId, variables.get("externalId"));
}
/**
 * Builds a document for the given CAS and inserts it into {@code documentsCollection}.
 *
 * <p>The document carries the external id, a nested section with the document annotation's
 * fields, whatever {@code addPublishedIds} and {@code addMetadata} contribute, and, when
 * {@code outputContent} is set, the document text.
 *
 * @param documentId the external id to store
 * @param jCas the CAS to read the document annotation (and optionally the text) from
 */
private void saveDocument(String documentId, JCas jCas) {
  Document root = new Document();
  DocumentAnnotation annotation = getDocumentAnnotation(jCas);

  // Nested section holding the values read from the document annotation.
  Document details =
      new Document()
          .append(FIELD_DOCUMENT_TYPE, annotation.getDocType())
          .append(FIELD_DOCUMENT_SOURCE, annotation.getSourceUri())
          .append(FIELD_DOCUMENT_LANGUAGE, annotation.getLanguage())
          .append(FIELD_DOCUMENT_TIMESTAMP, new Date(annotation.getTimestamp()))
          .append(FIELD_DOCUMENT_CLASSIFICATION, annotation.getDocumentClassification())
          .append(FIELD_DOCUMENT_CAVEATS, toList(annotation.getDocumentCaveats()))
          .append(FIELD_DOCUMENT_RELEASABILITY, toList(annotation.getDocumentReleasability()));

  root.append(fields.getExternalId(), documentId).append(FIELD_DOCUMENT, details);

  addPublishedIds(jCas, root);
  addMetadata(jCas, root);

  // The text is only stored when content output is enabled.
  if (outputContent) {
    root.append(FIELD_CONTENT, jCas.getDocumentText());
  }

  documentsCollection.insertOne(root);
}
addProperty(vDoc, "timestamp", da.getTimestamp()); addProperty(vDoc, "classification", da.getDocumentClassification()); addListProperty(vDoc, "caveats", UimaTypesUtils.toList(da.getDocumentCaveats())); addListProperty(vDoc, "releasablity", UimaTypesUtils.toList(da.getDocumentReleasability()));