/**
 * Builds the configuration for a single field from its definition and a path.
 *
 * <p>The configuration is constructed from the definition's name and the supplied path; the
 * required, repeat and regex settings are then copied over from the definition.
 *
 * <p>NOTE(review): the definition's default value is not copied here - confirm that this is
 * intended.
 *
 * @param templateFieldDefinition the field definition to copy settings from
 * @param fieldPath the path handed to the configuration's constructor
 * @return the populated field configuration
 */
private TemplateFieldConfiguration makeField(
    TemplateFieldDefinition templateFieldDefinition, String fieldPath) {
  final TemplateFieldConfiguration configuration =
      new TemplateFieldConfiguration(templateFieldDefinition.getName(), fieldPath);

  configuration.setRequired(templateFieldDefinition.getRequired());
  configuration.setRepeat(templateFieldDefinition.getRepeat());
  configuration.setRegex(templateFieldDefinition.getRegex());

  return configuration;
}
/**
 * Creates a field definition in the given JCas and sets its begin and end offsets.
 *
 * <p>After delegating to the superclass constructor, the offsets are stored through the regular
 * setters and {@code readObject()} is invoked last.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public TemplateFieldDefinition(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this view; presumably a post-construction
  // initialisation hook - confirm before relying on it.
  readObject();
}
/**
 * Processes {@code FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT} and checks that the first field
 * definition spans the whole text, is named "all", is required and repeating, and carries the
 * expected regex and default value.
 */
@Test
public void annotateFieldAllAttributesLenient()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT;
  jCas.setDocumentText(documentText);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // The annotation is expected to cover the document from start to finish.
  assertEquals(0, definition.getBegin());
  assertEquals(documentText.length(), definition.getEnd());
  assertEquals("all", definition.getName());
  assertEquals(documentText, definition.getCoveredText());

  // Every optional attribute is expected to be populated.
  assertTrue(definition.getRequired());
  assertTrue(definition.getRepeat());
  assertEquals("\\d?:\\s\\d?", definition.getRegex());
  assertEquals("not found", definition.getDefaultValue());
}
// Closes the enclosing test class, whose header lies outside this view.
}
/**
 * Indexes one record definition and one field definition, then expects processing with an output
 * directory configured to fail with an {@link AnalysisEngineProcessException}.
 *
 * <p>NOTE(review): only a single record definition is indexed in this method even though the
 * test name refers to a non-unique record name - confirm where the clash is meant to come from.
 */
@Test(expected = AnalysisEngineProcessException.class)
public void testErrorWhenRecordNameNotUnique()
    throws AnalysisEngineProcessException, ResourceInitializationException, JsonParseException,
        JsonMappingException, IOException {
  // Record definition spanning offsets 123-140.
  TemplateRecordDefinition recordDefinition = new TemplateRecordDefinition(jCas);
  recordDefinition.setBegin(123);
  recordDefinition.setEnd(140);
  recordDefinition.setName("record1");
  recordDefinition.addToIndexes();

  // Field definition spanning offsets 72-75.
  TemplateFieldDefinition fieldDefinition = new TemplateFieldDefinition(jCas);
  fieldDefinition.setBegin(72);
  fieldDefinition.setEnd(75);
  fieldDefinition.setName("myField1");
  fieldDefinition.addToIndexes();

  final String outputDirectory = tempDirectory.toString();
  processJCas(
      TemplateRecordConfigurationCreatingConsumer.PARAM_OUTPUT_DIRECTORY, outputDirectory);
}
/**
 * Turns the template record and field definitions found in the document into record
 * configurations and writes them, sorted by {@code getOrder()}, through the object mapper.
 *
 * <p>Each record definition is handed to {@code processRecordDefinition}. Every field definition
 * still in the working set afterwards gets a configuration of its own, built from that single
 * field and its begin offset.
 *
 * @param jCas the document being processed
 * @throws AnalysisEngineProcessException if creating the writer or writing the configurations
 *     fails with an {@link IOException}
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  CoveringStructureHierarchy hierarchy = CoveringStructureHierarchy.build(jCas, structuralClasses);

  Collection<TemplateRecordDefinition> records =
      JCasUtil.select(jCas, TemplateRecordDefinition.class);
  // Mutable copy: presumably processRecordDefinition removes the fields it assigns to a
  // record, leaving only record-less fields behind - confirm against that method.
  Collection<TemplateFieldDefinition> remainingFields =
      new HashSet<>(JCasUtil.select(jCas, TemplateFieldDefinition.class));

  Map<String, TemplateRecordConfiguration> configurationsByKey = new HashMap<>();
  for (TemplateRecordDefinition record : records) {
    processRecordDefinition(jCas, hierarchy, remainingFields, configurationsByKey, record);
  }

  List<TemplateRecordConfiguration> allConfigurations =
      new ArrayList<>(configurationsByKey.values());
  // Fields left in the working set each become a configuration holding just that field.
  for (TemplateFieldDefinition leftover : remainingFields) {
    List<TemplateFieldConfiguration> singleField =
        makeFields(hierarchy, ImmutableList.of(leftover));
    allConfigurations.add(new TemplateRecordConfiguration(singleField, leftover.getBegin()));
  }

  String sourceName = SourceUtils.getDocumentSourceBaseName(jCas);
  try (Writer writer = createOutputWriter(sourceName)) {
    allConfigurations.sort(Comparator.comparing(TemplateRecordConfiguration::getOrder));
    objectMapper.writeValue(writer, allConfigurations);
  } catch (IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
/**
 * Processes {@code FIELD_TEXT} and checks that the first field definition covers offsets 10-34,
 * is named "PersonFullName", has no default value and is not required.
 */
@Test
public void annotateField()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(FIELD_TEXT);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // Position and text of the marker.
  assertEquals(10, definition.getBegin());
  assertEquals(34, definition.getEnd());
  assertEquals("PersonFullName", definition.getName());
  assertEquals("<<field:PersonFullName>>", definition.getCoveredText());

  // No optional attributes are expected.
  assertNull(definition.getDefaultValue());
  assertFalse(definition.getRequired());
}
paragraph4.addToIndexes(); TemplateFieldDefinition field1 = new TemplateFieldDefinition(jCas); field1.setBegin(72); field1.setEnd(75); field1.setName("field1"); field1.addToIndexes(); TemplateFieldDefinition field2 = new TemplateFieldDefinition(jCas); field2.setBegin(123); field2.setEnd(140); field2.setName("field2"); field2.addToIndexes(); TemplateFieldDefinition field3 = new TemplateFieldDefinition(jCas); field3.setBegin(17); field3.setEnd(20); field3.setName("noRecordField"); field3.addToIndexes();
configurations.add( new TemplateRecordConfiguration( makeFields(structureHierarchy, ImmutableList.of(field)), field.getBegin()));
/**
 * Processes {@code FIELD_REGEX_DEFAULT_REQUIRED_TEXT} and checks that the first field definition
 * spans the whole text, is named "all", is required and repeating, and carries the expected
 * regex and default value.
 */
@Test
public void annotateFieldAllAttributes()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = FIELD_REGEX_DEFAULT_REQUIRED_TEXT;
  jCas.setDocumentText(documentText);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // The annotation is expected to cover the document from start to finish.
  assertEquals(0, definition.getBegin());
  assertEquals(documentText.length(), definition.getEnd());
  assertEquals("all", definition.getName());
  assertEquals(documentText, definition.getCoveredText());

  // Every optional attribute is expected to be populated.
  assertTrue(definition.getRequired());
  assertTrue(definition.getRepeat());
  assertEquals("\\d?:\\s\\d?", definition.getRegex());
  assertEquals("not found", definition.getDefaultValue());
}
/**
 * Creates a field definition in the given JCas and sets its begin and end offsets.
 *
 * <p>After delegating to the superclass constructor, the offsets are stored through the regular
 * setters and {@code readObject()} is invoked last.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public TemplateFieldDefinition(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this view; presumably a post-construction
  // initialisation hook - confirm before relying on it.
  readObject();
}
/**
 * Processes {@code FIELD_REPEAT_TEXT} and checks that the first field definition spans the whole
 * text, is flagged as repeating but not required, and has no default value.
 *
 * <p>NOTE(review): the expected field name is "required" although this test targets the repeat
 * attribute - confirm against the content of {@code FIELD_REPEAT_TEXT}.
 */
@Test
public void annotateFieldRepeat()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = FIELD_REPEAT_TEXT;
  jCas.setDocumentText(documentText);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // The annotation is expected to cover the document from start to finish.
  assertEquals(0, definition.getBegin());
  assertEquals(documentText.length(), definition.getEnd());
  assertEquals("required", definition.getName());
  assertEquals(documentText, definition.getCoveredText());

  // Only the repeat flag is expected to be set.
  assertFalse(definition.getRequired());
  assertTrue(definition.getRepeat());
  assertNull(definition.getDefaultValue());
}
/**
 * Processes {@code FIELD_DEFAULT_TEXT} and checks that the first field definition spans the whole
 * text, is named "ten", has the default value "10", and is neither required nor repeating.
 */
@Test
public void annotateFieldWithDefaultValue()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = FIELD_DEFAULT_TEXT;
  jCas.setDocumentText(documentText);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // The annotation is expected to cover the document from start to finish.
  assertEquals(0, definition.getBegin());
  assertEquals(documentText.length(), definition.getEnd());
  assertEquals("ten", definition.getName());
  assertEquals(documentText, definition.getCoveredText());

  // Only the default value is expected to be set.
  assertEquals("10", definition.getDefaultValue());
  assertFalse(definition.getRequired());
  assertFalse(definition.getRepeat());
}
/**
 * Processes {@code FIELD_REQUIRED_TEXT} and checks that the first field definition spans the
 * whole text, is named "required", is flagged as required but not repeating, and has no default
 * value.
 */
@Test
public void annotateFieldRequired()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = FIELD_REQUIRED_TEXT;
  jCas.setDocumentText(documentText);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // The annotation is expected to cover the document from start to finish.
  assertEquals(0, definition.getBegin());
  assertEquals(documentText.length(), definition.getEnd());
  assertEquals("required", definition.getName());
  assertEquals(documentText, definition.getCoveredText());

  // Only the required flag is expected to be set.
  assertTrue(definition.getRequired());
  assertFalse(definition.getRepeat());
  assertNull(definition.getDefaultValue());
}
/**
 * Processes {@code FIELD_HTML_REGEX} and checks that the first field definition covers offsets
 * 6-90, is named "html", carries the HTML-matching regex from the marker unchanged, has no
 * default value and is not repeating.
 */
@Test
public void annotateFieldWithHtmlRegex()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(FIELD_HTML_REGEX);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // Position and raw text of the marker.
  assertEquals(6, definition.getBegin());
  assertEquals(90, definition.getEnd());
  assertEquals(
      "<<field:html regex=\"/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/\">>",
      definition.getCoveredText());

  // Attributes read from the marker.
  assertEquals("html", definition.getName());
  assertEquals("/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/", definition.getRegex());
  assertNull(definition.getDefaultValue());
  assertFalse(definition.getRepeat());
}
/**
 * Processes {@code FIELD2_TEXT} and checks the first two field definitions: "PersonFullName" at
 * offsets 10-34 and "Description" at offsets 53-74, neither with a default value nor required.
 */
@Test
public void annotate2Fields()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(FIELD2_TEXT);
  processJCas();

  // First field definition in index order.
  TemplateFieldDefinition first = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
  assertEquals(10, first.getBegin());
  assertEquals(34, first.getEnd());
  assertEquals("PersonFullName", first.getName());
  assertEquals("<<field:PersonFullName>>", first.getCoveredText());
  assertNull(first.getDefaultValue());
  assertFalse(first.getRequired());

  // Second field definition in index order.
  TemplateFieldDefinition second = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 1);
  assertEquals(53, second.getBegin());
  assertEquals(74, second.getEnd());
  assertEquals("Description", second.getName());
  assertEquals("<<field:Description>>", second.getCoveredText());
  assertNull(second.getDefaultValue());
  assertFalse(second.getRequired());
}
/**
 * Processes {@code FIELD_REGEX_TEXT} and checks that the first field definition covers offsets
 * 17-82, is named "email", carries the regex from the marker unchanged and has no default value.
 */
@Test
public void annotateFieldWithRegex()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(FIELD_REGEX_TEXT);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  // Position and raw text of the marker.
  assertEquals(17, definition.getBegin());
  assertEquals(82, definition.getEnd());
  assertEquals(
      "<<field:email regex=\"\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b\">>",
      definition.getCoveredText());

  // Attributes read from the marker.
  assertEquals("email", definition.getName());
  assertEquals("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b", definition.getRegex());
  assertNull(definition.getDefaultValue());
}
/**
 * Processes {@code FIELD_NEIGHBOURS} and checks two directly adjacent field definitions: "one" at
 * offsets 0-13 and "two" at offsets 13-26, neither with a default value nor required.
 */
@Test
public void annotateFieldNeighbours()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(FIELD_NEIGHBOURS);
  processJCas();

  // First field definition in index order.
  TemplateFieldDefinition first = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
  assertEquals(0, first.getBegin());
  assertEquals(13, first.getEnd());
  assertEquals("one", first.getName());
  assertEquals("<<field:one>>", first.getCoveredText());
  assertNull(first.getDefaultValue());
  assertFalse(first.getRequired());

  // Second field definition begins exactly where the first one ends.
  TemplateFieldDefinition second = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 1);
  assertEquals(13, second.getBegin());
  assertEquals(26, second.getEnd());
  assertEquals("two", second.getName());
  assertEquals("<<field:two>>", second.getCoveredText());
  assertNull(second.getDefaultValue());
  assertFalse(second.getRequired());
}