/**
 * Creates the "test" record definition whose single field, "field", is selected by
 * {@code Paragraph:nth-of-type(2)} and narrowed by a regex.
 *
 * <p>The regex combines a lookbehind for {@code "brown "} with a lookahead for {@code " jumped"},
 * so only the text between those two markers is captured.
 *
 * @return whatever {@code createRecord} returns for the definition (presumably the location it
 *     was stored at; confirm against {@code createRecord})
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinitionWithRegex() throws IOException {
  final TemplateFieldConfiguration regexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  regexField.setRegex("(?<=brown )(.*)(?= jumped)");

  return createRecord("test", regexField);
}
/**
 * Creates the "test" record definition whose single field, "field", is selected by
 * {@code Paragraph:nth-of-type(3)}, narrowed by a regex, and flagged as required.
 *
 * <p>The regex captures the text between {@code "brown "} (lookbehind) and {@code " jumped"}
 * (lookahead).
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinitionWithRegexRequired() throws IOException {
  final TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=brown )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);

  return createRecord("test", requiredRegexField);
}
/**
 * Creates the "test" record definition whose single field, "field", is selected by
 * {@code Paragraph:nth-of-type(4)}, narrowed by a regex, and given the default value
 * {@code "crawled"}.
 *
 * <p>The regex captures the text between {@code "ant "} (lookbehind) and {@code " over"}
 * (lookahead). Going by the method name, the regex is expected to match the test document so
 * that the default is not used; confirm against the tests that call this helper.
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinitionWithRegexDefaultNotNeeded() throws IOException {
  final TemplateFieldConfiguration defaultedField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  defaultedField.setRegex("(?<=ant )(.*)(?= over)");
  defaultedField.setDefaultValue("crawled");

  return createRecord("test", defaultedField);
}
/**
 * Builds the NAMED record "quote2" (order 2), bounded by {@code Document > Link} before and
 * {@code Document > Link:nth-of-type(2)} after.
 *
 * <p>It carries one field, "quote", at {@code Document > Quotation:nth-of-type(2)}; the repeat
 * flag is set on the field, not on the record.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord2() {
  // The only field of this record; repetition is declared at field level.
  final TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(2)");
  repeatingQuote.setRepeat(true);

  final TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote2");
  quoteRecord.setOrder(2);
  quoteRecord.setPrecedingPath("Document > Link");
  quoteRecord.setFollowingPath("Document > Link:nth-of-type(2)");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return quoteRecord;
}
/**
 * Asserts that {@code definitions} contains a record of kind {@code Kind.DEFAULT} with the
 * expected shape: a {@code null} name and exactly one field, named "noRecordField", at path
 * {@code Paragraph:nth-of-type(1)}.
 *
 * <p>Only the first DEFAULT record in iteration order is inspected. If there is none, the method
 * fails with an {@link AssertionError} carrying an explanatory message rather than an
 * index-out-of-bounds error from an empty list.
 *
 * @param definitions the record definitions to search
 */
protected void assertDefaultRecord(List<TemplateRecordConfiguration> definitions) {
  TemplateRecordConfiguration defaultRecord =
      definitions.stream()
          // Constant-first comparison so a record with a null kind is skipped, not an NPE.
          .filter(definition -> Kind.DEFAULT.equals(definition.getKind()))
          .findFirst()
          .orElseThrow(
              () -> new AssertionError("Expected a record definition of kind DEFAULT, found none"));

  assertEquals(null, defaultRecord.getName());
  assertEquals(1, defaultRecord.getFields().size());

  TemplateFieldConfiguration field = defaultRecord.getFields().get(0);
  assertEquals("noRecordField", field.getName());
  assertEquals("Paragraph:nth-of-type(1)", field.getPath());
}
/**
 * Creates the "record" definition with two fields, passed to {@code createRecord} in this order:
 *
 * <ol>
 *   <li>"optional" at {@code Paragraph:nth-of-type(3)}, left with its default required setting;
 *   <li>"required" at {@code Paragraph:nth-of-type(2)}, explicitly flagged as required.
 * </ol>
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createRecordDefinitionWithRequiredField() throws IOException {
  final TemplateFieldConfiguration optional =
      new TemplateFieldConfiguration("optional", "Paragraph:nth-of-type(3)");

  final TemplateFieldConfiguration mandatory =
      new TemplateFieldConfiguration("required", "Paragraph:nth-of-type(2)");
  mandatory.setRequired(true);

  return createRecord("record", optional, mandatory);
}
/**
 * Creates the "test" record definition with a single plain field, "field", selected by
 * {@code Paragraph:nth-of-type(2)}.
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinition() throws IOException {
  final TemplateFieldConfiguration plainField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  return createRecord("test", plainField);
}
/**
 * Builds the NAMED record "quote1" (order 1), with an empty preceding path and
 * {@code Document > Link} as its following path.
 *
 * <p>It carries one field, "quote", at {@code Document > Quotation:nth-of-type(1)}; the repeat
 * flag is set on the field, not on the record.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord1() {
  // The only field of this record; repetition is declared at field level.
  final TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(1)");
  repeatingQuote.setRepeat(true);

  final TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote1");
  quoteRecord.setOrder(1);
  quoteRecord.setPrecedingPath("");
  quoteRecord.setFollowingPath("Document > Link");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return quoteRecord;
}
/**
 * Creates the "test" record definition whose single field, "field", is selected by
 * {@code Paragraph:nth-of-type(3)}, narrowed by a regex, and flagged as required.
 *
 * <p>The regex captures the text between {@code "white "} (lookbehind) and {@code " jumped"}
 * (lookahead). Going by the method name, this regex is expected not to match the test document,
 * leaving the required field without a value; confirm against the tests that call this helper.
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinitionWithRegexRequiredAndMissing() throws IOException {
  final TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=white )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);

  return createRecord("test", requiredRegexField);
}
/**
 * Creates the "test" record definition whose single field, "field", is selected by
 * {@code Paragraph:nth-of-type(4)}, narrowed by a regex, and given the default value
 * {@code "horse"}.
 *
 * <p>The regex captures the text between {@code "white "} (lookbehind) and {@code " jumped"}
 * (lookahead). Going by the method name, this regex is expected not to match the test document,
 * so the default value comes into play; confirm against the tests that call this helper.
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createGoodRecordDefinitionWithRegexDefaultNeeded() throws IOException {
  final TemplateFieldConfiguration defaultedField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  defaultedField.setRegex("(?<=white )(.*)(?= jumped)");
  defaultedField.setDefaultValue("horse");

  return createRecord("test", defaultedField);
}
/**
 * Creates the "test" record definition with a single field, "field", selected by the bare
 * {@code Table} path.
 *
 * <p>NOTE(review): the method name implies this definition is intentionally unusable for the
 * test document; the reason is not visible here, so confirm against the tests that call it.
 *
 * @return whatever {@code createRecord} returns for the definition
 * @throws IOException as declared; presumably propagated from {@code createRecord}
 */
private Path createBadRecordDefinition() throws IOException {
  final TemplateFieldConfiguration tableField = new TemplateFieldConfiguration("field", "Table");
  return createRecord("test", tableField);
}
/**
 * Builds the NAMED record "missing" (order 3), bounded by {@code Document > Link:nth-of-type(2)}
 * before and {@code Document > Link:nth-of-type(3)} after.
 *
 * <p>It carries one field, "quote", at {@code Document > Quotation:nth-of-type(3)}; the repeat
 * flag is set on the field, not on the record.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createMissingRepeatQuoteRecord() {
  // The only field of this record; repetition is declared at field level.
  final TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(3)");
  repeatingQuote.setRepeat(true);

  final TemplateRecordConfiguration missingRecord = new TemplateRecordConfiguration();
  missingRecord.setName("missing");
  missingRecord.setOrder(3);
  missingRecord.setPrecedingPath("Document > Link:nth-of-type(2)");
  missingRecord.setFollowingPath("Document > Link:nth-of-type(3)");
  missingRecord.setKind(Kind.NAMED);
  missingRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return missingRecord;
}
/**
 * Builds the non-repeating NAMED record "single" (order 4), bounded by
 * {@code Document > Link:nth-of-type(3)} before and {@code Document > Table} after.
 *
 * <p>It carries one field, "quote", at {@code Document > Quotation:nth-of-type(4)}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createSingleQuoteRecord() {
  final TemplateRecordConfiguration singleRecord = new TemplateRecordConfiguration();
  singleRecord.setName("single");
  singleRecord.setOrder(4);
  singleRecord.setPrecedingPath("Document > Link:nth-of-type(3)");
  singleRecord.setFollowingPath("Document > Table");
  singleRecord.setRepeat(false);
  singleRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(4)");
  singleRecord.setFieldPaths(ImmutableList.of(quoteField));
  return singleRecord;
}
/**
 * Builds the non-repeating NAMED record "single" (order 4), bounded by
 * {@code Document > Link:nth-of-type(3)} before and {@code Document > Table} after.
 *
 * <p>It carries one field, "quote", at {@code Document > Quotation:nth-of-type(4)}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createSingleQuoteRecord() {
  final TemplateRecordConfiguration singleRecord = new TemplateRecordConfiguration();
  singleRecord.setName("single");
  singleRecord.setOrder(4);
  singleRecord.setPrecedingPath("Document > Link:nth-of-type(3)");
  singleRecord.setFollowingPath("Document > Table");
  singleRecord.setRepeat(false);
  singleRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(4)");
  singleRecord.setFieldPaths(ImmutableList.of(quoteField));
  return singleRecord;
}
/**
 * Builds the repeating NAMED record "quote2" (order 2), bounded by {@code Document > Link}
 * before and {@code Document > Link:nth-of-type(2)} after.
 *
 * <p>The record covers {@code Document > Quotation}, uses that same path as its minimal repeat,
 * and carries one field, "quote", at {@code Document > Quotation:nth-of-type(2)}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord2() {
  final TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote2");
  quoteRecord.setOrder(2);
  quoteRecord.setPrecedingPath("Document > Link");
  quoteRecord.setFollowingPath("Document > Link:nth-of-type(2)");
  quoteRecord.setCoveredPaths(ImmutableList.of("Document > Quotation"));
  quoteRecord.setMinimalRepeat("Document > Quotation");
  quoteRecord.setRepeat(true);
  quoteRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(2)");
  quoteRecord.setFieldPaths(ImmutableList.of(quoteField));
  return quoteRecord;
}
/**
 * Builds the repeating NAMED record "quote1" (order 1), with an empty preceding path and
 * {@code Document > Link} as its following path.
 *
 * <p>The record covers {@code Document > Quotation}, uses that same path as its minimal repeat,
 * and carries one field, "quote", at {@code Document > Quotation:nth-of-type(1)}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord1() {
  final TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote1");
  quoteRecord.setOrder(1);
  quoteRecord.setPrecedingPath("");
  quoteRecord.setFollowingPath("Document > Link");
  quoteRecord.setCoveredPaths(ImmutableList.of("Document > Quotation"));
  quoteRecord.setMinimalRepeat("Document > Quotation");
  quoteRecord.setRepeat(true);
  quoteRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(1)");
  quoteRecord.setFieldPaths(ImmutableList.of(quoteField));
  return quoteRecord;
}
/**
 * Builds the repeating NAMED record "missing" (order 3), bounded by
 * {@code Document > Link:nth-of-type(2)} before and {@code Document > Link:nth-of-type(3)} after.
 *
 * <p>The record covers {@code Document > Quotation}, uses that same path as its minimal repeat,
 * and carries one field, "quote", at {@code Document > Quotation:nth-of-type(3)}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createMissingRepeatQuoteRecord() {
  final TemplateRecordConfiguration missingRecord = new TemplateRecordConfiguration();
  missingRecord.setName("missing");
  missingRecord.setOrder(3);
  missingRecord.setPrecedingPath("Document > Link:nth-of-type(2)");
  missingRecord.setFollowingPath("Document > Link:nth-of-type(3)");
  missingRecord.setCoveredPaths(ImmutableList.of("Document > Quotation"));
  missingRecord.setMinimalRepeat("Document > Quotation");
  missingRecord.setRepeat(true);
  missingRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(3)");
  missingRecord.setFieldPaths(ImmutableList.of(quoteField));
  return missingRecord;
}
/**
 * Builds the repeating NAMED record "section" (order 6), preceded by
 * {@code Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)} and with an empty
 * following path.
 *
 * <p>The record covers the section heading and section paragraph paths, has an empty minimal
 * repeat, and carries two fields in this order: "heading" at
 * {@code Document > Section > Heading} and "para" at {@code Document > Section > Paragraph}.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createSectionRecord() {
  final TemplateRecordConfiguration sectionRecord = new TemplateRecordConfiguration();
  sectionRecord.setName("section");
  sectionRecord.setOrder(6);
  sectionRecord.setPrecedingPath(
      "Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)");
  sectionRecord.setFollowingPath("");
  sectionRecord.setCoveredPaths(
      ImmutableList.of("Document > Section > Heading", "Document > Section > Paragraph"));
  sectionRecord.setMinimalRepeat("");
  sectionRecord.setRepeat(true);
  sectionRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration headingField =
      new TemplateFieldConfiguration("heading", "Document > Section > Heading");
  final TemplateFieldConfiguration paragraphField =
      new TemplateFieldConfiguration("para", "Document > Section > Paragraph");
  sectionRecord.setFieldPaths(ImmutableList.of(headingField, paragraphField));
  return sectionRecord;
}
/**
 * Builds the repeating NAMED record "row" (order 5), bounded by
 * {@code Document > Quotation:nth-of-type(4)} before and {@code Document > Section} after.
 *
 * <p>The record covers {@code Document > Table}, repeats minimally on
 * {@code Document > Table > TableBody > TableRow}, and carries two fields in this order: "cell1"
 * and "cell2", pointing at the first and second {@code TableCell} of a row.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRowRecord() {
  final TemplateRecordConfiguration rowRecord = new TemplateRecordConfiguration();
  rowRecord.setName("row");
  rowRecord.setOrder(5);
  rowRecord.setPrecedingPath("Document > Quotation:nth-of-type(4)");
  rowRecord.setFollowingPath("Document > Section");
  rowRecord.setCoveredPaths(ImmutableList.of("Document > Table"));
  rowRecord.setMinimalRepeat("Document > Table > TableBody > TableRow");
  rowRecord.setRepeat(true);
  rowRecord.setKind(Kind.NAMED);

  final TemplateFieldConfiguration firstCell =
      new TemplateFieldConfiguration(
          "cell1", "Document > Table > TableBody > TableRow > TableCell:nth-of-type(1)");
  final TemplateFieldConfiguration secondCell =
      new TemplateFieldConfiguration(
          "cell2", "Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)");
  rowRecord.setFieldPaths(ImmutableList.of(firstCell, secondCell));
  return rowRecord;
}
/**
 * Builds the repeating NAMED record "row" (order 5), bounded by
 * {@code Document > Quotation:nth-of-type(4)} before and {@code Document > Section} after.
 *
 * <p>The record covers {@code Document > Table} and repeats minimally on
 * {@code Document > Table > TableBody > TableRow}. Its two fields, "cell1" and "cell2", point at
 * the {@code Paragraph} inside the first and second {@code TableCell} of a row, and each field
 * is itself flagged as repeating.
 *
 * @return the populated record configuration
 */
private TemplateRecordConfiguration createRowRecord() {
  final TemplateFieldConfiguration firstCell =
      new TemplateFieldConfiguration(
          "cell1",
          "Document > Table > TableBody > TableRow > TableCell:nth-of-type(1) > Paragraph");
  firstCell.setRepeat(true);

  // NOTE(review): unlike cell1, this selector has no space before the final '>'. Kept verbatim
  // because the string is runtime data; confirm whether the difference is intentional.
  final TemplateFieldConfiguration secondCell =
      new TemplateFieldConfiguration(
          "cell2",
          "Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)> Paragraph");
  secondCell.setRepeat(true);

  final TemplateRecordConfiguration rowRecord = new TemplateRecordConfiguration();
  rowRecord.setName("row");
  rowRecord.setOrder(5);
  rowRecord.setPrecedingPath("Document > Quotation:nth-of-type(4)");
  rowRecord.setFollowingPath("Document > Section");
  rowRecord.setCoveredPaths(ImmutableList.of("Document > Table"));
  rowRecord.setMinimalRepeat("Document > Table > TableBody > TableRow");
  rowRecord.setRepeat(true);
  rowRecord.setKind(Kind.NAMED);
  rowRecord.setFieldPaths(ImmutableList.of(firstCell, secondCell));
  return rowRecord;
}