/**
 * Infers an Avro schema from the JSON content of {@code in} and stores its string form
 * in {@code avroSchema}.
 *
 * <p>The record name and the record-count argument are read from the {@code RECORD_NAME} and
 * {@code NUM_RECORDS_TO_ANALYZE} properties, evaluated against {@code inputFlowFile}. The
 * {@code PRETTY_AVRO_OUTPUT} property is passed to {@code Schema.toString(boolean)}.
 *
 * @param in stream holding the JSON content to analyze
 * @throws IOException if no schema could be inferred from the stream
 */
@Override
public void process(InputStream in) throws IOException {
    Schema as = JsonUtil.inferSchema(
            in,
            context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue(),
            context.getProperty(NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(inputFlowFile).asInteger());

    // inferSchema yields null when the input holds no records; fail with a clear message
    // instead of letting the toString call below raise a bare NullPointerException.
    if (as == null) {
        throw new IOException("Unable to infer an Avro schema: input did not contain any JSON records");
    }

    avroSchema.set(as.toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
}
});
/**
 * Infers the schema of one parsed JSON node, using the {@code name} captured from the
 * enclosing scope.
 *
 * @param node parsed JSON value to analyze
 * @return the schema produced by {@code inferSchema(node, name)}
 */
@Override
public Schema apply(JsonNode node) {
  final Schema inferred = inferSchema(node, name);
  return inferred;
}
});
/**
 * Infers an Avro schema called {@code name} from the JSON content of {@code in}.
 *
 * <p>Delegates to {@code JsonUtil.inferSchema(in, name, 20)}.
 *
 * @param name name handed to the schema inference
 * @param in stream holding the JSON sample
 * @return whatever {@code JsonUtil.inferSchema} returns for the sample
 * @throws IOException declared for callers; raised by the underlying read, if at all
 */
public static Schema fromJSON(String name, InputStream in) throws IOException {
  // NOTE(review): third argument is presumably the number of records to sample - confirm
  // against JsonUtil.inferSchema.
  final int sampleSize = 20;
  return JsonUtil.inferSchema(in, name, sampleSize);
}
/**
 * Infers the schema of one parsed JSON node under the fixed record name {@code "Test"}.
 *
 * @param node parsed JSON value to analyze
 * @return the schema produced by {@code JsonUtil.inferSchema(node, "Test")}
 */
@Override
public Schema apply(JsonNode node) {
  final Schema inferred = JsonUtil.inferSchema(node, "Test");
  return inferred;
}
}));
@Override public int run() throws IOException { Preconditions.checkArgument(samplePaths != null && !samplePaths.isEmpty(), "Sample JSON path is required"); Preconditions.checkArgument(samplePaths.size() == 1, "Only one JSON sample can be given"); // assume fields are nullable by default, users can easily change this Schema sampleSchema = JsonUtil.inferSchema( open(samplePaths.get(0)), recordName, 10); if (sampleSchema != null) { output(sampleSchema.toString(!minimize), console, outputPath); return 0; } else { console.error("Sample file did not contain any records"); return 1; } }
/**
 * Infers an Avro schema from the JSON content of {@code in} and stores its string form
 * in {@code avroSchema}.
 *
 * <p>The record name and the record-count argument are read from the {@code RECORD_NAME} and
 * {@code NUM_RECORDS_TO_ANALYZE} properties, evaluated against {@code inputFlowFile}. The
 * {@code PRETTY_AVRO_OUTPUT} property is passed to {@code Schema.toString(boolean)}.
 *
 * @param in stream holding the JSON content to analyze
 * @throws IOException if no schema could be inferred from the stream
 */
@Override
public void process(InputStream in) throws IOException {
    Schema as = JsonUtil.inferSchema(
            in,
            context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue(),
            context.getProperty(NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(inputFlowFile).asInteger());

    // inferSchema yields null when the input holds no records; fail with a clear message
    // instead of letting the toString call below raise a bare NullPointerException.
    if (as == null) {
        throw new IOException("Unable to infer an Avro schema: input did not contain any JSON records");
    }

    avroSchema.set(as.toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
}
});
/**
 * A JSON object with a string and an array of ints should be inferred as a record with a
 * required string field and an int-array field, and the sample should convert to the
 * matching generic record.
 */
@Test
public void testSchemaInferencePrimitiveArray() throws Exception {
  Schema expectedSchema = SchemaBuilder.record("Test").fields()
      .requiredString("aString")
      .name("anArray").type().array().items().intType().noDefault()
      .endRecord();

  String json = "{" +
      "\"aString\": \"triangle\"," +
      "\"anArray\": [ 1, 2, 3, 4 ]" +
      "}";
  JsonNode parsed = JsonUtil.parse(json);

  Schema inferred = JsonUtil.inferSchema(parsed, "Test");
  Assert.assertEquals("Should produce expected schema", expectedSchema, inferred);

  GenericData.Record expectedRecord = new GenericData.Record(expectedSchema);
  expectedRecord.put("aString", "triangle");
  expectedRecord.put("anArray", Lists.newArrayList(1, 2, 3, 4));

  Assert.assertEquals("Should convert to record",
      expectedRecord, convertGeneric(parsed, expectedSchema));
}
recordSchema, JsonUtil.inferSchema(datum, "Test"));
/**
 * A JSON object containing a nested object should be inferred as a record with a nested
 * record field named after the JSON key, and the sample should convert to the matching
 * generic record.
 */
@Test
public void testSchemaInferenceRecord() throws Exception {
  Schema expectedSchema = SchemaBuilder.record("Test").fields()
      .requiredString("aString")
      .name("aRecord").type().record("aRecord").fields()
          .requiredString("left")
          .requiredString("right")
          .endRecord().noDefault()
      .endRecord();

  String json = "{" +
      "\"aString\": \"triangle\"," +
      "\"aRecord\": { \"left\": \"timid\", \"right\": \"dictionary\" }" +
      "}";
  JsonNode parsed = JsonUtil.parse(json);

  Schema inferred = JsonUtil.inferSchema(parsed, "Test");
  Assert.assertEquals("Should produce expected schema", expectedSchema, inferred);

  // The nested record is built against the schema of the outer record's "aRecord" field.
  Schema nestedSchema = expectedSchema.getField("aRecord").schema();
  GenericData.Record nested = new GenericData.Record(nestedSchema);
  nested.put("left", "timid");
  nested.put("right", "dictionary");

  GenericData.Record expectedRecord = new GenericData.Record(expectedSchema);
  expectedRecord.put("aString", "triangle");
  expectedRecord.put("aRecord", nested);

  Assert.assertEquals("Should convert to record",
      expectedRecord, convertGeneric(parsed, expectedSchema));
}
/**
 * A JSON array mixing {@code null} with ints should be inferred as an array whose items are
 * a union of null and int, and the sample should convert to the matching generic record.
 */
@Test
public void testSchemaInferenceNullablePrimitiveArray() throws Exception {
  Schema expectedSchema = SchemaBuilder.record("Test").fields()
      .requiredString("aString")
      .name("anArray").type().array().items()
          .unionOf().nullType().and().intType().endUnion().noDefault()
      .endRecord();

  String json = "{" +
      "\"aString\": \"triangle\"," +
      "\"anArray\": [ null, 1, 2, 3, 4 ]" +
      "}";
  JsonNode parsed = JsonUtil.parse(json);

  Schema inferred = JsonUtil.inferSchema(parsed, "Test");
  Assert.assertEquals("Should produce expected schema", expectedSchema, inferred);

  GenericData.Record expectedRecord = new GenericData.Record(expectedSchema);
  expectedRecord.put("aString", "triangle");
  expectedRecord.put("anArray", Lists.newArrayList(null, 1, 2, 3, 4));

  Assert.assertEquals("Should convert to record",
      expectedRecord, convertGeneric(parsed, expectedSchema));
}
/**
 * A JSON array mixing {@code null}, ints and a string should be inferred as an array whose
 * items are a union of null, int and string, and the sample should convert to the matching
 * generic record.
 */
@Test
public void testSchemaInferenceMultipleTypes() throws Exception {
  Schema expectedSchema = SchemaBuilder.record("Test").fields()
      .requiredString("aString")
      .name("anArray").type().array().items()
          .unionOf().nullType().and().intType().and().stringType().endUnion().noDefault()
      .endRecord();

  String json = "{" +
      "\"aString\": \"triangle\"," +
      "\"anArray\": [ null, 1, 2, 3, \"winter\" ]" +
      "}";
  JsonNode parsed = JsonUtil.parse(json);

  Schema inferred = JsonUtil.inferSchema(parsed, "Test");
  Assert.assertEquals("Should produce expected schema", expectedSchema, inferred);

  GenericData.Record expectedRecord = new GenericData.Record(expectedSchema);
  expectedRecord.put("aString", "triangle");
  expectedRecord.put("anArray", Lists.newArrayList(null, 1, 2, 3, "winter"));

  Assert.assertEquals("Should convert to record",
      expectedRecord, convertGeneric(parsed, expectedSchema));
}