/**
 * Creates a {@link SourceState} from a set of properties, previous dataset states and previous work unit states.
 *
 * <p>The previous work unit states are first passed through {@code adaptWorkUnitStates}; its result, together
 * with the other two arguments, is handed to the superclass constructor.</p>
 *
 * @param properties {@link State} forwarded unchanged to the superclass constructor
 * @param previousDatasetStatesByUrns previous {@link SourceState}s keyed by {@link String}
 *        (presumably dataset URNs, going by the parameter name)
 * @param previousWorkUnitStates previous {@link WorkUnitState}s, adapted via {@code adaptWorkUnitStates}
 */
public SourceState(State properties, Map<String, ? extends SourceState> previousDatasetStatesByUrns, Iterable<WorkUnitState> previousWorkUnitStates) { super(properties, previousDatasetStatesByUrns, adaptWorkUnitStates(previousWorkUnitStates)); }
/**
 * Feeds one JSON object through the shared {@code converter} and returns the first converted element.
 *
 * @param json object to convert; it is handed to the converter in its {@link String} form
 * @param record array passed as the first argument of {@code convertRecord}
 * @return the first element of the iterable produced by {@code convertRecord}
 * @throws DataConversionException if the conversion fails
 */
private JsonObject parseJsonObject(JsonObject json, JsonArray record) throws DataConversionException {
  final String serialized = json.toString();
  final WorkUnitState freshState = new WorkUnitState();
  return converter.convertRecord(record, serialized, freshState).iterator().next();
}
/**
 * Prepares the fixtures shared by every test in this class: a {@link JsonStringToJsonIntermediateConverter}
 * whose schema has been converted from {@code "[]"}, and the JSON test data loaded from the classpath resource
 * {@code /converter/JsonStringToJsonIntermediateConverter.json}.
 *
 * @throws SchemaConversionException if the converter rejects the schema
 */
@BeforeClass
public static void setUp() throws SchemaConversionException {
  converter = new JsonStringToJsonIntermediateConverter();

  WorkUnitState workUnit = new WorkUnitState();
  // Use setProp so the flag is really stored on the work unit; a bare getPropAsBoolean call only reads
  // the property (falling back to the supplied default) and would leave the work unit unchanged.
  workUnit.setProp("gobblin.converter.jsonStringToJsonIntermediate.unpackComplexSchemas", true);
  converter.convertSchema("[]", workUnit);

  Type jsonType = new TypeToken<JsonObject>() { }.getType();
  Gson gson = new Gson();
  testJsonData = gson.fromJson(new InputStreamReader(JsonStringToJsonIntermediateConverterTest.class
      .getResourceAsStream("/converter/JsonStringToJsonIntermediateConverter.json")), jsonType);
}
/**
 * Converts a sample access-log line with a {@link GrokToJsonConverter} against the schema loaded from
 * {@code /converter/grok/schemaWithNonNullableFields.json} and expects the conversion to fail with a
 * {@link DataConversionException}.
 *
 * @throws Exception the expected {@link DataConversionException}, or any unexpected failure
 */
@Test(expectedExceptions = DataConversionException.class)
public void convertOutputWithNonNullableFieldsShouldThrowDataConversionException() throws Exception {
  JsonParser parser = new JsonParser();
  String inputRecord =
      "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
  JsonElement jsonElement = parser.parse(
      new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNonNullableFields.json")));
  JsonArray outputSchema = jsonElement.getAsJsonArray();

  GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
  try {
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
    grokToJsonConverter.init(workUnitState);

    // The result is irrelevant here: pulling the first element is what must raise the expected exception.
    grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } finally {
    // Runs on the expected exception path too, so the converter is always released.
    grokToJsonConverter.close();
  }
}
/**
 * Reports the final state of this object.
 *
 * <p>This implementation contributes nothing and hands back a fresh, empty
 * {@link gobblin.configuration.State}. Concrete subclasses can override it to add information that will be
 * added to the task state.</p>
 *
 * @return a new, empty {@link gobblin.configuration.State}
 */
@Override
public State getFinalState() {
  State emptyFinalState = new State();
  return emptyFinalState;
}
/**
 * Builds the {@link String} that identifies a fork branch.
 *
 * <p>The branch name is looked up in {@code state} under
 * {@code ConfigurationKeys.FORK_BRANCH_NAME_KEY + "." + index}; when that property is absent the fallback
 * {@code ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + index} is used.</p>
 *
 * @param state {@link State} holding the branch name property
 * @param index index of the branch
 * @return the configured branch name, or the index-suffixed default
 */
private static String getForkMetricsId(State state, int index) {
  final String branchNameKey = ConfigurationKeys.FORK_BRANCH_NAME_KEY + "." + index;
  final String fallbackName = ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + index;
  return state.getProp(branchNameKey, fallbackName);
}
/**
 * Accepts a work unit state only when its working state equals {@code WorkingState.SUCCESSFUL}.
 *
 * @param workUnitState the state to test
 * @return {@code true} if the working state is {@code SUCCESSFUL}, {@code false} otherwise
 */
@Override
public boolean apply(WorkUnitState workUnitState) {
  final boolean succeeded = workUnitState.getWorkingState().equals(WorkingState.SUCCESSFUL);
  return succeeded;
}
}
/**
 * Returns how many {@link Dataset}s are verified together, so that several {@link Dataset}s can share one
 * verification job (for example, one query).
 *
 * <p>The value is read from the {@code COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER}
 * property, falling back to
 * {@code DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER}.</p>
 *
 * @return the configured number of datasets per verification job, or the default
 */
private int getNumDatasetsVerifiedTogether() {
  final int datasetsPerJob = this.state.getPropAsInt(
      COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER);
  return datasetsPerJob;
}
/**
 * Maps a work unit state to its actual high watermark, read as a {@link LongWatermark}.
 *
 * @param w the work unit state to read from
 * @return the value of {@code getActualHighWatermark(LongWatermark.class)} for {@code w}
 */
@Override
public LongWatermark apply(WorkUnitState w) {
  LongWatermark actualHighWatermark = w.getActualHighWatermark(LongWatermark.class);
  return actualHighWatermark;
}
}).toList();
/**
 * Convenience constructor that delegates to the four-argument constructor, supplying a newly created
 * {@link SourceState} as its first argument.
 *
 * @param type {@link TableType}
 * @param namespace dot separated namespace path
 * @param table table name
 */
public Extract(TableType type, String namespace, String table) { this(new SourceState(), type, namespace, table); }
/**
 * Creates a {@link WorkUnitState} by delegating to
 * {@link #WorkUnitState(WorkUnit, State, SharedResourcesBroker)} with a broker obtained from
 * {@code buildTaskBroker(null, jobState, workUnit)}.
 *
 * <p>If creating a {@link WorkUnitState} for use by a task, use
 * {@link #WorkUnitState(WorkUnit, State, SharedResourcesBroker)} instead.</p>
 *
 * @param workUnit the {@link WorkUnit} forwarded to the three-argument constructor and to {@code buildTaskBroker}
 * @param jobState the job {@link State} forwarded to the three-argument constructor and to {@code buildTaskBroker}
 */
public WorkUnitState(WorkUnit workUnit, State jobState) { this(workUnit, jobState, buildTaskBroker(null, jobState, workUnit)); }
/**
 * Converts a sample access-log line with a {@link GrokToJsonConverter} against the schema loaded from
 * {@code /converter/grok/schemaWithNullableFields.json} and checks that the first converted record equals
 * the JSON object stored in {@code /converter/grok/convertedRecord.json}.
 *
 * @throws Exception if loading the resources, initialising the converter or the conversion itself fails
 */
@Test
public void convertOutputWithNullableFields() throws Exception {
  JsonParser parser = new JsonParser();
  String inputRecord =
      "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
  JsonElement jsonElement = parser
      .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNullableFields.json")));
  JsonArray outputSchema = jsonElement.getAsJsonArray();

  GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
  try {
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
    grokToJsonConverter.init(workUnitState);

    JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    JsonObject expected =
        parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedRecord.json")))
            .getAsJsonObject();

    Assert.assertEquals(actual, expected);
  } finally {
    // Release the converter even when the conversion or the assertion fails.
    grokToJsonConverter.close();
  }
}
/**
 * Returns the execution metadata of this object; this implementation has none to report.
 *
 * @return a new, empty {@link State}
 */
@Override
public State getExecutionMetadata() {
  State emptyMetadata = new State();
  return emptyMetadata;
}
/**
 * Accepts every input that is not a successfully finished work unit state.
 *
 * @param input the state to test; may be {@code null}
 * @return {@code true} if {@code input} is {@code null} or its working state is not
 *         {@code WorkingState.SUCCESSFUL}; {@code false} otherwise
 */
@Override
public boolean apply(WorkUnitState input) {
  if (input == null) {
    return true;
  }
  return !WorkingState.SUCCESSFUL.equals(input.getWorkingState());
}
};
/**
 * Reads the thread pool size from the {@code COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE} property,
 * falling back to {@code DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE} when it is not set.
 *
 * @return the configured thread pool size, or the default
 */
private int getDataCompletenessVerificationThreadPoolSize() {
  final int poolSize = this.props.getPropAsInt(
      COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE,
      DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_THREAD_POOL_SIZE);
  return poolSize;
}
/**
 * Creates a {@link SourceState} from a set of properties and the previous work unit states.
 *
 * <p>The previous work unit states are first passed through {@code adaptWorkUnitStates}; its result and
 * {@code properties} are handed to the superclass constructor.</p>
 *
 * @param properties {@link State} forwarded unchanged to the superclass constructor
 * @param prevWorkUnitStates previous {@link WorkUnitState}s, adapted via {@code adaptWorkUnitStates}
 */
public SourceState(State properties, Iterable<WorkUnitState> prevWorkUnitStates) { super(properties, adaptWorkUnitStates(prevWorkUnitStates)); }
@Test public void convertWithNullStringSet() throws Exception { JsonParser parser = new JsonParser(); String inputRecord = "79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be mybucket [06/Feb/2014:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - \"GET /mybucket?versioning HTTP/1.1\" 200 - 113 - 7 - \"-\" \"S3Console/0.4\" -"; JsonElement jsonElement = parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/s3AccessLogSchema.json"))); JsonArray outputSchema = jsonElement.getAsJsonArray(); GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter(); WorkUnitState workUnitState = new WorkUnitState(); //Grok expression was taken from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/aws workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN, "%{WORD:owner} %{NOTSPACE:bucket} \\[%{HTTPDATE:timestamp}\\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:\"?%{QS:agent}\"?|-) (?:-|%{NOTSPACE:version_id})"); workUnitState.setProp(GrokToJsonConverter.NULLSTRING_REGEXES, "[\\s-]"); grokToJsonConverter.init(workUnitState); JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next(); JsonObject expected = parser .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedS3AccessLogRecord.json"))) .getAsJsonObject(); Assert.assertEquals(actual, expected); grokToJsonConverter.close(); } }