/**
 * Feeds the string form of {@code json} through the shared converter, passing {@code record}
 * as the first argument and a fresh {@link WorkUnitState}, and returns the first element of
 * the conversion result.
 *
 * @param json object whose {@code toString()} form is handed to the converter as input
 * @param record array passed as the first argument of {@code convertRecord}
 * @return the first element produced by the converter
 * @throws DataConversionException if the converter fails
 */
private JsonObject parseJsonObject(JsonObject json, JsonArray record) throws DataConversionException {
  String serializedInput = json.toString();
  WorkUnitState freshState = new WorkUnitState();
  return converter.convertRecord(record, serializedInput, freshState).iterator().next();
}
/**
 * Creates the shared converter, initializes it with an empty schema ({@code "[]"}), and loads
 * the JSON test fixture from the classpath into {@code testJsonData}.
 *
 * @throws SchemaConversionException if thrown while initializing the converter
 */
@BeforeClass
public static void setUp() throws SchemaConversionException {
  converter = new JsonStringToJsonIntermediateConverter();

  WorkUnitState workUnit = new WorkUnitState();
  // Write the flag onto the work unit so the converter can actually see it. Merely reading it
  // with getPropAsBoolean(key, default) and dropping the result leaves the state untouched.
  workUnit.setProp("gobblin.converter.jsonStringToJsonIntermediate.unpackComplexSchemas", true);
  converter.convertSchema("[]", workUnit);

  Type jsonType = new TypeToken<JsonObject>() {
  }.getType();
  Gson gson = new Gson();
  testJsonData = gson.fromJson(
      new InputStreamReader(JsonStringToJsonIntermediateConverterTest.class
          .getResourceAsStream("/converter/JsonStringToJsonIntermediateConverter.json")),
      jsonType);
}
/**
 * Converts an access-log line with the configured Grok pattern against the schema loaded from
 * {@code /converter/grok/schemaWithNullableFields.json} and expects the result to equal the
 * record stored in {@code /converter/grok/convertedRecord.json}.
 */
@Test
public void convertOutputWithNullableFields() throws Exception {
  JsonParser jsonParser = new JsonParser();
  String logLine = "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";

  // Load the output schema the converter should produce.
  InputStreamReader schemaReader =
      new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNullableFields.json"));
  JsonArray schema = jsonParser.parse(schemaReader).getAsJsonArray();

  // Configure and initialize the converter under test.
  GrokToJsonConverter grokConverter = new GrokToJsonConverter();
  WorkUnitState state = new WorkUnitState();
  state.setProp(GrokToJsonConverter.GROK_PATTERN,
      "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
  grokConverter.init(state);

  JsonObject converted = grokConverter.convertRecord(schema, logLine, state).iterator().next();

  InputStreamReader expectedReader =
      new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedRecord.json"));
  JsonObject expected = jsonParser.parse(expectedReader).getAsJsonObject();

  Assert.assertEquals(converted, expected);
  grokConverter.close();
}
/**
 * Converts an access-log line against the schema loaded from
 * {@code /converter/grok/schemaWithNonNullableFields.json} and expects the conversion to fail
 * with a {@link DataConversionException}.
 *
 * @throws Exception the expected {@link DataConversionException}, or any setup failure
 */
@Test(expectedExceptions = DataConversionException.class)
public void convertOutputWithNonNullableFieldsShouldThrowDataConversionException() throws Exception {
  JsonParser parser = new JsonParser();
  String inputRecord = "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";

  JsonElement jsonElement = parser.parse(
      new InputStreamReader(getClass().getResourceAsStream("/converter/grok/schemaWithNonNullableFields.json")));
  JsonArray outputSchema = jsonElement.getAsJsonArray();

  GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
  try {
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
        "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
    grokToJsonConverter.init(workUnitState);

    // The test passes only if this call throws; its result is deliberately not captured.
    grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } finally {
    // Release the converter even though the expected exception skips everything after the call.
    grokToJsonConverter.close();
  }
}
@Test public void convertWithNullStringSet() throws Exception { JsonParser parser = new JsonParser(); String inputRecord = "79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be mybucket [06/Feb/2014:00:00:38 +0000] 192.0.2.3 79a59df900b949e55d96a1e698fbacedfd6e09d98eacf8f8d5218e7cd47ef2be 3E57427F3EXAMPLE REST.GET.VERSIONING - \"GET /mybucket?versioning HTTP/1.1\" 200 - 113 - 7 - \"-\" \"S3Console/0.4\" -"; JsonElement jsonElement = parser.parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/s3AccessLogSchema.json"))); JsonArray outputSchema = jsonElement.getAsJsonArray(); GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter(); WorkUnitState workUnitState = new WorkUnitState(); //Grok expression was taken from https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/aws workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN, "%{WORD:owner} %{NOTSPACE:bucket} \\[%{HTTPDATE:timestamp}\\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:\"?%{QS:agent}\"?|-) (?:-|%{NOTSPACE:version_id})"); workUnitState.setProp(GrokToJsonConverter.NULLSTRING_REGEXES, "[\\s-]"); grokToJsonConverter.init(workUnitState); JsonObject actual = grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next(); JsonObject expected = parser .parse(new InputStreamReader(getClass().getResourceAsStream("/converter/grok/convertedS3AccessLogRecord.json"))) .getAsJsonObject(); Assert.assertEquals(actual, expected); grokToJsonConverter.close(); } }
/**
 * Bumps the job's task count and registers a new {@link TaskState} for the given work unit,
 * built from a {@link WorkUnitState} that pairs the work unit with the job state.
 */
@Override
public void forWorkUnit(WorkUnit workUnit) {
  jobState.incrementTaskCount();
  WorkUnitState stateForUnit = new WorkUnitState(workUnit, jobState);
  TaskState taskState = new TaskState(stateForUnit);
  jobState.addTaskState(taskState);
}
});
/**
 * Reads a count followed by that many serialized {@link WorkUnitState}s, stores each one as an
 * {@link ImmutableWorkUnitState} in {@code previousWorkUnitStates}, then delegates the rest of
 * the input to the superclass.
 *
 * @param in input to deserialize from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  for (int remaining = in.readInt(); remaining > 0; remaining--) {
    WorkUnitState deserialized = new WorkUnitState();
    deserialized.readFields(in);
    ImmutableWorkUnitState frozen = new ImmutableWorkUnitState(deserialized);
    this.previousWorkUnitStates.add(frozen);
  }
  super.readFields(in);
}
/**
 * Returns the task states of this job's {@link Task}s as {@link WorkUnitState}s.
 *
 * <p>For every stored {@link TaskState}, a new {@link WorkUnitState} is created from its work
 * unit and job state, given the task's id, and populated with all of the task state's
 * properties.
 *
 * @return an immutable list of {@link WorkUnitState}s, one per task state
 */
public List<WorkUnitState> getTaskStatesAsWorkUnitStates() {
  ImmutableList.Builder<WorkUnitState> converted = ImmutableList.builder();
  for (TaskState task : this.taskStates.values()) {
    WorkUnitState asWorkUnitState = new WorkUnitState(task.getWorkunit(), task.getJobState());
    asWorkUnitState.setId(task.getId());
    asWorkUnitState.addAll(task);
    converted.add(asWorkUnitState);
  }
  return converted.build();
}
// Close the work-unit file closer before cleaning up staging data.
workUnitFileCloser.close();
// Clean the task staging data for this work unit, wrapped in a fresh WorkUnitState.
JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
// NOTE(review): this repeats the call above with identical arguments; confirm whether the
// second cleanup is intentional or an accidental duplicate.
JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
/**
 * Decides whether a work unit should be kept.
 *
 * <p>A {@link MultiWorkUnit} must not carry the skip key itself, nor may any of its contained
 * work units; either case fails the precondition check. A work unit whose skip property is
 * {@code true} is recorded on the job state as a skipped task state and rejected.
 *
 * @param workUnit the work unit to examine
 * @return {@code false} if the work unit is marked as skipped, {@code true} otherwise
 */
@Override
public boolean apply(WorkUnit workUnit) {
  if (workUnit instanceof MultiWorkUnit) {
    Preconditions.checkArgument(!workUnit.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY),
        "Error: MultiWorkUnit cannot be skipped");
    MultiWorkUnit multiWorkUnit = (MultiWorkUnit) workUnit;
    for (WorkUnit member : multiWorkUnit.getWorkUnits()) {
      Preconditions.checkArgument(!member.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY),
          "Error: MultiWorkUnit cannot contain skipped WorkUnit");
    }
  }

  boolean markedSkipped = workUnit.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_SKIP_KEY, false);
  if (!markedSkipped) {
    return true;
  }

  // Record the skipped work unit on the job state before filtering it out.
  WorkUnitState skippedState = new WorkUnitState(workUnit, this.jobState);
  skippedState.setWorkingState(WorkUnitState.WorkingState.SKIPPED);
  this.jobState.addSkippedTaskState(new TaskState(skippedState));
  return false;
}
}
try { for (WorkUnit workUnit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) { JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(workUnit, jobState), LOG, closer, parallelRunners);
// Derive a sub-scoped broker builder from the job broker, scoped by a TaskScopeInstance for this task id.
SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskBrokerBuilder = this.jobBroker.newSubscopedBuilder(new TaskScopeInstance(taskId));
// Build the work unit state from the work unit, the job state and the task broker builder.
WorkUnitState workUnitState = new WorkUnitState(workUnit, this.jobState, taskBrokerBuilder);
// Stamp the state with the task id and record the job id under JOB_ID_KEY.
workUnitState.setId(taskId);
workUnitState.setProp(ConfigurationKeys.JOB_ID_KEY, this.jobId);
public void forWorkUnit(WorkUnit workUnit) { jobState.incrementTaskCount(); jobState.addTaskState(new TaskState(new WorkUnitState(workUnit, jobState)));