/**
 * Converting a record against a schema with non-nullable fields that the input does not
 * populate must fail with {@link DataConversionException}.
 *
 * <p>Fixes over the original: the {@code actual}/{@code expected} JSON objects were parsed but
 * never asserted (dead code — the exception is raised inside {@code convertRecord} before any
 * comparison could run), and {@code close()} was unreachable for the same reason; it now runs
 * in a {@code finally} block so the converter is always released.
 */
@Test(expectedExceptions = DataConversionException.class)
public void convertOutputWithNonNullableFieldsShouldThrowDataConversionException() throws Exception {
  JsonParser parser = new JsonParser();
  String inputRecord =
      "10.121.123.104 - - [01/Nov/2012:21:01:17 +0100] \"GET /cpc/auth.do?loginsetup=true&targetPage=%2Fcpc%2F HTTP/1.1\" 302 466";
  JsonElement jsonElement = parser.parse(new InputStreamReader(
      getClass().getResourceAsStream("/converter/grok/schemaWithNonNullableFields.json")));
  JsonArray outputSchema = jsonElement.getAsJsonArray();

  GrokToJsonConverter grokToJsonConverter = new GrokToJsonConverter();
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(GrokToJsonConverter.GROK_PATTERN,
      "^%{IPORHOST:clientip} (?:-|%{USER:ident}) (?:-|%{USER:auth}) \\[%{HTTPDATE:timestamp}\\] \\\"(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|-)\\\" %{NUMBER:response} (?:-|%{NUMBER:bytes})");
  grokToJsonConverter.init(workUnitState);
  try {
    // Expected to throw: the record has no values for the schema's non-nullable fields.
    grokToJsonConverter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
  } finally {
    grokToJsonConverter.close();
  }
}
/**
 * Initializes the shared converter and loads the test fixture JSON.
 *
 * <p>Bug fix: the original called {@code workUnit.getPropAsBoolean(key, true)}, which only
 * READS the property (returning the supplied default) and discards the result — it never
 * enabled complex-schema unpacking. The intent is clearly to SET the property, so this now
 * uses {@code setProp}.
 *
 * @throws SchemaConversionException if the empty schema cannot be converted
 */
@BeforeClass
public static void setUp() throws SchemaConversionException {
  converter = new JsonStringToJsonIntermediateConverter();
  WorkUnitState workUnit = new WorkUnitState();
  workUnit.setProp("gobblin.converter.jsonStringToJsonIntermediate.unpackComplexSchemas", true);
  converter.convertSchema("[]", workUnit);
  Type jsonType = new TypeToken<JsonObject>() {
  }.getType();
  Gson gson = new Gson();
  testJsonData = gson.fromJson(new InputStreamReader(JsonStringToJsonIntermediateConverterTest.class
      .getResourceAsStream("/converter/JsonStringToJsonIntermediateConverter.json")), jsonType);
}
/**
 * Returns the extractor's schema, read verbatim from the {@code source.schema}
 * property of this work unit's state. May be {@code null} if the property is unset.
 */
@Override
public String getSchema() {
  return this.workUnitState.getProp(ConfigurationKeys.SOURCE_SCHEMA);
}
public TaskState(WorkUnitState workUnitState) { // Since getWorkunit() returns an immutable WorkUnit object, // the WorkUnit object in this object is also immutable. super(workUnitState.getWorkunit(), workUnitState.getJobState(), workUnitState.getTaskBrokerNullable()); addAll(workUnitState); this.jobId = workUnitState.getProp(ConfigurationKeys.JOB_ID_KEY); this.taskId = workUnitState.getProp(ConfigurationKeys.TASK_ID_KEY); this.taskKey = workUnitState.getProp(ConfigurationKeys.TASK_KEY_KEY, "unknown_task_key"); this.taskAttemptId = Optional.fromNullable(workUnitState.getProp(ConfigurationKeys.TASK_ATTEMPT_ID_KEY)); this.setId(this.taskId); }
/**
 * Get the actual high {@link Watermark} as a {@link JsonElement}.
 *
 * @return a {@link JsonElement} representing the actual high {@link Watermark},
 *         or {@code null} if the actual high {@link Watermark} is not set.
 */
public JsonElement getActualHighWatermark() {
  if (contains(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY)) {
    return JSON_PARSER.parse(getProp(ConfigurationKeys.WORK_UNIT_STATE_ACTUAL_HIGH_WATER_MARK_KEY));
  }
  return null;
}
/**
 * Resolves {@code key} against increasingly broad scopes: the work unit first,
 * then the work-unit state, and finally the job state. The first non-blank value
 * wins; if all are blank, the job-state value (possibly null/blank) is returned.
 */
private String readProp(String key, WorkUnitState workUnitState) {
  String fromWorkUnit = workUnitState.getWorkunit().getProp(key);
  if (!StringUtils.isBlank(fromWorkUnit)) {
    return fromWorkUnit;
  }
  String fromState = workUnitState.getProp(key);
  if (!StringUtils.isBlank(fromState)) {
    return fromState;
  }
  return workUnitState.getJobState().getProp(key);
}
this.requestedTitle = workUnitState.getProp(ConfigurationKeys.DATASET_URN_KEY); long baseRevision = workUnitState.getWorkunit().getLowWatermark(LongWatermark.class, new Gson()).getValue(); if (baseRevision < 0) { try { workUnitState.setActualHighWatermark(new LongWatermark(this.lastRevisionId)); this.currentBatch = new LinkedList<>(); this.lastRevisionId, this.requestedTitle)); this.maxRevisionsPulled = workUnitState.getPropAsInt(MAX_REVISION_PER_PAGE, DEFAULT_MAX_REVISIONS_PER_PAGE);
public SimpleJsonExtractor(WorkUnitState workUnitState) throws FileSystemException { this.workUnitState = workUnitState; // Resolve the file to pull if (workUnitState.getPropAsBoolean(ConfigurationKeys.SOURCE_CONN_USE_AUTHENTICATION, false)) { // Add authentication credential if authentication is needed UserAuthenticator auth = new StaticUserAuthenticator(workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_DOMAIN, ""), workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME), PasswordManager.getInstance(workUnitState) .readPassword(workUnitState.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD))); FileSystemOptions opts = new FileSystemOptions(); DefaultFileSystemConfigBuilder.getInstance().setUserAuthenticator(opts, auth); this.fileObject = VFS.getManager().resolveFile(workUnitState.getProp(SOURCE_FILE_KEY), opts); } else { this.fileObject = VFS.getManager().resolveFile(workUnitState.getProp(SOURCE_FILE_KEY)); } // Open the file for reading LOGGER.info("Opening file " + this.fileObject.getURL().toString()); this.bufferedReader = this.closer.register(new BufferedReader(new InputStreamReader(this.fileObject.getContent().getInputStream(), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))); }
/**
 * Sets metadata to indicate whether this is the first time this table or partition is being published.
 *
 * @param wus work unit state to annotate if this is the first publish for this table or partition
 */
public static void setIsFirstPublishMetadata(WorkUnitState wus) {
  // getPropAsBoolean already yields a primitive boolean; the original
  // Boolean.valueOf(...) box/unbox round-trip was redundant and is dropped.
  if (!wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    LongWatermark previousWatermark = wus.getWorkunit().getLowWatermark(LongWatermark.class);
    // First publish iff there is no previous watermark or it is still at its initial value (0).
    wus.setProp(SlaEventKeys.IS_FIRST_PUBLISH, (null == previousWatermark || previousWatermark.getValue() == 0));
  }
}
}
/**
 * Extracts compaction metadata (primary keys, delta fields, data location) for the
 * table named in {@code state} from the Hive metastore.
 *
 * <p>Fixes over the original: the redundant {@code Boolean.valueOf(boolean)} round-trip is
 * removed, and the comma-splitting of table parameters is null-guarded —
 * {@code Splitter.splitToList(null)} throws an NPE when the table does not define the
 * configured parameter keys; we now fall back to empty lists instead.
 *
 * @throws IOException    on metastore-client pool failures
 * @throws TException     on Thrift-level metastore errors
 * @throws HiveException  on Hive-level errors
 */
public HiveMetadataForCompactionExtractor(WorkUnitState state, FileSystem fs)
    throws IOException, TException, HiveException {
  super(state);
  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    // Watermark workunits carry no data to compact.
    log.info("Ignoring Watermark workunit for {}", state.getProp(ConfigurationKeys.DATASET_URN_KEY));
    return;
  }
  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    Table table = client.get().getTable(this.dbName, this.tableName);
    String primaryKeyString = table.getParameters().get(state.getProp(COMPACTION_PRIMARY_KEY));
    List<String> primaryKeyList = Splitter.on(',').omitEmptyStrings().trimResults()
        .splitToList(primaryKeyString == null ? "" : primaryKeyString);
    String deltaString = table.getParameters().get(state.getProp(COMPACTION_DELTA));
    List<String> deltaList = Splitter.on(',').omitEmptyStrings().trimResults()
        .splitToList(deltaString == null ? "" : deltaString);
    Path dataFilesPath = new Path(table.getSd().getLocation());
    compactionEntity = new MRCompactionEntity(primaryKeyList, deltaList, dataFilesPath, state.getProperties());
  }
}
/**
 * Initializes common Hive extractor state (work unit, dataset, db/table names,
 * metastore client pool) from the given work-unit state.
 *
 * <p>Fix: removed the redundant {@code Boolean.valueOf(boolean)} box/unbox round-trip —
 * {@code getPropAsBoolean} already returns a primitive boolean.
 *
 * @throws IOException if the metastore client pool cannot be obtained
 */
public HiveBaseExtractor(WorkUnitState state) throws IOException {
  // Watermark workunits carry no real dataset; skip all initialization for them.
  if (state.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    return;
  }
  this.hiveWorkUnit = new HiveWorkUnit(state.getWorkunit());
  this.hiveDataset = hiveWorkUnit.getHiveDataset();
  this.dbName = hiveDataset.getDbAndTable().getDb();
  this.tableName = hiveDataset.getDbAndTable().getTable();
  this.pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
      Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
}
/**
 * Builds an immutable view over the given {@link WorkUnitState}: the underlying
 * WorkUnit and JobState are shared (not copied), while the state's spec properties
 * are copied into this instance.
 */
public ImmutableWorkUnitState(WorkUnitState workUnitState) {
  super(workUnitState.getWorkunit(), workUnitState.getJobState());
  super.addAll(workUnitState.getSpecProperties());
}
/**
 * Sets the actual high watermark by reading the expected high watermark.
 *
 * <p>Fix: removed the redundant {@code Boolean.valueOf(boolean)} box/unbox round-trip —
 * {@code getPropAsBoolean} already returns a primitive boolean.
 *
 * {@inheritDoc}
 * @see gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#setActualHighWatermark(gobblin.configuration.WorkUnitState)
 */
@Override
public void setActualHighWatermark(WorkUnitState wus) {
  if (wus.getPropAsBoolean(IS_WATERMARK_WORKUNIT_KEY)) {
    // Watermark workunits carry a multi-key watermark; everything else uses a plain long.
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(MultiKeyValueLongWatermark.class));
  } else {
    wus.setActualHighWatermark(wus.getWorkunit().getExpectedHighWatermark(LongWatermark.class));
  }
}
/**
 * For every committed work unit that names both a source and destination partition and
 * configures a parameter whitelist and/or blacklist, copies the selected partition
 * parameters from source to destination, logging a warning on failure.
 */
@VisibleForTesting
public void preservePartitionParams(Collection<? extends WorkUnitState> states) {
  for (WorkUnitState wus : states) {
    // A work unit is eligible only when it committed, names both partitions, and
    // carries at least one of the whitelist/blacklist properties.
    boolean eligible = wus.getWorkingState() == WorkingState.COMMITTED
        && wus.contains(COMPLETE_SOURCE_PARTITION_NAME)
        && wus.contains(COMPLETE_DEST_PARTITION_NAME)
        && (wus.contains(PARTITION_PARAMETERS_WHITELIST) || wus.contains(PARTITION_PARAMETERS_BLACKLIST));
    if (!eligible) {
      continue;
    }

    String sourcePartition = wus.getProp(COMPLETE_SOURCE_PARTITION_NAME);
    String destPartition = wus.getProp(COMPLETE_DEST_PARTITION_NAME);
    List<String> whitelist = COMMA_SPLITTER.splitToList(wus.getProp(PARTITION_PARAMETERS_WHITELIST, StringUtils.EMPTY));
    List<String> blacklist = COMMA_SPLITTER.splitToList(wus.getProp(PARTITION_PARAMETERS_BLACKLIST, StringUtils.EMPTY));

    if (!copyPartitionParams(sourcePartition, destPartition, whitelist, blacklist)) {
      log.warn("Unable to copy partition parameters from " + sourcePartition + " to " + destPartition);
    }
  }
}
/**
 * Set the high watermark.
 *
 * @param value high watermark
 * @deprecated use {@link #setActualHighWatermark(Watermark)}.
 */
@Deprecated
public void setHighWaterMark(long value) {
  // Stored as a plain runtime-state property; retained only for backwards compatibility.
  setProp(ConfigurationKeys.WORK_UNIT_STATE_RUNTIME_HIGH_WATER_MARK, value);
}
SubscopedBrokerBuilder<GobblinScopeTypes, ?> taskBrokerBuilder = this.jobBroker.newSubscopedBuilder(new TaskScopeInstance(taskId)); WorkUnitState workUnitState = new WorkUnitState(workUnit, this.jobState, taskBrokerBuilder); workUnitState.setId(taskId); workUnitState.setProp(ConfigurationKeys.JOB_ID_KEY, this.jobId); workUnitState.setProp(ConfigurationKeys.TASK_ID_KEY, taskId); if (this.containerIdOptional.isPresent()) { workUnitState.setProp(ConfigurationKeys.TASK_ATTEMPT_ID_KEY, this.containerIdOptional.get());
.deserialize(datasetWorkUnitStates.iterator().next().getProp(CopySource.SERIALIZED_COPYABLE_DATASET)); Path datasetWriterOutputPath = new Path(this.writerOutputDir, datasetAndPartition.identifier()); if (wus.getWorkingState() == WorkingState.SUCCESSFUL) { wus.setWorkingState(WorkUnitState.WorkingState.COMMITTED); if (wus.getWorkingState() == WorkingState.COMMITTED) { CopyEventSubmitterHelper.submitSuccessfulFilePublish(this.eventSubmitter, copyableFile, wus);
wus.setWorkingState(WorkingState.FAILED); if (!wus.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) { try { new SlaEventSubmitter(eventSubmitter, EventConstants.CONVERSION_FAILED_EVENT, wus.getProperties()).submit(); } catch (Exception e) { log.error("Failed while emitting SLA event, but ignoring and moving forward to curate " + "all clean up comamnds", e); wus.setWorkingState(WorkingState.COMMITTED); this.watermarker.setActualHighWatermark(wus); if (!wus.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) { EventWorkunitUtils.setIsFirstPublishMetadata(wus); try { new SlaEventSubmitter(eventSubmitter, EventConstants.CONVERSION_SUCCESSFUL_SLA_EVENT, wus.getProperties()) .submit(); } catch (Exception e) {
/**
 * Converts {@code json} against the schema in {@code record} using a fresh
 * {@link WorkUnitState}, returning the first converted record.
 *
 * @throws DataConversionException if conversion fails
 */
private JsonObject parseJsonObject(JsonObject json, JsonArray record) throws DataConversionException {
  WorkUnitState freshState = new WorkUnitState();
  String serializedJson = json.toString();
  return converter.convertRecord(record, serializedJson, freshState).iterator().next();
}
this.fs.setPermission(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission); if (!workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP, DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) { this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null, workUnit.getWorkunit().getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY))); log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL); publishQueries.addAll(dropPartitionsDDL); if (workUnit.getPropAsBoolean(HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY, DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY)) { List<String> createFinalPartitionDDL =