private static void addRuntimeHiveRegistrationProperties(State state) { // Use seconds instead of milliseconds to be consistent with other times stored in hive state.appendToListProp(HiveRegProps.HIVE_TABLE_PARTITION_PROPS, String.format("%s:%d", DATA_PUBLISH_TIME, TimeUnit.SECONDS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS))); } }
/**
 * Registers a single custom {@link Tag} on a {@link org.apache.gobblin.configuration.State}.
 *
 * <p>
 *   The tag's {@code toString()} form is appended to the list property stored under
 *   {@link #METRICS_STATE_CUSTOM_TAGS}. {@link org.apache.gobblin.metrics.Tag}s under this key
 *   can later be parsed using the method {@link #getCustomTagsFromState}.
 * </p>
 *
 * @param state {@link org.apache.gobblin.configuration.State} that receives the tag.
 * @param tag {@link Tag} to append.
 */
public static void addCustomTagToState(State state, Tag<?> tag) {
  String serializedTag = tag.toString();
  state.appendToListProp(METRICS_STATE_CUSTOM_TAGS, serializedTag);
}
/**
 * Registers every {@link Tag} of a {@link List} on a
 * {@link org.apache.gobblin.configuration.State}.
 *
 * <p>
 *   Each tag's {@code toString()} form is appended, in list order, to the list property stored
 *   under {@link #METRICS_STATE_CUSTOM_TAGS}. {@link org.apache.gobblin.metrics.Tag}s under this
 *   key can later be parsed using the method {@link #getCustomTagsFromState}.
 * </p>
 *
 * @param state {@link org.apache.gobblin.configuration.State} that receives the tags.
 * @param tags list of {@link Tag}s to append.
 */
public static void addCustomTagToState(State state, List<? extends Tag<?>> tags) {
  for (Tag<?> customTag : tags) {
    String serializedTag = customTag.toString();
    state.appendToListProp(METRICS_STATE_CUSTOM_TAGS, serializedTag);
  }
}
/**
 * Expects an {@link IllegalStateException} when the configured regexp contains no capture group.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testTableRegexpWithoutGroupShouldFail() throws IOException {
  final String groupLessRegexp = ".*test_bucket/.*/staging/.*";

  State props = new State();
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX, groupLessRegexp);
  HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(props);

  Path stagingPath = new Path("s3://test_bucket/topic/staging/2017-10-21/");
  Optional<Pattern> compiledRegexp = Optional.of(Pattern.compile(groupLessRegexp));

  String resolvedName = policy.getDatabaseOrTableName(
      stagingPath,
      HiveRegistrationPolicyBase.HIVE_DATABASE_NAME,
      HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX,
      compiledRegexp);

  Assert.assertEquals(resolvedName, "topic");
}
/**
 * Expects an {@link IllegalStateException} when the configured regexp does not match the path.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testTableRegexpWithoutMatchShouldFail() throws IOException {
  final String nonMatchingRegexp = "^hdfs://(.*)";

  State props = new State();
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX, nonMatchingRegexp);
  HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(props);

  Path stagingPath = new Path("s3://test_bucket/topic/staging/2017-10-21/");
  Optional<Pattern> compiledRegexp = Optional.of(Pattern.compile(nonMatchingRegexp));

  String resolvedName = policy.getDatabaseOrTableName(
      stagingPath,
      HiveRegistrationPolicyBase.HIVE_DATABASE_NAME,
      HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX,
      compiledRegexp);

  Assert.assertEquals(resolvedName, "topic");
}
/**
 * Verifies that the first capture group of the configured regexp is returned as the name:
 * for the path below the expected result is {@code "topic"}.
 */
@Test
public void testTableRegexp() throws IOException {
  final String capturingRegexp = ".*test_bucket/(.*)/staging/.*";

  State props = new State();
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX, capturingRegexp);
  HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(props);

  Path stagingPath = new Path("s3://test_bucket/topic/staging/2017-10-21/");
  Optional<Pattern> compiledRegexp = Optional.of(Pattern.compile(capturingRegexp));

  String resolvedName = policy.getDatabaseOrTableName(
      stagingPath,
      HiveRegistrationPolicyBase.HIVE_DATABASE_NAME,
      HiveRegistrationPolicyBase.HIVE_DATABASE_REGEX,
      compiledRegexp);

  Assert.assertEquals(resolvedName, "topic");
}
LIST_SPLITTER_COMMA.splitToList(this.hiveRegister.getProps().getUpstreamDataAttrName().get())){ if (state.contains(attrName)) { taskSpecificState.appendToListProp(HiveMetaStoreUtils.RUNTIME_PROPS, attrName + ":" + state.getProp(attrName));
/**
 * Configures one primary plus one additional database and one primary plus two additional
 * tables, then checks that {@code getHiveSpecs} yields six specs in database-major order.
 */
@Test
public void testGetHiveSpecs() throws IOException {
  State props = new State();
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
  props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
  props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3");

  this.path = new Path(getClass().getResource("/test-hive-table").toString());

  HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(props);
  Collection<HiveSpec> hiveSpecs = policy.getHiveSpecs(this.path);
  Assert.assertEquals(hiveSpecs.size(), 6);

  // Expected (database, table) pairs, in the order the specs are iterated.
  String[][] expectedDbAndTable = {
      {"db1", "tbl1"},
      {"db1", "tbl2"},
      {"db1", "tbl3"},
      {"db2", "tbl1"},
      {"db2", "tbl2"},
      {"db2", "tbl3"},
  };

  Iterator<HiveSpec> specIterator = hiveSpecs.iterator();
  for (String[] dbAndTable : expectedDbAndTable) {
    examine(specIterator.next(), dbAndTable[0], dbAndTable[1]);
  }
}
/**
 * Same setup as the plain spec test, plus a {@code db2.}-prefixed table-name property and the
 * {@code $PRIMARY_TABLE} placeholder; checks that {@code getHiveSpecs} yields the seven
 * expected (database, table) pairs in order.
 */
@Test
public void testGetHiveSpecsWithDBFilter() throws IOException {
  State props = new State();
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
  props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
  props.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
  props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES,
      "tbl2,tbl3,$PRIMARY_TABLE_col");
  props.appendToListProp("db2." + HiveRegistrationPolicyBase.HIVE_TABLE_NAME,
      "$PRIMARY_TABLE_col,tbl4,tbl5");

  this.path = new Path(getClass().getResource("/test-hive-table").toString());

  HiveRegistrationPolicyBase policy = new HiveRegistrationPolicyBase(props);
  Collection<HiveSpec> hiveSpecs = policy.getHiveSpecs(this.path);
  Assert.assertEquals(hiveSpecs.size(), 7);

  // Expected (database, table) pairs, in the order the specs are iterated.
  String[][] expectedDbAndTable = {
      {"db1", "tbl1"},
      {"db1", "tbl2"},
      {"db1", "tbl3"},
      {"db1", "tbl1_col"},
      {"db2", "tbl1_col"},
      {"db2", "tbl4"},
      {"db2", "tbl5"},
  };

  Iterator<HiveSpec> specIterator = hiveSpecs.iterator();
  for (String[] dbAndTable : expectedDbAndTable) {
    examine(specIterator.next(), dbAndTable[0], dbAndTable[1]);
  }
}
/**
 * Copies the storage-level settings of a {@link StorageDescriptor} into a new {@link State}.
 *
 * <p>The descriptor's free-form parameters are copied first under their own keys. The explicitly
 * modelled fields (location, input/output format, compressed flag, number of buckets, bucket
 * columns, stored-as-sub-directories flag) are then written under the matching
 * {@link HiveConstants} keys, each only when the descriptor reports the field as set.</p>
 *
 * @param sd the storage descriptor to read from
 * @return a new {@link State} holding the copied properties
 */
private static State getStorageProps(StorageDescriptor sd) {
  State storageProps = new State();
  // The parameter map is optional like every other field read below; when it is unset
  // getParameters() has nothing to return, so it is guarded the same way as the rest.
  if (sd.isSetParameters()) {
    for (Map.Entry<String, String> entry : sd.getParameters().entrySet()) {
      storageProps.setProp(entry.getKey(), entry.getValue());
    }
  }
  if (sd.isSetLocation()) {
    storageProps.setProp(HiveConstants.LOCATION, sd.getLocation());
  }
  if (sd.isSetInputFormat()) {
    storageProps.setProp(HiveConstants.INPUT_FORMAT, sd.getInputFormat());
  }
  if (sd.isSetOutputFormat()) {
    storageProps.setProp(HiveConstants.OUTPUT_FORMAT, sd.getOutputFormat());
  }
  if (sd.isSetCompressed()) {
    storageProps.setProp(HiveConstants.COMPRESSED, sd.isCompressed());
  }
  if (sd.isSetNumBuckets()) {
    storageProps.setProp(HiveConstants.NUM_BUCKETS, sd.getNumBuckets());
  }
  if (sd.isSetBucketCols()) {
    // Bucket columns are accumulated one by one into a single list property.
    for (String bucketColumn : sd.getBucketCols()) {
      storageProps.appendToListProp(HiveConstants.BUCKET_COLUMNS, bucketColumn);
    }
  }
  if (sd.isSetStoredAsSubDirectories()) {
    storageProps.setProp(HiveConstants.STORED_AS_SUB_DIRS, sd.isStoredAsSubDirectories());
  }
  return storageProps;
}
private static void addRuntimeHiveRegistrationProperties(State state) { // Use seconds instead of milliseconds to be consistent with other times stored in hive state.appendToListProp(HiveRegProps.HIVE_TABLE_PARTITION_PROPS, String.format("%s:%d", DATA_PUBLISH_TIME, TimeUnit.SECONDS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS))); } }
/**
 * Registers a single custom {@link Tag} on a {@link org.apache.gobblin.configuration.State}.
 *
 * <p>
 *   The tag's {@code toString()} form is appended to the list property stored under
 *   {@link #METRICS_STATE_CUSTOM_TAGS}. {@link org.apache.gobblin.metrics.Tag}s under this key
 *   can later be parsed using the method {@link #getCustomTagsFromState}.
 * </p>
 *
 * @param state {@link org.apache.gobblin.configuration.State} that receives the tag.
 * @param tag {@link Tag} to append.
 */
public static void addCustomTagToState(State state, Tag<?> tag) {
  String serializedTag = tag.toString();
  state.appendToListProp(METRICS_STATE_CUSTOM_TAGS, serializedTag);
}
/**
 * Registers every {@link Tag} of a {@link List} on a
 * {@link org.apache.gobblin.configuration.State}.
 *
 * <p>
 *   Each tag's {@code toString()} form is appended, in list order, to the list property stored
 *   under {@link #METRICS_STATE_CUSTOM_TAGS}. {@link org.apache.gobblin.metrics.Tag}s under this
 *   key can later be parsed using the method {@link #getCustomTagsFromState}.
 * </p>
 *
 * @param state {@link org.apache.gobblin.configuration.State} that receives the tags.
 * @param tags list of {@link Tag}s to append.
 */
public static void addCustomTagToState(State state, List<? extends Tag<?>> tags) {
  for (Tag<?> customTag : tags) {
    String serializedTag = customTag.toString();
    state.appendToListProp(METRICS_STATE_CUSTOM_TAGS, serializedTag);
  }
}
LIST_SPLITTER_COMMA.splitToList(this.hiveRegister.getProps().getUpstreamDataAttrName().get())){ if (state.contains(attrName)) { taskSpecificState.appendToListProp(HiveMetaStoreUtils.RUNTIME_PROPS, attrName + ":" + state.getProp(attrName));
/**
 * Copies the storage-level settings of a {@link StorageDescriptor} into a new {@link State}.
 *
 * <p>The descriptor's free-form parameters are copied first under their own keys. The explicitly
 * modelled fields (location, input/output format, compressed flag, number of buckets, bucket
 * columns, stored-as-sub-directories flag) are then written under the matching
 * {@link HiveConstants} keys, each only when the descriptor reports the field as set.</p>
 *
 * @param sd the storage descriptor to read from
 * @return a new {@link State} holding the copied properties
 */
private static State getStorageProps(StorageDescriptor sd) {
  State storageProps = new State();
  // The parameter map is optional like every other field read below; when it is unset
  // getParameters() has nothing to return, so it is guarded the same way as the rest.
  if (sd.isSetParameters()) {
    for (Map.Entry<String, String> entry : sd.getParameters().entrySet()) {
      storageProps.setProp(entry.getKey(), entry.getValue());
    }
  }
  if (sd.isSetLocation()) {
    storageProps.setProp(HiveConstants.LOCATION, sd.getLocation());
  }
  if (sd.isSetInputFormat()) {
    storageProps.setProp(HiveConstants.INPUT_FORMAT, sd.getInputFormat());
  }
  if (sd.isSetOutputFormat()) {
    storageProps.setProp(HiveConstants.OUTPUT_FORMAT, sd.getOutputFormat());
  }
  if (sd.isSetCompressed()) {
    storageProps.setProp(HiveConstants.COMPRESSED, sd.isCompressed());
  }
  if (sd.isSetNumBuckets()) {
    storageProps.setProp(HiveConstants.NUM_BUCKETS, sd.getNumBuckets());
  }
  if (sd.isSetBucketCols()) {
    // Bucket columns are accumulated one by one into a single list property.
    for (String bucketColumn : sd.getBucketCols()) {
      storageProps.appendToListProp(HiveConstants.BUCKET_COLUMNS, bucketColumn);
    }
  }
  if (sd.isSetStoredAsSubDirectories()) {
    storageProps.setProp(HiveConstants.STORED_AS_SUB_DIRS, sd.isStoredAsSubDirectories());
  }
  return storageProps;
}