/**
 * Builds the {@link FileSet} of copy entities for a single Hive partition.
 *
 * @param partition the Hive partition to generate copy entities for
 * @return a {@link HivePartitionFileSet} bound to the enclosing helper and the
 *         dataset's configuration properties
 */
private FileSet<CopyEntity> fileSetForPartition(final Partition partition) {
  // Capture the enclosing helper once; the file set needs both the helper and
  // the dataset properties it carries.
  final HiveCopyEntityHelper enclosingHelper = HiveCopyEntityHelper.this;
  return new HivePartitionFileSet(enclosingHelper, partition, enclosingHelper.dataset.getProperties());
}
}
/**
 * Creates a helper that resolves target paths for copies of the given Hive dataset.
 *
 * <p>Reads its configuration from {@code dataset.getProperties()}:
 * relocation of data files, an optional target table root, and an optional
 * prefix-to-be-replaced / prefix-replacement pair used for path rewriting.
 *
 * @param dataset the Hive dataset whose copy target paths this helper computes
 */
public HiveTargetPathHelper(HiveDataset dataset) {
  this.dataset = dataset;

  // Defaults to DEFAULT_RELOCATE_DATA_FILES when the key is absent.
  this.relocateDataFiles = Boolean
      .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));

  // Each of the three path options is present only when its key is explicitly set.
  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)) {
    // The configured root may contain tokens resolved against the db / table name.
    this.targetTableRoot = Optional.of(resolvePath(
        this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
        this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()));
  } else {
    this.targetTableRoot = Optional.<Path> absent();
  }

  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)) {
    this.targetTablePrefixTobeReplaced =
        Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)));
  } else {
    this.targetTablePrefixTobeReplaced = Optional.<Path> absent();
  }

  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)) {
    this.targetTablePrefixReplacement =
        Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)));
  } else {
    this.targetTablePrefixReplacement = Optional.<Path> absent();
  }
}
this.hiveRegProps = new HiveRegProps(new State(this.dataset.getProperties())); this.targetURI = Optional.fromNullable(this.dataset.getProperties().getProperty(TARGET_METASTORE_URI_KEY)); this.targetClientPool = HiveMetastoreClientPool.get(this.dataset.getProperties(), this.targetURI); this.targetDatabase = Optional.fromNullable(this.dataset.getProperties().getProperty(TARGET_DATABASE_KEY)) .or(this.dataset.table.getDbName()); this.existingEntityPolicy = ExistingEntityPolicy.valueOf(this.dataset.getProperties() .getProperty(EXISTING_ENTITY_POLICY_KEY, DEFAULT_EXISTING_ENTITY_POLICY).toUpperCase()); this.unmanagedDataPolicy = UnmanagedDataPolicy.valueOf( this.dataset.getProperties().getProperty(UNMANAGED_DATA_POLICY_KEY, DEFAULT_UNMANAGED_DATA_POLICY) .toUpperCase()); this.deleteMethod = this.dataset.getProperties().containsKey(DELETE_FILES_ON_DEREGISTER) ? DeregisterFileDeleteMethod .valueOf(this.dataset.getProperties().getProperty(DELETE_FILES_ON_DEREGISTER).toUpperCase()) : DEFAULT_DEREGISTER_DELETE_METHOD; if (this.dataset.getProperties().containsKey(COPY_PARTITION_FILTER_GENERATOR)) { try { PartitionFilterGenerator generator = GobblinConstructorUtils.invokeFirstConstructor( (Class<PartitionFilterGenerator>) Class .forName(this.dataset.getProperties().getProperty(COPY_PARTITION_FILTER_GENERATOR)), Lists.<Object> newArrayList(this.dataset.getProperties()), Lists.newArrayList()); this.partitionFilter = Optional.of(generator.getFilter(this.dataset)); log.info(String.format("Dynamic partition filter for table %s: %s.", this.dataset.table.getCompleteName(), Optional.fromNullable(this.dataset.getProperties().getProperty(COPY_PARTITIONS_FILTER_CONSTANT)); if ( this.dataset.getProperties().containsKey(HIVE_PARTITION_EXTENDED_FILTER_TYPE)){ String filterType = dataset.getProperties().getProperty(HIVE_PARTITION_EXTENDED_FILTER_TYPE);
/**
 * Appends post-publish steps that deregister a Hive partition on the target metastore,
 * optionally preceded by a step deleting the partition's data files.
 *
 * <p>Which paths get deleted depends on {@code this.deleteMethod}: the partition's data
 * location (RECURSIVE), the paths enumerated via the partition's input format
 * (INPUT_FORMAT), or nothing (NO_DELETE).
 *
 * @param copyEntities    list to which the generated steps are appended
 * @param fileSet         name of the file set the steps belong to
 * @param initialPriority priority assigned to the first generated step
 * @param table           table owning the partition; its data location bounds deletions
 * @param partition       the partition to deregister
 * @return the next unused step priority
 * @throws IOException if resolving the partition's input format or paths fails
 */
int addPartitionDeregisterSteps(List<CopyEntity> copyEntities, String fileSet, int initialPriority, Table table,
    Partition partition) throws IOException {
  int stepPriority = initialPriority;

  // Decide which data paths (if any) must be removed alongside the deregistration.
  Collection<Path> partitionPaths;
  switch (this.deleteMethod) {
    case RECURSIVE:
      partitionPaths = Lists.newArrayList(partition.getDataLocation());
      break;
    case INPUT_FORMAT: {
      // Enumerate the concrete file paths the partition's input format would read.
      InputFormat<?, ?> inputFormat = HiveUtils.getInputFormat(partition.getTPartition().getSd());
      HiveLocationDescriptor targetLocation = new HiveLocationDescriptor(partition.getDataLocation(), inputFormat,
          this.targetFs, this.dataset.getProperties());
      partitionPaths = targetLocation.getPaths().keySet();
      break;
    }
    case NO_DELETE:
    default:
      partitionPaths = Lists.newArrayList();
      break;
  }

  if (!partitionPaths.isEmpty()) {
    // Deletion is bounded by the table's data location so we never delete outside it.
    DeleteFileCommitStep deletePaths = DeleteFileCommitStep.fromPaths(this.targetFs, partitionPaths,
        this.dataset.getProperties(), table.getDataLocation());
    copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
  }

  // The deregistration step always runs, after any deletion step.
  PartitionDeregisterStep deregister = new PartitionDeregisterStep(table.getTTable(), partition.getTPartition(),
      this.targetURI, this.hiveRegProps);
  copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deregister, stepPriority++));

  return stepPriority;
}
this.getTargetFs(), this.getDataset().getProperties()); this.getDataset().getProperties(), table.getDataLocation()); copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
if (hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.PARTITION_COLUMN) && hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.DATETIME_FORMAT) && hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.LOOKBACK)) { partitionFilter = Optional.of(new LookbackPartitionFilterGenerator(hiveDataset.getProperties()).getFilter(hiveDataset)); log.info(String.format("Getting partitions for %s using partition filter %s", hiveDataset.getTable() .getCompleteName(), partitionFilter.get()));
helper.getDataset().getFs(), helper.getDataset().getProperties());
/**
 * Builds a {@link HiveTargetPathHelper} over a mocked {@link HiveDataset}.
 *
 * <p>The mock exposes a real metastore-backed {@link Table} named
 * {@code dbName.tableName}, the shared {@code TABLE_ROOT}, and the supplied
 * properties, so tests can exercise path resolution without a live metastore.
 *
 * @param properties configuration the helper under test should read
 * @return a helper wired to the mocked dataset
 */
private HiveTargetPathHelper createTestTargetPathHelper(Properties properties) {
  // Real Table instance (only its db/table names matter here).
  Table table = new Table(new org.apache.hadoop.hive.metastore.api.Table());
  table.setDbName("dbName");
  table.setTableName("tableName");

  HiveDataset mockDataset = Mockito.mock(HiveDataset.class);
  Mockito.when(mockDataset.getTable()).thenReturn(table);
  Mockito.when(mockDataset.getTableRootPath()).thenReturn(Optional.of(TABLE_ROOT));
  Mockito.when(mockDataset.getProperties()).thenReturn(properties);

  return new HiveTargetPathHelper(mockDataset);
}
/**
 * Verifies that {@link HiveCopyEntityHelper#addTableDeregisterSteps} with the
 * NO_DELETE policy emits exactly one {@link PostPublishStep} (the table
 * deregistration) and advances the priority by one.
 *
 * <p>Fix: uses {@code Assert.assertEquals(actual, expected)} instead of
 * {@code assertTrue(a == b)} so failures report the actual vs. expected values.
 */
@Test
public void testAddTableDeregisterSteps() throws Exception {
  HiveDataset dataset = Mockito.mock(HiveDataset.class);
  Mockito.when(dataset.getProperties()).thenReturn(new Properties());

  HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class);
  Mockito.when(helper.getDeleteMethod()).thenReturn(DeregisterFileDeleteMethod.NO_DELETE);
  Mockito.when(helper.getTargetURI()).thenReturn(Optional.of("/targetURI"));
  Mockito.when(helper.getHiveRegProps()).thenReturn(new HiveRegProps(new State()));
  Mockito.when(helper.getDataset()).thenReturn(dataset);
  // Only the method under test runs its real implementation; everything else stays stubbed.
  Mockito.when(helper.addTableDeregisterSteps(Mockito.any(List.class), Mockito.any(String.class), Mockito.anyInt(),
      Mockito.any(org.apache.hadoop.hive.ql.metadata.Table.class))).thenCallRealMethod();

  org.apache.hadoop.hive.ql.metadata.Table meta_table = Mockito.mock(org.apache.hadoop.hive.ql.metadata.Table.class);
  org.apache.hadoop.hive.metastore.api.Table api_table =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Table.class);
  Mockito.when(api_table.getDbName()).thenReturn("TestDB");
  Mockito.when(api_table.getTableName()).thenReturn("TestTable");
  Mockito.when(meta_table.getTTable()).thenReturn(api_table);

  List<CopyEntity> copyEntities = new ArrayList<CopyEntity>();
  String fileSet = "testFileSet";
  int initialPriority = 0;

  int priority = helper.addTableDeregisterSteps(copyEntities, fileSet, initialPriority, meta_table);

  // NO_DELETE: no file-deletion step, so only the deregister step is added.
  Assert.assertEquals(priority, 1);
  Assert.assertEquals(copyEntities.size(), 1);
  Assert.assertTrue(copyEntities.get(0) instanceof PostPublishStep);
  PostPublishStep p = (PostPublishStep) (copyEntities.get(0));
  Assert.assertTrue(
      p.getStep().toString().contains("Deregister table TestDB.TestTable on Hive metastore /targetURI"));
}
HiveLocationDescriptor.forTable(getTable(), getHiveDataset().getFs(), getHiveDataset().getProperties()); HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forTable(this.helper.getTargetTable(), this.helper.getTargetFs(), getHiveDataset().getProperties()); HiveLocationDescriptor.forTable(existingTargetTable.get(), this.helper.getTargetFs(), getHiveDataset().getProperties())) : Optional.<HiveLocationDescriptor> absent(); DeleteFileCommitStep.fromPaths(this.helper.getTargetFs(), diffPathSet.pathsToDelete, getHiveDataset().getProperties()); copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String> newHashMap(), deleteStep, stepPriority++));
/**
 * Builds the {@link FileSet} of copy entities for a single Hive partition.
 *
 * @param partition the Hive partition to generate copy entities for
 * @return a {@link HivePartitionFileSet} bound to the enclosing helper and the
 *         dataset's configuration properties
 */
private FileSet<CopyEntity> fileSetForPartition(final Partition partition) {
  // Capture the enclosing helper once; the file set needs both the helper and
  // the dataset properties it carries.
  final HiveCopyEntityHelper enclosingHelper = HiveCopyEntityHelper.this;
  return new HivePartitionFileSet(enclosingHelper, partition, enclosingHelper.dataset.getProperties());
}
}
/**
 * Creates a helper that resolves target paths for copies of the given Hive dataset.
 *
 * <p>Reads its configuration from {@code dataset.getProperties()}:
 * relocation of data files, an optional target table root, and an optional
 * prefix-to-be-replaced / prefix-replacement pair used for path rewriting.
 *
 * @param dataset the Hive dataset whose copy target paths this helper computes
 */
public HiveTargetPathHelper(HiveDataset dataset) {
  this.dataset = dataset;

  // Defaults to DEFAULT_RELOCATE_DATA_FILES when the key is absent.
  this.relocateDataFiles = Boolean
      .valueOf(this.dataset.getProperties().getProperty(RELOCATE_DATA_FILES_KEY, DEFAULT_RELOCATE_DATA_FILES));

  // Each of the three path options is present only when its key is explicitly set.
  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_ROOT)) {
    // The configured root may contain tokens resolved against the db / table name.
    this.targetTableRoot = Optional.of(resolvePath(
        this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_ROOT),
        this.dataset.getTable().getDbName(), this.dataset.getTable().getTableName()));
  } else {
    this.targetTableRoot = Optional.<Path> absent();
  }

  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)) {
    this.targetTablePrefixTobeReplaced =
        Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_TOBE_REPLACED)));
  } else {
    this.targetTablePrefixTobeReplaced = Optional.<Path> absent();
  }

  if (this.dataset.getProperties().containsKey(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)) {
    this.targetTablePrefixReplacement =
        Optional.of(new Path(this.dataset.getProperties().getProperty(COPY_TARGET_TABLE_PREFIX_REPLACEMENT)));
  } else {
    this.targetTablePrefixReplacement = Optional.<Path> absent();
  }
}
/**
 * Appends post-publish steps that deregister a Hive partition on the target metastore,
 * optionally preceded by a step deleting the partition's data files.
 *
 * <p>Which paths get deleted depends on {@code this.deleteMethod}: the partition's data
 * location (RECURSIVE), the paths enumerated via the partition's input format
 * (INPUT_FORMAT), or nothing (NO_DELETE).
 *
 * @param copyEntities    list to which the generated steps are appended
 * @param fileSet         name of the file set the steps belong to
 * @param initialPriority priority assigned to the first generated step
 * @param table           table owning the partition; its data location bounds deletions
 * @param partition       the partition to deregister
 * @return the next unused step priority
 * @throws IOException if resolving the partition's input format or paths fails
 */
int addPartitionDeregisterSteps(List<CopyEntity> copyEntities, String fileSet, int initialPriority, Table table,
    Partition partition) throws IOException {
  int stepPriority = initialPriority;

  // Decide which data paths (if any) must be removed alongside the deregistration.
  Collection<Path> partitionPaths;
  switch (this.deleteMethod) {
    case RECURSIVE:
      partitionPaths = Lists.newArrayList(partition.getDataLocation());
      break;
    case INPUT_FORMAT: {
      // Enumerate the concrete file paths the partition's input format would read.
      InputFormat<?, ?> inputFormat = HiveUtils.getInputFormat(partition.getTPartition().getSd());
      HiveLocationDescriptor targetLocation = new HiveLocationDescriptor(partition.getDataLocation(), inputFormat,
          this.targetFs, this.dataset.getProperties());
      partitionPaths = targetLocation.getPaths().keySet();
      break;
    }
    case NO_DELETE:
    default:
      partitionPaths = Lists.newArrayList();
      break;
  }

  if (!partitionPaths.isEmpty()) {
    // Deletion is bounded by the table's data location so we never delete outside it.
    DeleteFileCommitStep deletePaths = DeleteFileCommitStep.fromPaths(this.targetFs, partitionPaths,
        this.dataset.getProperties(), table.getDataLocation());
    copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
  }

  // The deregistration step always runs, after any deletion step.
  PartitionDeregisterStep deregister = new PartitionDeregisterStep(table.getTTable(), partition.getTPartition(),
      this.targetURI, this.hiveRegProps);
  copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deregister, stepPriority++));

  return stepPriority;
}
this.hiveRegProps = new HiveRegProps(new State(this.dataset.getProperties())); this.targetURI = Optional.fromNullable(this.dataset.getProperties().getProperty(TARGET_METASTORE_URI_KEY)); this.targetClientPool = HiveMetastoreClientPool.get(this.dataset.getProperties(), this.targetURI); this.targetDatabase = Optional.fromNullable(this.dataset.getProperties().getProperty(TARGET_DATABASE_KEY)) .or(this.dataset.table.getDbName()); this.existingEntityPolicy = ExistingEntityPolicy.valueOf(this.dataset.getProperties() .getProperty(EXISTING_ENTITY_POLICY_KEY, DEFAULT_EXISTING_ENTITY_POLICY).toUpperCase()); this.unmanagedDataPolicy = UnmanagedDataPolicy.valueOf( this.dataset.getProperties().getProperty(UNMANAGED_DATA_POLICY_KEY, DEFAULT_UNMANAGED_DATA_POLICY) .toUpperCase()); this.deleteMethod = this.dataset.getProperties().containsKey(DELETE_FILES_ON_DEREGISTER) ? DeregisterFileDeleteMethod .valueOf(this.dataset.getProperties().getProperty(DELETE_FILES_ON_DEREGISTER).toUpperCase()) : DEFAULT_DEREGISTER_DELETE_METHOD; if (this.dataset.getProperties().containsKey(COPY_PARTITION_FILTER_GENERATOR)) { try { PartitionFilterGenerator generator = GobblinConstructorUtils.invokeFirstConstructor( (Class<PartitionFilterGenerator>) Class .forName(this.dataset.getProperties().getProperty(COPY_PARTITION_FILTER_GENERATOR)), Lists.<Object> newArrayList(this.dataset.getProperties()), Lists.newArrayList()); this.partitionFilter = Optional.of(generator.getFilter(this.dataset)); log.info(String.format("Dynamic partition filter for table %s: %s.", this.dataset.table.getCompleteName(), Optional.fromNullable(this.dataset.getProperties().getProperty(COPY_PARTITIONS_FILTER_CONSTANT)); if ( this.dataset.getProperties().containsKey(HIVE_PARTITION_EXTENDED_FILTER_TYPE)){ String filterType = dataset.getProperties().getProperty(HIVE_PARTITION_EXTENDED_FILTER_TYPE);
this.getTargetFs(), this.getDataset().getProperties()); this.getDataset().getProperties(), table.getDataLocation()); copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
if (hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.PARTITION_COLUMN) && hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.DATETIME_FORMAT) && hiveDataset.getProperties().containsKey(LookbackPartitionFilterGenerator.LOOKBACK)) { partitionFilter = Optional.of(new LookbackPartitionFilterGenerator(hiveDataset.getProperties()).getFilter(hiveDataset)); log.info(String.format("Getting partitions for %s using partition filter %s", hiveDataset.getTable() .getCompleteName(), partitionFilter.get()));
helper.getDataset().getFs(), helper.getDataset().getProperties());
HiveLocationDescriptor.forTable(getTable(), getHiveDataset().getFs(), getHiveDataset().getProperties()); HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forTable(this.helper.getTargetTable(), this.helper.getTargetFs(), getHiveDataset().getProperties()); HiveLocationDescriptor.forTable(existingTargetTable.get(), this.helper.getTargetFs(), getHiveDataset().getProperties())) : Optional.<HiveLocationDescriptor> absent(); DeleteFileCommitStep.fromPaths(this.helper.getTargetFs(), diffPathSet.pathsToDelete, getHiveDataset().getProperties()); copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String> newHashMap(), deleteStep, stepPriority++));