/**
 * Points the {@code HIVE_TABLE_AVRO_SCHEMA_URL} SerDe parameter of {@code sd} at the target file system.
 *
 * <p>The parameter is rewritten only when the current URL is either an absolute path without scheme and
 * authority, or a URL whose scheme and authority both match the URI of the source dataset's file system.
 * In every other case (parameter missing, URL on a different file system, no SerDe parameters at all)
 * the storage descriptor is left untouched.
 *
 * @param entity name of the entity to be changed, e.g. hive table or partition; only used for logging
 * @param sd StorageDescriptor of the entity; its SerDe parameters are modified in place
 * @param hiveHelper supplies the source dataset file system, the target file system and the target path helper
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  // Nothing to rewrite when the descriptor carries no SerDe parameters.
  if (sd.getSerdeInfo() == null || sd.getSerdeInfo().getParameters() == null) {
    return;
  }

  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL == null) {
    return;
  }

  Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
  URI oldAvroSchemaURI = oldAvroSchemaPath.toUri();
  URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

  // URI#getScheme() and URI#getAuthority() return null for undefined components (relative paths,
  // "file:/..." style URLs), so the comparison has to be null-safe.
  boolean onSourceFileSystem =
      java.util.Objects.equals(oldAvroSchemaURI.getScheme(), sourceFileSystemURI.getScheme())
          && java.util.Objects.equals(oldAvroSchemaURI.getAuthority(), sourceFileSystemURI.getAuthority());

  if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath) || onSourceFileSystem) {
    // NOTE(review): the meaning of the trailing 'true' flag is defined by getTargetPath and not visible here.
    String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath,
        hiveHelper.getTargetFileSystem(), Optional.<Partition>absent(), true).toString();

    sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
    log.info(String.format("For entity %s, change %s from %s to %s", entity, HIVE_TABLE_AVRO_SCHEMA_URL,
        oldAvroSchemaURL, newAvroSchemaURL));
  }
}
/**
 * Creates the file set for one source partition.
 *
 * <p>The superclass receives the partition's complete name and the helper's dataset. The constructor also
 * looks up the partition with the same values among the helper's target partitions and builds an event
 * submitter in the {@code hive.dataset.copy} namespace tagged with the partition name.
 *
 * @param hiveCopyEntityHelper helper providing the dataset and the known target partitions
 * @param partition source partition this file set is built for
 * @param properties properties stored on this file set
 */
public HivePartitionFileSet(HiveCopyEntityHelper hiveCopyEntityHelper, Partition partition, Properties properties) {
  super(partition.getCompleteName(), hiveCopyEntityHelper.getDataset());

  this.properties = properties;
  this.partition = partition;
  this.hiveCopyEntityHelper = hiveCopyEntityHelper;

  // Absent when the lookup by partition values yields null.
  this.existingTargetPartition =
      Optional.fromNullable(hiveCopyEntityHelper.getTargetPartitions().get(partition.getValues()));

  this.eventSubmitter =
      new EventSubmitter.Builder(hiveCopyEntityHelper.getDataset().getMetricContext(), "hive.dataset.copy")
          .addMetadata("Partition", partition.getName())
          .build();
}
helper.getExistingTargetTable().get().getParameters().get(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS)); HiveLocationDescriptor sourceHiveDescriptor = HiveLocationDescriptor.forTable(helper.getDataset().getTable(), helper.getDataset().getFs(), helper.getDataset().getProperties()); getFileStatus(helper.getDataset().getFs(), sourceHiveDescriptor.getLocation()); HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS, helper.getDataset().getTable().getDbName()+"."+helper.getDataset().getTable().getTableName())); return false; } catch (IOException ioe) {
getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());
this.getTargetFs(), this.getDataset().getProperties()); this.getDataset().getProperties(), table.getDataLocation()); copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
throw new IOException(String.format( "New table / partition would pick up existing, undesired files in target file system. " + "%s, files %s.", partition.isPresent() ? partition.get().getCompleteName() : helper.getDataset().getTable().getCompleteName(), Arrays.toString(desiredTargetExistingPaths.keySet().toArray())));
.getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());
/**
 * Verifies that {@code addTableDeregisterSteps} appends exactly one {@link PostPublishStep} describing the
 * table deregistration and returns the incremented step priority.
 */
@Test
public void testAddTableDeregisterSteps() throws Exception {
  // Table under test: a ql.metadata.Table backed by a metastore Table named TestDB.TestTable.
  org.apache.hadoop.hive.metastore.api.Table thriftTable =
      Mockito.mock(org.apache.hadoop.hive.metastore.api.Table.class);
  Mockito.when(thriftTable.getDbName()).thenReturn("TestDB");
  Mockito.when(thriftTable.getTableName()).thenReturn("TestTable");

  org.apache.hadoop.hive.ql.metadata.Table qlTable =
      Mockito.mock(org.apache.hadoop.hive.ql.metadata.Table.class);
  Mockito.when(qlTable.getTTable()).thenReturn(thriftTable);

  HiveDataset dataset = Mockito.mock(HiveDataset.class);
  Mockito.when(dataset.getProperties()).thenReturn(new Properties());

  // Mocked helper; only addTableDeregisterSteps runs its real implementation.
  HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class);
  Mockito.when(helper.getDataset()).thenReturn(dataset);
  Mockito.when(helper.getDeleteMethod()).thenReturn(DeregisterFileDeleteMethod.NO_DELETE);
  Mockito.when(helper.getTargetURI()).thenReturn(Optional.of("/targetURI"));
  Mockito.when(helper.getHiveRegProps()).thenReturn(new HiveRegProps(new State()));
  Mockito.when(helper.addTableDeregisterSteps(Mockito.any(List.class), Mockito.any(String.class),
      Mockito.anyInt(), Mockito.any(org.apache.hadoop.hive.ql.metadata.Table.class))).thenCallRealMethod();

  List<CopyEntity> copyEntities = new ArrayList<CopyEntity>();
  int initialPriority = 0;
  String fileSet = "testFileSet";

  int nextPriority = helper.addTableDeregisterSteps(copyEntities, fileSet, initialPriority, qlTable);

  Assert.assertTrue(nextPriority == 1);
  Assert.assertTrue(copyEntities.size() == 1);

  CopyEntity onlyEntity = copyEntities.get(0);
  Assert.assertTrue(onlyEntity instanceof PostPublishStep);

  PostPublishStep publishStep = (PostPublishStep) onlyEntity;
  String stepDescription = publishStep.getStep().toString();
  Assert.assertTrue(stepDescription.contains("Deregister table TestDB.TestTable on Hive metastore /targetURI"));
}
HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class); HiveTargetPathHelper targetPathHelper = Mockito.mock(HiveTargetPathHelper.class); Mockito.when(helper.getDataset()).thenReturn(hiveDataset); Mockito.when(hiveDataset.getTable()).thenReturn(table); Mockito.when(table.getCompleteName()).thenReturn("table1");
Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs, hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition)); Partition targetPartition = getTargetPartition(this.partition, targetPath); HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties); HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties); if (diffPathSet.pathsToDelete.size() > 0) { DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties); copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String> newHashMap(), deleteStep, stepPriority++));
HiveCopyEntityHelper helper = Mockito.mock(HiveCopyEntityHelper.class); HiveTargetPathHelper targetPathHelper = Mockito.mock(HiveTargetPathHelper.class); Mockito.when(helper.getDataset()).thenReturn(hiveDataset); Mockito.when(hiveDataset.getTable()).thenReturn(table); Mockito.when(table.getCompleteName()).thenReturn("table1");
Mockito.doReturn(dataset).when(helper).getDataset();
/**
 * Points the {@code HIVE_TABLE_AVRO_SCHEMA_URL} SerDe parameter of {@code sd} at the target file system.
 *
 * <p>The parameter is rewritten only when the current URL is either an absolute path without scheme and
 * authority, or a URL whose scheme and authority both match the URI of the source dataset's file system.
 * In every other case (parameter missing, URL on a different file system, no SerDe parameters at all)
 * the storage descriptor is left untouched.
 *
 * @param entity name of the entity to be changed, e.g. hive table or partition; only used for logging
 * @param sd StorageDescriptor of the entity; its SerDe parameters are modified in place
 * @param hiveHelper supplies the source dataset file system, the target file system and the target path helper
 */
public static void updateAvroSchemaURL(String entity, StorageDescriptor sd, HiveCopyEntityHelper hiveHelper) {
  // Nothing to rewrite when the descriptor carries no SerDe parameters.
  if (sd.getSerdeInfo() == null || sd.getSerdeInfo().getParameters() == null) {
    return;
  }

  String oldAvroSchemaURL = sd.getSerdeInfo().getParameters().get(HIVE_TABLE_AVRO_SCHEMA_URL);
  if (oldAvroSchemaURL == null) {
    return;
  }

  Path oldAvroSchemaPath = new Path(oldAvroSchemaURL);
  URI oldAvroSchemaURI = oldAvroSchemaPath.toUri();
  URI sourceFileSystemURI = hiveHelper.getDataset().getFs().getUri();

  // URI#getScheme() and URI#getAuthority() return null for undefined components (relative paths,
  // "file:/..." style URLs), so the comparison has to be null-safe.
  boolean onSourceFileSystem =
      java.util.Objects.equals(oldAvroSchemaURI.getScheme(), sourceFileSystemURI.getScheme())
          && java.util.Objects.equals(oldAvroSchemaURI.getAuthority(), sourceFileSystemURI.getAuthority());

  if (PathUtils.isAbsoluteAndSchemeAuthorityNull(oldAvroSchemaPath) || onSourceFileSystem) {
    // NOTE(review): the meaning of the trailing 'true' flag is defined by getTargetPath and not visible here.
    String newAvroSchemaURL = hiveHelper.getTargetPathHelper().getTargetPath(oldAvroSchemaPath,
        hiveHelper.getTargetFileSystem(), Optional.<Partition>absent(), true).toString();

    sd.getSerdeInfo().getParameters().put(HIVE_TABLE_AVRO_SCHEMA_URL, newAvroSchemaURL);
    log.info(String.format("For entity %s, change %s from %s to %s", entity, HIVE_TABLE_AVRO_SCHEMA_URL,
        oldAvroSchemaURL, newAvroSchemaURL));
  }
}
/**
 * Creates the file set for one source partition.
 *
 * <p>The superclass receives the partition's complete name and the helper's dataset. The constructor also
 * looks up the partition with the same values among the helper's target partitions and builds an event
 * submitter in the {@code hive.dataset.copy} namespace tagged with the partition name.
 *
 * @param hiveCopyEntityHelper helper providing the dataset and the known target partitions
 * @param partition source partition this file set is built for
 * @param properties properties stored on this file set
 */
public HivePartitionFileSet(HiveCopyEntityHelper hiveCopyEntityHelper, Partition partition, Properties properties) {
  super(partition.getCompleteName(), hiveCopyEntityHelper.getDataset());

  this.properties = properties;
  this.partition = partition;
  this.hiveCopyEntityHelper = hiveCopyEntityHelper;

  // Absent when the lookup by partition values yields null.
  this.existingTargetPartition =
      Optional.fromNullable(hiveCopyEntityHelper.getTargetPartitions().get(partition.getValues()));

  this.eventSubmitter =
      new EventSubmitter.Builder(hiveCopyEntityHelper.getDataset().getMetricContext(), "hive.dataset.copy")
          .addMetadata("Partition", partition.getName())
          .build();
}
helper.getExistingTargetTable().get().getParameters().get(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS)); HiveLocationDescriptor sourceHiveDescriptor = HiveLocationDescriptor.forTable(helper.getDataset().getTable(), helper.getDataset().getFs(), helper.getDataset().getProperties()); getFileStatus(helper.getDataset().getFs(), sourceHiveDescriptor.getLocation()); HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS, helper.getDataset().getTable().getDbName()+"."+helper.getDataset().getTable().getTableName())); return false; } catch (IOException ioe) {
getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());
this.getTargetFs(), this.getDataset().getProperties()); this.getDataset().getProperties(), table.getDataLocation()); copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), deletePaths, stepPriority++));
throw new IOException(String.format( "New table / partition would pick up existing, undesired files in target file system. " + "%s, files %s.", partition.isPresent() ? partition.get().getCompleteName() : helper.getDataset().getTable().getCompleteName(), Arrays.toString(desiredTargetExistingPaths.keySet().toArray())));
.getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());
Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs, hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition)); Partition targetPartition = getTargetPartition(this.partition, targetPath); HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties); HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties); if (diffPathSet.pathsToDelete.size() > 0) { DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties); copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String> newHashMap(), deleteStep, stepPriority++));