/**
 * Builds the full list of file sets for the given partitions.
 *
 * <p>For every entry of {@code partitionMap}, a file set is created through {@code fileSetForPartition} and
 * the entry's key is removed from the enclosing helper's {@code targetPartitions}. If {@code targetPartitions}
 * still has entries once all supplied partitions are processed, one {@link HivePartitionsDeregisterFileSet}
 * built from the remaining values is appended at the end of the list.
 *
 * <p>Note: this method mutates {@code targetPartitions} of the enclosing {@code HiveCopyEntityHelper}.
 *
 * @param partitionMap the partitions to generate file sets for
 * @return a new list with one file set per supplied partition, optionally followed by a deregister file set
 */
private List<FileSet<CopyEntity>> generateAllFileSets(Map<List<String>, Partition> partitionMap) {
  final HiveCopyEntityHelper helper = HiveCopyEntityHelper.this;
  final List<FileSet<CopyEntity>> result = Lists.newArrayList();

  for (Map.Entry<List<String>, Partition> entry : partitionMap.entrySet()) {
    FileSet<CopyEntity> partitionFileSet = fileSetForPartition(entry.getValue());
    result.add(partitionFileSet);
    // This key was handled above, so drop it from the target side.
    helper.targetPartitions.remove(entry.getKey());
  }

  // Nothing left over on the target side: no deregister file set is needed.
  if (helper.targetPartitions.isEmpty()) {
    return result;
  }

  String deregisterName = helper.dataset.getTable().getCompleteName() + DEREGISTER_FILE_SET;
  result.add(new HivePartitionsDeregisterFileSet(deregisterName, helper.dataset,
      helper.targetPartitions.values(), helper));
  return result;
}
/**
 * Creates an iterator over the file sets generated for the given partitions.
 *
 * <p>The file sets are generated eagerly via {@code generateAllFileSets}, each one is handed the
 * {@code requestor}, and the list is then sorted with {@code prioritizer} when one is supplied.
 *
 * @param partitionMap the partitions to generate file sets for
 * @param configuration the copy configuration (not read by this constructor)
 * @param prioritizer ordering applied to the generated file sets; {@code null} leaves the generated order
 * @param requestor requestor set on every generated file set
 */
public PartitionIterator(Map<List<String>, Partition> partitionMap, CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor) {
  List<FileSet<CopyEntity>> generated = generateAllFileSets(partitionMap);

  for (FileSet<CopyEntity> current : generated) {
    current.setRequestor(requestor);
  }

  // Sorting is optional: without a prioritizer the generation order is kept.
  if (prioritizer != null) {
    Collections.sort(generated, prioritizer);
  }

  this.allFileSets = generated;
  this.fileSetIterator = generated.iterator();
}
/**
 * Locates every file read by the table and produces the {@link CopyEntity}s needed to duplicate the table.
 * The process is:
 * 1. Determine the valid {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}s. For a partitioned
 *    table, the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of the base table is ignored
 *    and the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of each partition is processed
 *    instead.
 * 2. For each {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}, find all files it refers to.
 * 3. Generate a {@link CopyableFile} for every file referred to by a
 *    {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.
 * 4. For a partitioned table, create one file set per partition.
 * 5. Create work units for registering and deregistering partitions / tables, and for deleting unnecessary
 *    files in the target.
 *
 * For how target locations are computed, see {@link HiveTargetPathHelper#getTargetPath}
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor)
    throws IOException {
  // Unpartitioned tables are represented by a single file set covering the whole table.
  if (!HiveUtils.isPartitioned(this.dataset.table)) {
    FileSet<CopyEntity> wholeTable =
        new UnpartitionedTableFileSet(this.dataset.table.getCompleteName(), this.dataset, this);
    return Iterators.singletonIterator(wholeTable);
  }

  return new PartitionIterator(this.sourcePartitions, configuration, prioritizer, requestor);
}
/**
 * Builds the full list of file sets for the given partitions.
 *
 * <p>For every entry of {@code partitionMap}, a file set is created through {@code fileSetForPartition} and
 * the entry's key is removed from the enclosing helper's {@code targetPartitions}. If {@code targetPartitions}
 * still has entries once all supplied partitions are processed, one {@link HivePartitionsDeregisterFileSet}
 * built from the remaining values is appended at the end of the list.
 *
 * <p>Note: this method mutates {@code targetPartitions} of the enclosing {@code HiveCopyEntityHelper}.
 *
 * @param partitionMap the partitions to generate file sets for
 * @return a new list with one file set per supplied partition, optionally followed by a deregister file set
 */
private List<FileSet<CopyEntity>> generateAllFileSets(Map<List<String>, Partition> partitionMap) {
  final HiveCopyEntityHelper helper = HiveCopyEntityHelper.this;
  final List<FileSet<CopyEntity>> result = Lists.newArrayList();

  for (Map.Entry<List<String>, Partition> entry : partitionMap.entrySet()) {
    FileSet<CopyEntity> partitionFileSet = fileSetForPartition(entry.getValue());
    result.add(partitionFileSet);
    // This key was handled above, so drop it from the target side.
    helper.targetPartitions.remove(entry.getKey());
  }

  // Nothing left over on the target side: no deregister file set is needed.
  if (helper.targetPartitions.isEmpty()) {
    return result;
  }

  String deregisterName = helper.dataset.getTable().getCompleteName() + DEREGISTER_FILE_SET;
  result.add(new HivePartitionsDeregisterFileSet(deregisterName, helper.dataset,
      helper.targetPartitions.values(), helper));
  return result;
}
/**
 * Creates an iterator over the file sets generated for the given partitions.
 *
 * <p>The file sets are generated eagerly via {@code generateAllFileSets}, each one is handed the
 * {@code requestor}, and the list is then sorted with {@code prioritizer} when one is supplied.
 *
 * @param partitionMap the partitions to generate file sets for
 * @param configuration the copy configuration (not read by this constructor)
 * @param prioritizer ordering applied to the generated file sets; {@code null} leaves the generated order
 * @param requestor requestor set on every generated file set
 */
public PartitionIterator(Map<List<String>, Partition> partitionMap, CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor) {
  List<FileSet<CopyEntity>> generated = generateAllFileSets(partitionMap);

  for (FileSet<CopyEntity> current : generated) {
    current.setRequestor(requestor);
  }

  // Sorting is optional: without a prioritizer the generation order is kept.
  if (prioritizer != null) {
    Collections.sort(generated, prioritizer);
  }

  this.allFileSets = generated;
  this.fileSetIterator = generated.iterator();
}
/**
 * Locates every file read by the table and produces the {@link CopyEntity}s needed to duplicate the table.
 * The process is:
 * 1. Determine the valid {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}s. For a partitioned
 *    table, the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of the base table is ignored
 *    and the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of each partition is processed
 *    instead.
 * 2. For each {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}, find all files it refers to.
 * 3. Generate a {@link CopyableFile} for every file referred to by a
 *    {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.
 * 4. For a partitioned table, create one file set per partition.
 * 5. Create work units for registering and deregistering partitions / tables, and for deleting unnecessary
 *    files in the target.
 *
 * For how target locations are computed, see {@link HiveTargetPathHelper#getTargetPath}
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor)
    throws IOException {
  // Unpartitioned tables are represented by a single file set covering the whole table.
  if (!HiveUtils.isPartitioned(this.dataset.table)) {
    FileSet<CopyEntity> wholeTable =
        new UnpartitionedTableFileSet(this.dataset.table.getCompleteName(), this.dataset, this);
    return Iterators.singletonIterator(wholeTable);
  }

  return new PartitionIterator(this.sourcePartitions, configuration, prioritizer, requestor);
}