/**
 * Produces the {@link CopyEntity} file sets needed to replicate this table, covering every file the table reads.
 *
 * <p>Overall semantics of the copy:
 * <ol>
 *   <li>Locate every valid {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}. For a partitioned table
 *       the base table's {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} is ignored and the
 *       {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of each partition is processed instead.</li>
 *   <li>For each {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}, find all files it refers to.</li>
 *   <li>Generate a {@link CopyableFile} for each file referred to by a
 *       {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.</li>
 *   <li>If the table is partitioned, create one file set per partition.</li>
 *   <li>Create work units for registering and deregistering partitions / tables, and for deleting unnecessary
 *       files in the target.</li>
 * </ol>
 *
 * <p>For computation of target locations see {@link HiveTargetPathHelper#getTargetPath}.
 *
 * @param configuration copy configuration; handed to the {@link PartitionIterator} and not consulted here for
 *        unpartitioned tables
 * @param prioritizer ordering for file sets; handed to the {@link PartitionIterator} and not consulted here for
 *        unpartitioned tables
 * @param requestor push-down requestor; handed to the {@link PartitionIterator} and not consulted here for
 *        unpartitioned tables
 * @return a {@link PartitionIterator} over the source partitions when the table is partitioned; otherwise an
 *         iterator yielding exactly one {@link UnpartitionedTableFileSet} named after the table's complete name
 * @throws IOException declared by this method; presumably propagated from the objects constructed here
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor)
    throws IOException {
  // Guard clause: an unpartitioned table is represented by a single file set for the whole table.
  if (!HiveUtils.isPartitioned(this.dataset.table)) {
    FileSet<CopyEntity> wholeTableFileSet =
        new UnpartitionedTableFileSet(this.dataset.table.getCompleteName(), this.dataset, this);
    return Iterators.singletonIterator(wholeTableFileSet);
  }

  // Partitioned table: iterate over the source partitions.
  return new PartitionIterator(this.sourcePartitions, configuration, prioritizer, requestor);
}
/**
 * Discovers the files backing the table and returns the {@link CopyEntity} file sets used to duplicate it.
 *
 * <p>The copy follows these steps:
 * <ol>
 *   <li>Find all valid {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}s. When the table is
 *       partitioned, the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of the base table is
 *       skipped in favor of the {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} of each
 *       partition.</li>
 *   <li>Resolve all files referred to by each {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.</li>
 *   <li>Emit one {@link CopyableFile} per file referred to by a
 *       {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor}.</li>
 *   <li>For a partitioned table, build a separate file set for each partition.</li>
 *   <li>Add work units that register or deregister partitions / tables and delete unnecessary files in the
 *       target.</li>
 * </ol>
 *
 * <p>Target locations are computed as described in {@link HiveTargetPathHelper#getTargetPath}.
 *
 * @param configuration copy configuration, forwarded to the {@link PartitionIterator} in the partitioned case only
 * @param prioritizer file set comparator, forwarded to the {@link PartitionIterator} in the partitioned case only
 * @param requestor push-down requestor, forwarded to the {@link PartitionIterator} in the partitioned case only
 * @return for a partitioned table, a {@link PartitionIterator} built from the source partitions; for an
 *         unpartitioned table, a single-element iterator holding one {@link UnpartitionedTableFileSet}
 * @throws IOException declared by this method; presumably raised by the constructors it invokes
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration,
    Comparator<FileSet<CopyEntity>> prioritizer, PushDownRequestor<FileSet<CopyEntity>> requestor)
    throws IOException {
  final boolean partitioned = HiveUtils.isPartitioned(this.dataset.table);

  if (partitioned) {
    // One file set per partition is produced by the partition iterator.
    return new PartitionIterator(this.sourcePartitions, configuration, prioritizer, requestor);
  }

  // No partitions: the whole table becomes exactly one file set, named after the table's complete name.
  final String fileSetName = this.dataset.table.getCompleteName();
  FileSet<CopyEntity> tableFileSet = new UnpartitionedTableFileSet(fileSetName, this.dataset, this);
  return Iterators.singletonIterator(tableFileSet);
}