/**
 * Compute the target location for a Hive location.
 * @param sourceFs Source {@link FileSystem}. NOTE(review): currently unused by the delegate call — confirm whether it is kept for interface symmetry.
 * @param targetFs Target {@link FileSystem} the transformed path is resolved against.
 * @param path source {@link Path} in Hive location.
 * @param partition partition these paths correspond to.
 * @return transformed location in the target.
 * @throws IOException if cannot generate a single target location.
 */
Path getTargetLocation(FileSystem sourceFs, FileSystem targetFs, Path path, Optional<Partition> partition)
    throws IOException {
  // Delegates the actual path-mapping policy to the configured target path helper.
  return getTargetPathHelper().getTargetPath(path, targetFs, partition, false);
}
/**
 * Builds the {@link FileSet} of {@link CopyEntity}s for a single Hive partition.
 * @param partition the Hive partition to generate copy entities for.
 * @return a {@link HivePartitionFileSet} backed by this helper and the dataset's properties.
 */
private FileSet<CopyEntity> fileSetForPartition(final Partition partition) {
  return new HivePartitionFileSet(HiveCopyEntityHelper.this, partition, HiveCopyEntityHelper.this.dataset.getProperties());
}
}
@Override
public boolean apply(String db) {
  // Predicate callback: delegates database-level filtering to the finder's whitelist/blacklist.
  return HiveDatasetFinder.this.whitelistBlacklist.acceptDb(db);
}
});
/**
 * Fetches all partitions of this dataset's table from the metastore.
 * @return the partitions sorted via {@link #sortPartitions}.
 * @throws IOException if the metastore cannot be queried.
 */
public List<Partition> getPartitionsFromDataset() throws IOException {
  // The client is borrowed from the pool and auto-returned when the try block exits.
  try (AutoReturnableObject<IMetaStoreClient> metastoreClient = getClientPool().getClient()) {
    return sortPartitions(HiveUtils.getPartitions(metastoreClient.get(), getTable(), Optional.<String>absent()));
  }
}
/** Verifies that with an empty whitelist every table exposed by the pool becomes a dataset. */
@Test
public void testDatasetFinder() throws Exception {
  List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList();
  for (String tableName : new String[] { "table1", "table2", "table3" }) {
    dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", tableName));
  }
  HiveMetastoreClientPool pool = getTestPool(dbAndTables);

  Properties properties = new Properties();
  properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

  HiveDatasetFinder finder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
  List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

  Assert.assertEquals(datasets.size(), 3);
}
/**
 * Creates one {@link FileSet} per source partition in {@code partitionMap}; any target-side
 * partitions left unmatched afterwards are collected into a single deregistration file set.
 * @param partitionMap source partitions keyed by partition values.
 * @return the file sets for all copies plus, possibly, one deregistration file set.
 */
private List<FileSet<CopyEntity>> generateAllFileSets(Map<List<String>, Partition> partitionMap) {
  List<FileSet<CopyEntity>> result = Lists.newArrayList();

  for (Map.Entry<List<String>, Partition> entry : partitionMap.entrySet()) {
    result.add(fileSetForPartition(entry.getValue()));
    // A partition that exists on the source must not be deregistered on the target.
    HiveCopyEntityHelper.this.targetPartitions.remove(entry.getKey());
  }

  boolean hasLeftoverTargetPartitions = !HiveCopyEntityHelper.this.targetPartitions.isEmpty();
  if (hasLeftoverTargetPartitions) {
    result.add(new HivePartitionsDeregisterFileSet(
        HiveCopyEntityHelper.this.dataset.getTable().getCompleteName() + DEREGISTER_FILE_SET,
        HiveCopyEntityHelper.this.dataset,
        HiveCopyEntityHelper.this.targetPartitions.values(),
        HiveCopyEntityHelper.this));
  }
  return result;
}
/**
 * Constructs a file set bound to a Hive dataset.
 * @param name name of this file set.
 * @param dataset Hive dataset whose table backs this file set.
 */
public HiveFileSet(String name, HiveDataset dataset) {
  super(name, dataset);
  // Cache the table and dataset locally for direct access by subclasses/methods.
  this.table = dataset.getTable();
  this.hiveDataset = dataset;
}
}
/**
 * Builds the iterator of {@link FileSet}s of {@link CopyEntity}s for this table.
 * Returns an empty iterator when the table is not copyable, or when planning the copy
 * fails with an {@link IOException} (best effort: the failure is logged, not rethrown,
 * so one bad table does not abort the whole job).
 * @param targetFs target {@link FileSystem} for the copy.
 * @param configuration copy configuration.
 * @return iterator over the file sets to copy; possibly empty, never null.
 * @throws IOException declared for interface compatibility; planning failures are caught and logged.
 */
@Override
public Iterator<FileSet<CopyEntity>> getFileSetIterator(FileSystem targetFs, CopyConfiguration configuration)
    throws IOException {
  if (!canCopyTable()) {
    // Guava's Iterators.emptyIterator() is deprecated; use the JDK equivalent.
    return java.util.Collections.<FileSet<CopyEntity>>emptyIterator();
  }
  try {
    return new HiveCopyEntityHelper(this, configuration, targetFs).getCopyEntities(configuration);
  } catch (IOException ioe) {
    log.error("Failed to copy table " + this.table, ioe);
    return java.util.Collections.<FileSet<CopyEntity>>emptyIterator();
  }
}
/**
 * Creates a {@link HiveLocationDescriptor} for the data location of a Hive partition.
 * @param partition Hive partition whose data location and storage-descriptor input format are used.
 * @param fs {@link FileSystem} the location belongs to.
 * @param properties properties used to configure the descriptor.
 * @return descriptor for the partition's data location.
 * @throws IOException NOTE(review): presumably thrown by {@code HiveUtils.getInputFormat} — confirm.
 */
public static HiveLocationDescriptor forPartition(Partition partition, FileSystem fs, Properties properties) throws IOException {
  return new HiveLocationDescriptor(partition.getDataLocation(), HiveUtils.getInputFormat(partition.getTPartition().getSd()), fs, properties);
}
/**
 * For backward compatibility when PathFilter is injected as a parameter.
 * Delegates to the extended-filter overload with an absent {@link HivePartitionExtendedFilter}.
 * @param client metastore client used to list partitions.
 * @param table Hive table whose partitions are requested.
 * @param filter optional partition filter; absent means no filtering.
 * @return the partitions of {@code table} matching {@code filter}.
 * @throws IOException if the partitions cannot be fetched from the metastore.
 */
public static List<Partition> getPartitions(IMetaStoreClient client, Table table, Optional<String> filter)
    throws IOException {
  return getPartitions(client, table, filter, Optional.<HivePartitionExtendedFilter>absent());
}
/**
 * Eagerly materializes every dataset produced by {@link #getDatasetsIterator()}.
 * @return all discovered Hive datasets as a list.
 * @throws IOException if dataset discovery fails.
 */
@Override
public List<HiveDataset> findDatasets() throws IOException {
  Iterator<HiveDataset> datasets = getDatasetsIterator();
  return Lists.newArrayList(datasets);
}
@Override
public boolean apply(String table) {
  // Predicate callback: checks the captured db together with the candidate table name.
  return HiveDatasetFinder.this.whitelistBlacklist.acceptTable(db, table);
}
});
/**
 * Convenience constructor that builds the metastore client pool from {@code properties}.
 * NOTE(review): the null fourth argument is presumably the EventSubmitter
 * (see the (fs, properties, clientPool, eventSubmitter) overload) — confirm.
 * @param fs filesystem used by the finder.
 * @param properties configuration properties; also used to create the client pool.
 * @param configClient config client used for dataset configuration lookup.
 * @throws IOException if the client pool cannot be created.
 */
protected HiveDatasetFinder(FileSystem fs, Properties properties, ConfigClient configClient)
    throws IOException {
  this(fs, properties, createClientPool(properties), null, configClient);
}
/**
 * See {@link #getCopyEntities(CopyConfiguration, Comparator, PushDownRequestor)}. This method does not pushdown any prioritizer.
 * @param configuration copy configuration.
 * @return iterator over the file sets of copy entities.
 * @throws IOException if copy entities cannot be generated.
 */
Iterator<FileSet<CopyEntity>> getCopyEntities(CopyConfiguration configuration) throws IOException {
  // Null comparator and requestor: no prioritization and no push-down applied.
  return getCopyEntities(configuration, null, null);
}
/**
 * Checks whether database {@code db} could contain any tables accepted by this
 * {@link WhitelistBlacklist}.
 * @param db database name to test.
 * @return true if the database is not excluded outright.
 */
public boolean acceptDb(String db) {
  // Absent table: test the database as a whole rather than a specific table.
  Optional<String> anyTable = Optional.absent();
  return accept(db, anyTable);
}
/**
 * Core acceptance check: the blacklist vetoes first, then the whitelist must match
 * (an empty whitelist accepts everything not blacklisted).
 * @param db database name to test.
 * @param table optional table name; absent means database-level check.
 * @return true if accepted by blacklist and whitelist rules.
 */
private boolean accept(String db, Optional<String> table) {
  boolean blacklisted =
      !this.blacklistMultimap.isEmpty() && multimapContains(this.blacklistMultimap, db, table, true);
  if (blacklisted) {
    return false;
  }
  if (this.whitelistMultimap.isEmpty()) {
    return true;
  }
  return multimapContains(this.whitelistMultimap, db, table, false);
}
/**
 * @return the table's data location, used as the root {@link Path} of this dataset.
 */
@Override
public Path datasetRoot() {
  return super.getTable().getDataLocation();
}
}
/**
 * Creates a {@link HiveLocationDescriptor} for the data location of a Hive table.
 * @param table Hive table whose data location and storage-descriptor input format are used.
 * @param fs {@link FileSystem} the location belongs to.
 * @param properties properties used to configure the descriptor.
 * @return descriptor for the table's data location.
 * @throws IOException NOTE(review): presumably thrown by {@code HiveUtils.getInputFormat} — confirm.
 */
public static HiveLocationDescriptor forTable(Table table, FileSystem fs, Properties properties) throws IOException {
  return new HiveLocationDescriptor(table.getDataLocation(), HiveUtils.getInputFormat(table.getTTable().getSd()), fs, properties);
}
/**
 * Convenience constructor that builds the metastore client pool from {@code properties}.
 * @param fs filesystem used by the finder.
 * @param properties configuration properties; also used to create the client pool.
 * @param eventSubmitter submitter used to emit events during dataset discovery.
 * @throws IOException if the client pool cannot be created.
 */
public HiveDatasetFinder(FileSystem fs, Properties properties, EventSubmitter eventSubmitter) throws IOException {
  this(fs, properties, createClientPool(properties), eventSubmitter);
}
/**
 * Convenience constructor that builds the metastore client pool from {@code properties}.
 * @param fs filesystem used by the finder.
 * @param properties configuration properties; also used to create the client pool.
 * @throws IOException if the client pool cannot be created.
 */
public HiveDatasetFinder(FileSystem fs, Properties properties) throws IOException {
  this(fs, properties, createClientPool(properties));
}