@SuppressWarnings("unchecked") private boolean setInputPaths(JobContext jobContext, Job job) throws IOException { List<Path> paths = Lists.newArrayList((Iterator) (view == null ? dataset.pathIterator() : view.pathIterator())); LOG.debug("Input paths: {}", paths); if (paths.isEmpty()) { return false; } FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()])); // the following line is needed for Hadoop 1, otherwise the paths are not set Configuration contextConf = Hadoop.JobContext .getConfiguration.invoke(jobContext); Configuration jobConf = Hadoop.JobContext .getConfiguration.invoke(job); contextConf.set("mapred.input.dir", jobConf.get("mapred.input.dir")); return true; }
/**
 * Replaces the whole partitioned dataset with the temporary dataset and
 * verifies that the file names listed afterwards are the replacement's.
 */
@Test
public void testPartitionedReplace() {
  // a dataset may replace itself as a whole, but not be tested against another
  Assert.assertTrue("Should allow replacing a whole dataset",
      partitioned.canReplace(partitioned));
  Assert.assertFalse(
      "Should not allow replacement test with a different dataset",
      partitioned.canReplace(temporary));

  // snapshot the file names on both sides before the replacement
  Set<String> before = Sets.newHashSet(
      Iterators.transform(partitioned.pathIterator(), new GetFilename()));
  Set<String> incoming = Sets.newHashSet(
      Iterators.transform(temporary.pathIterator(), new GetFilename()));

  // same number of files, but different names
  int beforeCount = before.size();
  int incomingCount = incoming.size();
  Assert.assertEquals("Sanity check", beforeCount, incomingCount);
  Assert.assertFalse("Sanity check", before.equals(incoming));

  partitioned.replace(partitioned, temporary);

  Set<String> after = Sets.newHashSet(
      Iterators.transform(partitioned.pathIterator(), new GetFilename()));
  Assert.assertEquals("Should contain the replacement files",
      incoming, after);
}
Iterators.transform(partitioned.pathIterator(), new GetFilename())); Set<String> replacementFiles = Sets.newHashSet( Iterators.transform(temporary.pathIterator(), new GetFilename())); Assert.assertFalse("Sanity check", originalFiles.equals(replacementFiles)); Iterators.transform(partitioned.pathIterator(), new GetFilename())); Assert.assertEquals("Should contain the only the replacement files", replacementFiles, replacedFiles);
/**
 * Replaces only the {@code id_hash=0} partition of the partitioned dataset
 * with the matching partition of the temporary dataset, then verifies that
 * the dataset lists its previous files minus that partition's files plus
 * the replacement files.
 */
@Test
public void testReplaceSinglePartition() {
  FileSystemPartitionView<TestRecord> targetPartition =
      partitioned.getPartitionView(new Path("id_hash=0"));
  FileSystemPartitionView<TestRecord> sourcePartition =
      temporary.getPartitionView(new Path("id_hash=0"));

  Assert.assertTrue("Should allow replacing a single partition",
      partitioned.canReplace(targetPartition));
  Assert.assertFalse(
      "Should not allow replacement test with a different dataset",
      partitioned.canReplace(sourcePartition));

  // file names in the incoming partition and in the partition being replaced
  Set<String> incoming = Sets.newHashSet(
      Iterators.transform(sourcePartition.pathIterator(), new GetFilename()));
  Set<String> outgoing = Sets.newHashSet(
      Iterators.transform(targetPartition.pathIterator(), new GetFilename()));

  // same number of files, but different names
  int outgoingCount = outgoing.size();
  int incomingCount = incoming.size();
  Assert.assertEquals("Sanity check", outgoingCount, incomingCount);
  Assert.assertFalse("Sanity check", outgoing.equals(incoming));

  // expected result: everything currently listed, with the one partition swapped
  Set<String> expected = Sets.newHashSet(
      Iterators.transform(partitioned.pathIterator(), new GetFilename()));
  expected.removeAll(outgoing);
  expected.addAll(incoming);

  partitioned.replace(targetPartition, sourcePartition);

  Set<String> actual = Sets.newHashSet(
      Iterators.transform(partitioned.pathIterator(), new GetFilename()));
  Assert.assertEquals("Should contain the replacement files",
      expected, actual);
}
FileSystemDataset<Object> inDataset = (FileSystemDataset) repo.create( "import", "inputformat", inDescriptor); Iterator<Path> iter = inDataset.pathIterator().iterator(); Preconditions.checkArgument(iter.hasNext(), "Data path has no data files: " + source);
@Test public void testUnpartitionedReplace() { // recreate temporary without a partition strategy Datasets.delete("dataset:file:/tmp/datasets/temporary"); DatasetDescriptor descriptor = new DatasetDescriptor .Builder(unpartitioned.getDescriptor()) .location((URI) null) // clear the location .build(); temporary = Datasets.create("dataset:file:/tmp/datasets/temporary", descriptor, TestRecord.class); Assert.assertTrue("Should allow replacing an unpartitioned dataset", unpartitioned.canReplace(unpartitioned)); // make sure there are multiple files writeTestRecords(unpartitioned); writeTestRecords(unpartitioned); writeTestRecords(temporary); writeTestRecords(temporary); Set<String> originalFiles = Sets.newHashSet( Iterators.transform(unpartitioned.pathIterator(), new GetFilename())); Set<String> replacementFiles = Sets.newHashSet( Iterators.transform(temporary.pathIterator(), new GetFilename())); Iterators.transform(temporary.pathIterator(), new GetFilename()); Assert.assertFalse("Sanity check", originalFiles.equals(replacementFiles)); unpartitioned.replace(unpartitioned, temporary); Set<String> replacedFiles = Sets.newHashSet( Iterators.transform(unpartitioned.pathIterator(), new GetFilename())); Assert.assertEquals("Should contain the replacement files", replacementFiles, replacedFiles); }
(FileSystemDataset) repo.create("import", "json", jsonDescriptor); Iterator<Path> iter = jsonDataset.pathIterator().iterator(); Preconditions.checkArgument(iter.hasNext(), "JSON path has no data files: " + source);
Iterators.transform(partitioned.pathIterator(), new GetFilename())); Assert.assertEquals("Should contain the replacement files", replacementFiles, replacedFiles);
Iterators.transform(partitioned.pathIterator(), new GetFilename())); Assert.assertEquals("Should contain the replacement files", replacementFiles, replacedFiles);
(FileSystemDataset) repo.create("default", "csv", csvDescriptor); Iterator<Path> iter = csvDataset.pathIterator().iterator(); Preconditions.checkArgument(iter.hasNext(), "CSV path has no data files: " + source);