// issues fresh random transaction/attempt IDs for an isolated output context
private OutputAttemptContext createOutputContext() {
    String tx = UUID.randomUUID().toString();
    String attempt = UUID.randomUUID().toString();
    return new OutputAttemptContext(tx, attempt, id, new Counter());
}
/**
 * Creates output context from execution ID and datasource ID.
 * @param executionId current execution ID
 * @param datasourceId target datasource ID
 * @return output context
 * @throws IllegalArgumentException if any parameter is {@code null}
 */
public static OutputTransactionContext createContext(String executionId, String datasourceId) {
    if (executionId == null) {
        throw new IllegalArgumentException("executionId must not be null"); //$NON-NLS-1$
    }
    if (datasourceId == null) {
        throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
    }
    return new OutputTransactionContext(executionId, datasourceId, new Counter());
}
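// Usage sketch (not part of the original source): the execution and
// datasource IDs below are hypothetical placeholders; any non-null
// strings are accepted, and a fresh Counter is attached automatically.
//
//     OutputTransactionContext context = createContext("exec-0001", "sales-out");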
/**
 * Truncates the current target.
 * @param resourcePattern the target resource pattern
 * @throws IOException if the operation failed due to an I/O error
 * @since 0.7.3
 */
public void truncate(String resourcePattern) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format(
                "Truncating Direct I/O resources: {0}:{1} (id={2})", //$NON-NLS-1$
                fullPath, resourcePattern, id));
    }
    try {
        dataSource.delete(basePath, FilePattern.compile(resourcePattern), true, new Counter());
    } catch (InterruptedException e) {
        throw (IOException) new InterruptedIOException("interrupted").initCause(e); //$NON-NLS-1$
    }
}
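// Usage sketch (hedged): the receiver and the pattern below are hypothetical
// examples; the pattern string is compiled with FilePattern.compile(), so it
// must follow Direct I/O resource pattern syntax.
//
//     output.truncate("**/*.csv"); // delete all CSV files under the base path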
private static <T> DataModelSourceFactory load0(
        DataModelDefinition<T> definition,
        HadoopFileFormat<? super T> format,
        File source) throws IOException, InterruptedException {
    try (ModelInput<? super T> input = format.createInput(
            definition.getModelClass(),
            org.apache.hadoop.fs.FileSystem.getLocal(format.getConf()),
            new org.apache.hadoop.fs.Path(source.toURI()),
            new Counter())) {
        return collect(definition, input);
    }
}
DirectOutputSink(
        DataModelDefinition<T> definition,
        HadoopFileFormat<? super T> format,
        org.apache.hadoop.fs.Path destination) throws IOException, InterruptedException {
    this.definition = definition;
    this.output = format.createOutput(
            definition.getModelClass(),
            org.apache.hadoop.fs.FileSystem.getLocal(format.getConf()),
            destination,
            new Counter());
}
private <T> ModelInput<T> open0(
        Class<? extends DataFormat<T>> formatClass,
        String originalBasePath,
        ResourcePattern resourcePattern) throws IOException, InterruptedException {
    DirectDataSourceRepository repo = prepareRepository();
    String basePath = repo.getComponentPath(originalBasePath);
    DirectDataSource source = repo.getRelatedDataSource(originalBasePath);
    DataDefinition<T> definition = createDataDefinition(formatClass);
    List<DirectInputFragment> fragments = source.findInputFragments(definition, basePath, resourcePattern);
    return new DirectInputFragmentInput<>(source, definition, fragments.iterator(), new Counter());
}
private <T> RecordReader<NullWritable, Object> createRecordReader(
        DataDefinition<T> definition,
        BridgeInputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    assert definition != null;
    assert split != null;
    assert context != null;
    Configuration conf = context.getConfiguration();
    T buffer = ReflectionUtils.newInstance(definition.getDataClass(), conf);
    Counter counter = new Counter();
    ModelInput<T> input = createInput(context, split.group.containerPath, definition, counter, split.fragment);
    return new BridgeRecordReader<>(input, buffer, counter, split.fragment.getSize());
}
private static Counter createCounter(JobContext context) {
    assert context != null;
    // prefer a progress-aware counter when the context can report progress,
    // falling back to a plain counter otherwise
    if (context instanceof Progressable) {
        return new ProgressableCounter((Progressable) context);
    } else if (context instanceof org.apache.hadoop.mapred.JobContext) {
        return new ProgressableCounter(((org.apache.hadoop.mapred.JobContext) context).getProgressible());
    } else {
        return new Counter();
    }
}
/**
 * Initializes the test.
 * @throws Exception if an error occurs
 */
@Before
public void setUp() throws Exception {
    conf = new Configuration(true);
    if (definition.getDataFormat() instanceof Configurable) {
        ((Configurable) definition.getDataFormat()).setConf(conf);
    }
    mapping = new File(temp.getRoot(), "mapping").getCanonicalFile();
    temporary = new File(temp.getRoot(), "temporary").getCanonicalFile();
    localtemp = new File(temp.getRoot(), "localtemp").getCanonicalFile();
    profile = new HadoopDataSourceProfile(
            conf, "testing", "testing",
            new Path(mapping.toURI()),
            new Path(temporary.toURI()));
    context = new OutputAttemptContext("tx", "atmpt", profile.getId(), new Counter());
}
/**
 * move many files using multiple threads.
 * @throws Exception if failed
 */
@Test
public void move_threads() throws Exception {
    List<String> paths = new ArrayList<>();
    List<String> expects = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
        paths.add(String.format("src/%04d.csv", i));
        expects.add(String.format("dst/%04d.csv", i));
    }
    for (String s : paths) {
        touch(s);
    }
    FileSystem fs = getTempFileSystem();
    HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"), 4);
    assertThat(collect(), is(path(expects.toArray(new String[expects.size()]))));
}
/**
 * Test for input from an invalid file.
 * @throws Exception if failed
 */
@Test(expected = IOException.class)
public void input_invalid() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    try (FSDataOutputStream output = fs.create(path)) {
        output.writeUTF("Hello, world!");
    }
    try (ModelInput<StringOption> in = format.createInput(
            StringOption.class, fs, path,
            0, fs.getFileStatus(path).getLen(),
            new Counter())) {
        // do nothing
    }
}
/**
 * Test for input from an empty file.
 * @throws Exception if failed
 */
@Test
public void input_empty() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    fs.create(path).close();
    try (ModelInput<StringOption> in = format.createInput(
            StringOption.class, fs, path,
            0, fs.getFileStatus(path).getLen(),
            new Counter())) {
        assertThat("eof", in.readTo(new StringOption()), is(false));
    }
}
/**
 * output with an invalid compression codec setting (falls back to no compression).
 * @throws Exception if failed
 */
@Test
public void output_compressed_invalid() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    format.getConf().set(SequenceFileFormat.KEY_COMPRESSION_CODEC, "__INVALID__");
    try (ModelOutput<StringOption> out = format.createOutput(StringOption.class, fs, path, new Counter())) {
        out.write(new StringOption("Hello, world!"));
    }
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(fs.makeQualified(path)))) {
        assertThat(reader.getCompressionCodec(), is(nullValue()));
    }
}
/**
 * compressed output configured via the configuration key.
 * @throws Exception if failed
 */
@Test
public void output_compressed_conf() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    format.getConf().set(SequenceFileFormat.KEY_COMPRESSION_CODEC, DefaultCodec.class.getName());
    try (ModelOutput<StringOption> out = format.createOutput(StringOption.class, fs, path, new Counter())) {
        out.write(new StringOption("Hello, world!"));
    }
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(fs.makeQualified(path)))) {
        assertThat(reader.getCompressionCodec(), instanceOf(DefaultCodec.class));
    }
}
/**
 * output with compression explicitly disabled.
 * @throws Exception if failed
 */
@Test
public void output_no_compressed() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing.gz").toURI());
    try (ModelOutput<StringOption> out = format.codec(null)
            .createOutput(StringOption.class, fs, path, new Counter())) {
        out.write(new StringOption("Hello, world!"));
    }
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(fs.makeQualified(path)))) {
        assertThat(reader.getCompressionCodec(), is(nullValue()));
    }
}
/**
 * compressed output.
 * @throws Exception if failed
 */
@Test
public void output_compressed() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    try (ModelOutput<StringOption> out = format.codec(new DefaultCodec())
            .createOutput(StringOption.class, fs, path, new Counter())) {
        out.write(new StringOption("Hello, world!"));
    }
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(fs.makeQualified(path)))) {
        assertThat(reader.getCompressionCodec(), instanceOf(DefaultCodec.class));
    }
}
/**
 * move a single file.
 * @throws Exception if failed
 */
@Test
public void move_simple() throws Exception {
    touch("src/a.csv");
    FileSystem fs = getTempFileSystem();
    HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
    assertThat(collect(), is(path("dst/a.csv")));
}
/**
 * move files into a non-empty destination.
 * @throws Exception if failed
 */
@Test
public void move_merge() throws Exception {
    touch("src/a.csv");
    touch("src/b.csv");
    touch("dst/c.csv");
    FileSystem fs = getTempFileSystem();
    HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
    assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv")));
}
/**
 * move multiple files.
 * @throws Exception if failed
 */
@Test
public void move_multiple() throws Exception {
    touch("src/a.csv");
    touch("src/b.csv");
    touch("src/c.csv");
    FileSystem fs = getTempFileSystem();
    HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
    assertThat(collect(), is(path("dst/a.csv", "dst/b.csv", "dst/c.csv")));
}
/**
 * move files in nested directories.
 * @throws Exception if failed
 */
@Test
public void move_deep() throws Exception {
    touch("src/a.csv");
    touch("src/a/b.csv");
    touch("src/a/b/c.csv");
    FileSystem fs = getTempFileSystem();
    HadoopDataSourceUtil.move(new Counter(), fs, getPath("src"), getPath("dst"));
    assertThat(collect(), is(path("dst/a.csv", "dst/a/b.csv", "dst/a/b/c.csv")));
}