/**
 * Checks the configured settings and applies a {@link FileIO} write that emits XML files.
 *
 * <p>The record class, root element, filename prefix and charset must all be non-null. The
 * record class is handed to {@link JAXBContext#newInstance} before any transform is applied;
 * a {@link JAXBException} from that call is rethrown wrapped in a {@link RuntimeException}.
 * Files are written to the current directory of the resolved filename prefix, using the
 * prefix's filename as the file prefix and {@code .xml} as the suffix, with windowing ignored.
 *
 * @param input the collection of records to write
 * @return a {@link PDone} in the pipeline that {@code input} belongs to
 */
@Override
public PDone expand(PCollection<T> input) {
  // Required settings, checked in the same order the builder methods are reported.
  checkArgument(getRecordClass() != null, "withRecordClass() is required");
  checkArgument(getRootElement() != null, "withRootElement() is required");
  checkArgument(getFilenamePrefix() != null, "to() is required");
  checkArgument(getCharset() != null, "withCharset() is required");

  // The created context is discarded; only a failure to create it matters here.
  try {
    JAXBContext.newInstance(getRecordClass());
  } catch (JAXBException bindingFailure) {
    throw new RuntimeException("Error binding classes to a JAXB Context.", bindingFailure);
  }

  // The prefix is resolved as a file (not a directory) and then split into directory + name.
  ResourceId filenamePrefix =
      FileSystems.matchNewResource(getFilenamePrefix(), false /* isDirectory */);
  Charset outputCharset = Charset.forName(getCharset());
  String outputDirectory = filenamePrefix.getCurrentDirectory().toString();

  input.apply(
      FileIO.<T>write()
          .via(
              sink(getRecordClass())
                  .withCharset(outputCharset)
                  .withRootElement(getRootElement()))
          .to(outputDirectory)
          .withPrefix(filenamePrefix.getFilename())
          .withSuffix(".xml")
          .withIgnoreWindowing());

  return PDone.in(input.getPipeline());
}
"Filenames should be resolved within a relative directory if '.to' is invoked", expected, getDefaultFileName(FileIO.write().to("test").withNaming(foo))); + "is not invoked", "foo", getDefaultFileName(FileIO.write().withNaming(foo))); resolveFileNaming(FileIO.write()) .getFilename( GlobalWindow.INSTANCE, "Default Naming should take prefix and suffix into account if provided", "foo-00000-of-00000.bar", resolveFileNaming(FileIO.write().withPrefix("foo").withSuffix(".bar")) .getFilename( GlobalWindow.INSTANCE, .resolve("output-00000-of-00000", RESOLVE_FILE) .toString(), resolveFileNaming(FileIO.write().to("test")) .getFilename( GlobalWindow.INSTANCE,
pc1 = UnboundedWrite.ofDefaultWindow(pc1); WriteFilesResult results = pc1.apply(FileIO.<String> write().withNumShards(1).via(TextIO.sink()).to(path)); return PDone.in(results.getPipeline());
pc1 = UnboundedWrite.ofDefaultWindow(pc1); WriteFilesResult results = pc1.apply(FileIO.<String> write().withNumShards(1).via(TextIO.sink()).to(path)); return PDone.in(results.getPipeline());
FileIO.<GenericClass>write() .via(AvroIO.sink(GenericClass.class)) .to(baseDir.toString())
/**
 * Builds and runs a pipeline that writes Parquet files.
 *
 * <p>The pipeline generates the sequence from 0 to 10, passes it through
 * {@code DeterministicallyConstructTestTextLineFn} and then
 * {@code DeterministicallyConstructAvroRecordsFn}, sets an Avro coder for {@code SCHEMA} on the
 * result, and writes it through a Parquet sink to the location given by the {@code output}
 * option.
 *
 * @param args command-line arguments, parsed and validated into {@code Options}
 */
public static void main(String[] args) {
  Options parsedOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // The write transform is configured up front so the pipeline chain below reads step by step.
  FileIO.Write<Void, GenericRecord> parquetWrite =
      FileIO.<GenericRecord>write()
          .via(ParquetIO.sink(SCHEMA))
          .to(parsedOptions.getOutput());

  Pipeline p = Pipeline.create(parsedOptions);
  p.apply("Generate sequence", GenerateSequence.from(0).to(10))
      .apply("Produce text lines", ParDo.of(new DeterministicallyConstructTestTextLineFn()))
      .apply("Produce Avro records", ParDo.of(new DeterministicallyConstructAvroRecordsFn()))
      .setCoder(AvroCoder.of(SCHEMA))
      .apply("Write Parquet files", parquetWrite);

  p.run();
}
/**
 * Writes 100 {@code Bird} records as XML into a single shard, then reads them back.
 *
 * <p>The write runs on {@code mainPipeline} with the prefix {@code birds} and the suffix
 * {@code .xml} inside the temporary folder. The read runs on {@code readPipeline} over every
 * file whose path starts with that prefix, and the result must contain the same birds in any
 * order.
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadLarger() {
  List<Bird> birds = Lists.newArrayList();
  int birdNumber = 0;
  while (birdNumber < 100) {
    birds.add(new Bird("Testing", "Bird number " + birdNumber));
    birdNumber++;
  }

  String outputDir = tmpFolder.getRoot().getAbsolutePath();
  mainPipeline
      .apply(Create.of(birds))
      .apply(
          FileIO.<Bird>write()
              .via(XmlIO.sink(Bird.class).withRootElement("birds"))
              .to(outputDir)
              .withPrefix("birds")
              .withSuffix(".xml")
              .withNumShards(1));
  mainPipeline.run();

  // Glob over everything in the temp folder whose name starts with the "birds" prefix.
  String writtenFilesPattern = new File(tmpFolder.getRoot(), "birds").getAbsolutePath() + "*";
  PCollection<Bird> readBack =
      readPipeline.apply(
          XmlIO.<Bird>read()
              .from(writtenFilesPattern)
              .withRecordClass(Bird.class)
              .withRootElement("birds")
              .withRecordElement("bird")
              .withMinBundleSize(100));

  PAssert.that(readBack).containsInAnyOrder(birds);
  readPipeline.run();
}
/**
 * Writes six short strings through a text sink and reads them back from the reported files.
 *
 * <p>The write targets the temporary folder with a {@code .txt} suffix and ignores windowing.
 * The per-destination output filenames of the write result are reduced to their values and fed
 * to {@code TextIO.readAll()}; the lines read must equal the original strings in any order.
 *
 * @throws Exception declared by the test signature
 */
@Test
@Category(NeedsRunner.class)
public void testWriteViaSink() throws Exception {
  List<String> expectedLines = ImmutableList.of("a", "b", "c", "d", "e", "f");
  String outputDir = tempFolder.getRoot().toString();

  PAssert.that(
          p.apply(Create.of(expectedLines))
              .apply(
                  FileIO.<String>write()
                      .to(outputDir)
                      .withSuffix(".txt")
                      .via(TextIO.sink())
                      .withIgnoreWindowing())
              // Round trip: take the filenames that were written and read their contents.
              .getPerDestinationOutputFilenames()
              .apply(Values.create())
              .apply(TextIO.readAll()))
      .containsInAnyOrder(expectedLines);

  p.run();
}
FileIO.<byte[]>write() .via(TFRecordIO.sink()) .to(baseDir.toString())
.apply(Create.of(birds)) .apply( FileIO.<Bird>write() .via(XmlIO.sink(Bird.class).withRootElement("birds").withCharset(charset)) .to(tmpFolder.getRoot().getAbsolutePath())