private void runTestWrite(String[] elems, String... base64) throws IOException { File tmpFile = Files.createTempFile(tempFolder.getRoot().toPath(), "file", ".tfrecords").toFile(); String filename = tmpFile.getPath(); PCollection<byte[]> input = writePipeline .apply(Create.of(Arrays.asList(elems))) .apply(ParDo.of(new StringToByteArray())); TFRecordIO.Write write = TFRecordIO.write().to(filename).withoutSharding(); input.apply(write); writePipeline.run(); FileInputStream fis = new FileInputStream(tmpFile); String written = BaseEncoding.base64().encode(ByteStreams.toByteArray(fis)); // bytes written may vary depending the order of elems assertThat(written, isIn(base64)); }
"Write via TFRecordIO.write", TFRecordIO.write() .to(baseFilenameViaWrite) .withNumShards(numShards) .withSuffix(suffix)
/**
 * Checks that a {@code TFRecordIO.write()} transform configured with a prefix, suffix, shard name
 * template, shard count and compression exposes each of those settings as a display item.
 */
@Test
public void testWriteDisplayData() {
  // Build the fully configured transform and capture its display data in one step.
  DisplayData data =
      DisplayData.from(
          TFRecordIO.write()
              .to("/foo")
              .withSuffix("bar")
              .withShardNameTemplate("-SS-of-NN-")
              .withNumShards(100)
              .withCompression(GZIP));

  // One assertion per configured option.
  assertThat(data, hasDisplayItem("filePrefix", "/foo"));
  assertThat(data, hasDisplayItem("fileSuffix", "bar"));
  assertThat(data, hasDisplayItem("shardNameTemplate", "-SS-of-NN-"));
  assertThat(data, hasDisplayItem("numShards", 100));
  assertThat(data, hasDisplayItem("compressionType", GZIP.toString()));
}
/**
 * Writes TFRecord file(s) with the given output prefix. The {@code outputPrefix} will be used to
 * generate a {@link ResourceId} using any supported {@link FileSystem}.
 *
 * <p>In addition to their prefix, created files will have a shard identifier (see {@link
 * #withNumShards(int)}), and end in a common suffix, if given by {@link #withSuffix(String)}.
 *
 * <p>For more information on filenames, see {@link DefaultFilenamePolicy}.
 *
 * @param outputPrefix the output prefix, passed through {@link
 *     FileBasedSink#convertToFileResourceIfPossible(String)} before being applied
 * @return the {@code Write} returned by the {@code to} overload that accepts the converted
 *     resource
 */
public Write to(String outputPrefix) {
  // Convert the string prefix to a resource and delegate to the resource-based overload.
  return to(FileBasedSink.convertToFileResourceIfPossible(outputPrefix));
}