/**
 * Writes to Avro file(s) with the specified metadata.
 *
 * <p>Supported value types are String, Long, and byte[]. All entries are validated before the
 * metadata is stored, and every offending key is reported together with the simple class name of
 * its value ({@code "null"} for a null value).
 *
 * @param metadata the metadata entries to attach; copied via {@code ImmutableMap.copyOf}
 * @return the {@code TypedWrite} built from this instance's builder with the metadata set
 * @throws IllegalArgumentException if any value is null or is not a String, Long, or byte[]
 */
public TypedWrite<UserT, DestinationT, OutputT> withMetadata(Map<String, Object> metadata) {
  Map<String, String> badKeys = Maps.newLinkedHashMap();
  for (Map.Entry<String, Object> entry : metadata.entrySet()) {
    Object v = entry.getValue();
    if (!(v instanceof String || v instanceof Long || v instanceof byte[])) {
      // instanceof is false for null, so guard getClass() and report the null value as a bad
      // entry instead of failing with a NullPointerException inside the validation loop.
      badKeys.put(entry.getKey(), v == null ? "null" : v.getClass().getSimpleName());
    }
  }
  // NOTE(review): assumes checkArgument is Guava-style Preconditions, whose templates only
  // substitute "%s"; the previous "{}" placeholder was never filled in.
  checkArgument(
      badKeys.isEmpty(),
      "Metadata value type must be one of String, Long, or byte[]. Found %s",
      badKeys);
  return toBuilder().setMetadata(ImmutableMap.copyOf(metadata)).build();
}
/**
 * Sets the function used to convert each {@code UserT} element into the output type.
 *
 * <p>When {@link #to(DynamicAvroDestinations)} is used, {@link
 * DynamicAvroDestinations#formatRecord} must be used instead of this method.
 *
 * @param formatFunction the conversion function; may be null
 * @return the {@code TypedWrite} built from this instance's builder with the function set
 */
public TypedWrite<UserT, DestinationT, OutputT> withFormatFunction(
    @Nullable SerializableFunction<UserT, OutputT> formatFunction) {
  return this.toBuilder()
      .setFormatFunction(formatFunction)
      .build();
}
/**
 * Uses a {@link DynamicAvroDestinations} object to vend {@link FilenamePolicy} objects. These
 * objects can examine the input record when creating a {@link FilenamePolicy}.
 *
 * <p>A directory for temporary files must be specified using {@link #withTempDirectory}.
 *
 * @param dynamicDestinations the destinations object to store on the builder
 * @return the resulting write, typed with {@code NewDestinationT} as its destination type
 * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} instead.
 */
@Experimental(Kind.FILESYSTEM)
@Deprecated
public <NewDestinationT> TypedWrite<UserT, NewDestinationT, OutputT> to(
    DynamicAvroDestinations<UserT, NewDestinationT, OutputT> dynamicDestinations) {
  // NOTE(review): the raw cast looks deliberate -- it is presumably what allows a builder typed
  // with DestinationT to yield a NewDestinationT-typed write. Confirm before tightening generics.
  return this.toBuilder()
      .setDynamicDestinations((DynamicAvroDestinations) dynamicDestinations)
      .build();
}
/**
 * A {@link PTransform} that writes a {@link PCollection} to an avro file (or multiple avro files
 * matching a sharding pattern), with each element of the input collection encoded into its own
 * record of type OutputT.
 *
 * <p>This version lets {@link AvroIO} writes be applied to a PCollection of a custom type
 * {@code UserT}. A format mechanism that converts the input type {@code UserT} to the output
 * type written to the file must be specified: with a custom {@link DynamicAvroDestinations}
 * object this is done through {@link DynamicAvroDestinations#formatRecord}; otherwise {@link
 * AvroIO.TypedWrite#withFormatFunction} can be used to supply a format function.
 *
 * <p>The advantage of using a custom type is that it allows a user-provided {@link
 * DynamicAvroDestinations} object, set via {@link AvroIO.Write#to(DynamicAvroDestinations)}, to
 * examine the custom type when choosing a destination.
 *
 * <p>If the output type is {@link GenericRecord}, use {@link #writeCustomTypeToGenericRecords()}
 * instead.
 *
 * @return the default write builder's result with generic records disabled
 */
public static <UserT, OutputT> TypedWrite<UserT, Void, OutputT> writeCustomType() {
  return AvroIO.<UserT, OutputT>defaultWriteBuilder()
      .setGenericRecords(false)
      .build();
}
/**
 * Configures the number of output shards produced overall (when using unwindowed writes) or
 * per-window (when using windowed writes).
 *
 * <p>For unwindowed writes, constraining the number of shards is likely to reduce the
 * performance of a pipeline. Setting this value is not recommended unless you require a
 * specific number of output files.
 *
 * @param numShards the number of shards to use, or 0 to let the system decide.
 * @return the {@code TypedWrite} built from this instance's builder with the shard count set
 * @throws IllegalArgumentException if {@code numShards} is negative
 */
public TypedWrite<UserT, DestinationT, OutputT> withNumShards(int numShards) {
  // Include the offending value so a failure is diagnosable from the exception alone.
  checkArgument(numShards >= 0, "numShards must be non-negative, but was: %s", numShards);
  return toBuilder().setNumShards(numShards).build();
}
/**
 * Writes to Avro file(s) compressed using the specified codec.
 *
 * @param codec the codec factory; wrapped in a {@code SerializableAvroCodecFactory} before being
 *     stored on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the codec set
 */
public TypedWrite<UserT, DestinationT, OutputT> withCodec(CodecFactory codec) {
  return this.toBuilder()
      .setCodec(new SerializableAvroCodecFactory(codec))
      .build();
}
/**
 * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}.
 *
 * <p>A directory for temporary files must be specified using {@link #withTempDirectory}.
 *
 * @param filenamePolicy the policy to store on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the policy set
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> to(FilenamePolicy filenamePolicy) {
  return this.toBuilder()
      .setFilenamePolicy(filenamePolicy)
      .build();
}
/**
 * Like {@link #to(ResourceId)}, but takes the output prefix as a {@link ValueProvider}.
 *
 * @param outputPrefix the provider stored on the builder as the filename prefix
 * @return the {@code TypedWrite} built from this instance's builder with the prefix set
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> toResource(
    ValueProvider<ResourceId> outputPrefix) {
  return this.toBuilder()
      .setFilenamePrefix(outputPrefix)
      .build();
}
/**
 * Writes Avro records of the specified schema.
 *
 * @param schema the schema set on the underlying write builder
 * @return a {@code Write} wrapping a builder result with generic records enabled and the given
 *     schema
 */
public static Write<GenericRecord> writeGenericRecords(Schema schema) {
  return new Write<>(
      AvroIO.<GenericRecord, GenericRecord>defaultWriteBuilder()
          .setGenericRecords(true)
          .setSchema(schema)
          .build());
}
/**
 * Names output files using the given {@link ShardNameTemplate}. This option may only be used
 * together with one of the default filename-prefix to() overrides.
 *
 * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
 * used.
 *
 * @param shardTemplate the shard template stored on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the template set
 */
public TypedWrite<UserT, DestinationT, OutputT> withShardNameTemplate(String shardTemplate) {
  return this.toBuilder()
      .setShardTemplate(shardTemplate)
      .build();
}
/**
 * Writes a {@link PCollection} to an Avro file (or multiple Avro files matching a sharding
 * pattern).
 *
 * @param recordClass the element class; its schema is obtained through {@code
 *     ReflectData.get().getSchema(recordClass)}
 * @return a {@code Write} wrapping a builder result with generic records disabled and the
 *     reflected schema set
 */
public static <T> Write<T> write(Class<T> recordClass) {
  return new Write<>(
      AvroIO.<T, T>defaultWriteBuilder()
          .setGenericRecords(false)
          .setSchema(ReflectData.get().getSchema(recordClass))
          .build());
}
/**
 * Sets the filename suffix for written files. This option may only be used together with one of
 * the default filename-prefix to() overrides.
 *
 * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
 * used.
 *
 * @param filenameSuffix the suffix stored on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the suffix set
 */
public TypedWrite<UserT, DestinationT, OutputT> withSuffix(String filenameSuffix) {
  return this.toBuilder()
      .setFilenameSuffix(filenameSuffix)
      .build();
}
/**
 * Similar to {@link #writeCustomType()}, but specialized for the case where the output type is
 * {@link GenericRecord}.
 *
 * <p>A schema must be specified either in {@link DynamicAvroDestinations#getSchema} or, when not
 * using dynamic destinations, through {@link TypedWrite#withSchema(Schema)}.
 *
 * @return the default write builder's result with generic records enabled
 */
public static <UserT> TypedWrite<UserT, Void, GenericRecord> writeCustomTypeToGenericRecords() {
  return AvroIO.<UserT, GenericRecord>defaultWriteBuilder()
      .setGenericRecords(true)
      .build();
}
/**
 * Preserves windowing of input elements and writes them to files based on the element's window.
 *
 * <p>If using {@link #to(FileBasedSink.FilenamePolicy)}, filenames will be generated using
 * {@link FilenamePolicy#windowedFilename}. See also {@link WriteFiles#withWindowedWrites()}.
 *
 * @return the {@code TypedWrite} built from this instance's builder with windowed writes enabled
 */
public TypedWrite<UserT, DestinationT, OutputT> withWindowedWrites() {
  return this.toBuilder()
      .setWindowedWrites(true)
      .build();
}
/**
 * Sets the base directory used to generate temporary files.
 *
 * @param tempDirectory the provider of the temporary directory, stored on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the directory set
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> withTempDirectory(
    ValueProvider<ResourceId> tempDirectory) {
  return this.toBuilder()
      .setTempDirectory(tempDirectory)
      .build();
}
/**
 * Sets the output schema. Can only be used when the output type is {@link GenericRecord} and
 * when not using {@link #to(DynamicAvroDestinations)}.
 *
 * @param schema the schema stored on the builder
 * @return the {@code TypedWrite} built from this instance's builder with the schema set
 */
public TypedWrite<UserT, DestinationT, OutputT> withSchema(Schema schema) {
  return this.toBuilder()
      .setSchema(schema)
      .build();
}