/**
 * Builds a {@code Source<T>} that reads the Avro file(s) located at the given path name.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param ptype The {@code AvroType} describing the Avro records
 * @return A new {@code Source<T>} instance
 */
public static <T> Source<T> avroFile(String pathName, PType<T> ptype) {
  // Convert the string form once and hand off to the Path-based overload.
  final Path avroPath = new Path(pathName);
  return avroFile(avroPath, ptype);
}
/**
 * Builds a {@code Source<T>} over the SequenceFile(s) at the given path name, exposing
 * only the value field of each key-value pair stored in the SequenceFile(s).
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param valueClass The {@code Writable} type of the SequenceFile entry values
 * @return A new {@code Source<T>} instance
 */
public static <T extends Writable> Source<T> sequenceFile(String pathName, Class<T> valueClass) {
  // Delegate to the Path-based overload after wrapping the string.
  final Path seqPath = new Path(pathName);
  return sequenceFile(seqPath, valueClass);
}
/**
 * Builds a {@code Source<T>} over the text file(s) at the given path name, converting
 * the input text with the supplied {@code PType<T>}.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param ptype The {@code PType<T>} used to process the input text
 * @return A new {@code Source<T>} instance
 */
public static <T> Source<T> textFile(String pathName, PType<T> ptype) {
  // Delegate to the Path-based overload after wrapping the string.
  final Path textPath = new Path(pathName);
  return textFile(textPath, ptype);
}
/**
 * Builds a {@code Source<GenericData.Record>} whose schema is read from the Avro file at
 * the given path, using the {@code FileSystem} information carried by the supplied
 * {@code Configuration}. When the path is a directory, the schema of a file inside that
 * directory determines the schema to use.
 *
 * @param path The path to the data on the filesystem
 * @param conf The configuration information
 * @return A new {@code Source<GenericData.Record>} instance
 */
public static Source<GenericData.Record> avroFile(Path path, Configuration conf) {
  // The schema lookup result is wrapped as a generic-record type and passed to the
  // (path, type) overload.
  return avroFile(
      path,
      Avros.generics(
          getSchemaFromPath(path, conf)));
}
return pipeline.read(From.avroFile(filePaths, pipeline.getConfiguration())); } else { return pipeline.read(From.avroFile(filePaths, Avros.generics(opts.inputFileReaderSchema))); return pipeline.read(source); } else if (opts.inputFileFormat.isAssignableFrom(TextInputFormat.class)) { Source source = From.textFile(filePaths); return pipeline.read(source); } else {
/**
 * Builds a {@code TableSource<K, V>} for the given path name by delegating to the
 * {@code Path}-based {@code formattedFile} overload.
 *
 * @param path The filesystem path to the data, expressed as a string
 * @param formatClass The {@code FileInputFormat} implementation class
 * @param keyType The {@code PType} for the key
 * @param valueType The {@code PType} for the value
 * @return The {@code TableSource<K, V>} produced by the {@code Path}-based overload
 */
public static <K, V> TableSource<K, V> formattedFile(String path,
    Class<? extends FileInputFormat> formatClass, PType<K> keyType, PType<V> valueType) {
  // Wrap the string once; all other arguments are forwarded untouched.
  final Path inputPath = new Path(path);
  return formattedFile(inputPath, formatClass, keyType, valueType);
}
/**
 * Builds a {@code TableSource<K,V>} that reads an Avro key/value file at the given path.
 *
 * @param path The path to the data on the filesystem
 * @param tableType Avro table type used to deserialize the table data
 * @return a new {@code TableSource<K,V>} instance for reading Avro key/value data
 */
public static <K, V> TableSource<K, V> avroTableFile(Path path, PTableType<K, V> tableType) {
  // Reuse the multi-path overload with a single-element list.
  final ImmutableList<Path> singlePath = ImmutableList.of(path);
  return avroTableFile(singlePath, tableType);
}
/**
 * Builds a {@code TableSource<ImmutableBytesWritable, Result>} for the given table by
 * delegating to the two-argument {@code hbaseTable} overload with a newly constructed
 * {@code Scan}.
 *
 * @param table The table identifier forwarded to the two-argument overload
 * @return The {@code TableSource} produced by the two-argument overload
 */
public static TableSource<ImmutableBytesWritable, Result> hbaseTable(String table) {
  // A no-argument Scan is supplied as the scan for the delegate call.
  final Scan defaultScan = new Scan();
  return hbaseTable(table, defaultScan);
}
/**
 * Builds a {@code SourceTarget<GenericData.Record>} whose schema is read from the Avro
 * file at the given path, using the {@code FileSystem} information carried by the supplied
 * {@code Configuration}. When the path is a directory, the schema of a file inside that
 * directory determines the schema to use.
 *
 * @param path The path to the data on the filesystem
 * @param conf The configuration information
 * @return A new {@code SourceTarget<GenericData.Record>} instance
 */
public static SourceTarget<GenericData.Record> avroFile(Path path, Configuration conf) {
  // Schema discovery is delegated to From; the result is wrapped as a generic-record
  // type and passed to the (path, type) overload.
  return avroFile(
      path,
      Avros.generics(
          From.getSchemaFromPath(path, conf)));
}
/**
 * Builds a {@code Source<GenericData.Record>} whose schema is read from the Avro file at
 * the first of the given paths, using the {@code FileSystem} information carried by the
 * supplied {@code Configuration}. When the first path is a directory, the schema of a file
 * inside that directory determines the schema to use.
 *
 * @param paths The paths to the data on the filesystem; must not be empty
 * @param conf The configuration information
 * @return A new {@code Source<GenericData.Record>} instance
 * @throws IllegalArgumentException if {@code paths} is empty
 */
public static Source<GenericData.Record> avroFile(List<Path> paths, Configuration conf) {
  Preconditions.checkArgument(!paths.isEmpty(), "At least one path must be supplied");
  // Only the first path is consulted for the schema; all paths are read with it.
  final Path firstPath = paths.get(0);
  return avroFile(paths, Avros.generics(getSchemaFromPath(firstPath, conf)));
}
/**
 * Builds a {@code TableSource<K, V>} for files read through a custom
 * {@code FileInputFormat<K, V>} implementation that the other {@code TableSource} and
 * {@code Source} factory methods do not cover.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param formatClass The {@code FileInputFormat} implementation
 * @param keyClass The {@code Writable} to use for the key
 * @param valueClass The {@code Writable} to use for the value
 * @return A new {@code TableSource<K, V>} instance
 */
public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
    String pathName,
    Class<? extends FileInputFormat<K, V>> formatClass,
    Class<K> keyClass,
    Class<V> valueClass) {
  // Wrap the string once; all other arguments are forwarded untouched.
  final Path inputPath = new Path(pathName);
  return formattedFile(inputPath, formatClass, keyClass, valueClass);
}
/**
 * Builds a {@code TableSource<K, V>} over the SequenceFile(s) at the given path name.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
 * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
 * @return A new {@code TableSource<K, V>} instance
 */
public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
    String pathName,
    Class<K> keyClass,
    Class<V> valueClass) {
  // Delegate to the Path-based overload after wrapping the string.
  final Path seqPath = new Path(pathName);
  return sequenceFile(seqPath, keyClass, valueClass);
}
/**
 * Builds a {@code Source<String>} over the text file(s) at the given path name.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @return A new {@code Source<String>} instance
 */
public static Source<String> textFile(String pathName) {
  // Delegate to the Path-based overload after wrapping the string.
  final Path textPath = new Path(pathName);
  return textFile(textPath);
}
/**
 * Builds a {@code Source<GenericData.Record>} whose schema is read from the Avro file at
 * the given path. When the path is a directory, the schema of a file inside that directory
 * determines the schema to use.
 *
 * @param path The path to the data on the filesystem
 * @return A new {@code Source<GenericData.Record>} instance
 */
public static Source<GenericData.Record> avroFile(Path path) {
  // No configuration was supplied, so a freshly constructed one is used.
  final Configuration defaultConf = new Configuration();
  return avroFile(path, defaultConf);
}
/**
 * Builds a {@code TableSource<K, V>} for files read through a custom
 * {@code FileInputFormat} implementation that the other {@code TableSource} and
 * {@code Source} factory methods do not cover.
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param formatClass The {@code FileInputFormat} implementation
 * @param keyType The {@code PType} to use for the key
 * @param valueType The {@code PType} to use for the value
 * @return A new {@code TableSource<K, V>} instance
 */
public static <K, V> TableSource<K, V> formattedFile(
    String pathName,
    Class<? extends FileInputFormat<?, ?>> formatClass,
    PType<K> keyType,
    PType<V> valueType) {
  // Wrap the string once; all other arguments are forwarded untouched.
  final Path inputPath = new Path(pathName);
  return formattedFile(inputPath, formatClass, keyType, valueType);
}
/**
 * Builds a {@code Source<T>} over the SequenceFile(s) at the given path name, exposing
 * only the value field of each key-value pair stored in the SequenceFile(s).
 *
 * @param pathName The filesystem path to the data, expressed as a string
 * @param ptype The {@code PType} for the value of the SequenceFile entry
 * @return A new {@code Source<T>} instance
 */
public static <T> Source<T> sequenceFile(String pathName, PType<T> ptype) {
  // Delegate to the Path-based overload after wrapping the string.
  final Path seqPath = new Path(pathName);
  return sequenceFile(seqPath, ptype);
}
/**
 * Returns a {@code Source<T>} reading the text file(s) at the given path name, with the
 * supplied {@code PType<T>} applied to convert the input text.
 *
 * @param pathName The name of the filesystem path holding the data
 * @param ptype The {@code PType<T>} that processes the input text
 * @return A new {@code Source<T>} instance
 */
public static <T> Source<T> textFile(String pathName, PType<T> ptype) {
  // String form is converted to a Path and forwarded to the Path-based overload.
  final Path location = new Path(pathName);
  return textFile(location, ptype);
}
/**
 * Builds a {@code Source<GenericData.Record>} whose schema is read from the Avro file at
 * the given paths. When the path is a directory, the schema of a file inside that
 * directory determines the schema to use.
 *
 * @param paths A list of paths to the data on the filesystem
 * @return A new {@code Source<GenericData.Record>} instance
 */
public static Source<GenericData.Record> avroFile(List<Path> paths) {
  // No configuration was supplied, so a freshly constructed one is used.
  final Configuration defaultConf = new Configuration();
  return avroFile(paths, defaultConf);
}
/**
 * Returns a {@code TableSource<K, V>} that reads files through a custom
 * {@code FileInputFormat<K, V>} implementation, for formats the provided
 * {@code TableSource} and {@code Source} factory methods do not cover.
 *
 * @param pathName The name of the filesystem path holding the data
 * @param formatClass The {@code FileInputFormat} implementation
 * @param keyClass The {@code Writable} to use for the key
 * @param valueClass The {@code Writable} to use for the value
 * @return A new {@code TableSource<K, V>} instance
 */
public static <K extends Writable, V extends Writable> TableSource<K, V> formattedFile(
    String pathName,
    Class<? extends FileInputFormat<K, V>> formatClass,
    Class<K> keyClass,
    Class<V> valueClass) {
  // String form is converted to a Path and forwarded to the Path-based overload.
  final Path location = new Path(pathName);
  return formattedFile(location, formatClass, keyClass, valueClass);
}
/**
 * Returns a {@code TableSource<K, V>} reading the SequenceFile(s) at the given path name.
 *
 * @param pathName The name of the filesystem path holding the data
 * @param keyClass The {@code Writable} subclass for the key of the SequenceFile entry
 * @param valueClass The {@code Writable} subclass for the value of the SequenceFile entry
 * @return A new {@code TableSource<K, V>} instance
 */
public static <K extends Writable, V extends Writable> TableSource<K, V> sequenceFile(
    String pathName,
    Class<K> keyClass,
    Class<V> valueClass) {
  // String form is converted to a Path and forwarded to the Path-based overload.
  final Path location = new Path(pathName);
  return sequenceFile(location, keyClass, valueClass);
}