// Fragment: trailing argument of a set-schema call that begins before this
// chunk (a BYTES value schema), followed by mapper/reducer wiring for an
// AvroStoreBuilder job. NOTE(review): enclosing method is not visible here —
// consult the full source for context.
Schema.create(Schema.Type.BYTES))); AvroJob.setMapperClass(conf, mapperClass); conf.setReducerClass(AvroStoreBuilderReducer.class);
/**
 * Runs a map-only (zero reducers) AvroMultipleOutputs job over a generated
 * lines file with string input and an uncompressed Pair<Utf8,Long> output
 * schema, plus one named output "myavro2".
 *
 * @throws Exception if job setup or execution fails
 */
@SuppressWarnings("deprecation")
public void testJobNoreducer() throws Exception {
  JobConf job = new JobConf();
  job.setNumReduceTasks(0); // map-only: no reduce phase

  // Clear output from any previous run; delete(Path, true) is the
  // non-deprecated recursive form of FileSystem#delete.
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  WordCountUtil.writeLinesFile(new File(INPUT_DIR.getRoot(), "lines.avro"));

  job.setJobName("AvroMultipleOutputs_noreducer");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);

  FileInputFormat.setInputPaths(job, new Path(INPUT_DIR.getRoot().toString()));
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);

  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));

  JobClient.runJob(job);
}
/**
 * Word-counts a generated lines file through AvroTrevniOutputFormat
 * (compressed output) and validates the resulting counts file.
 *
 * @throws Exception if job setup or execution fails
 */
public void testOutputFormat() throws Exception {
  JobConf conf = new JobConf();
  WordCountUtil util = new WordCountUtil("trevniMapredTest");
  util.writeLinesFile();

  // Schemas and map/combine/reduce wiring for the word-count pipeline.
  AvroJob.setInputSchema(conf, STRING);
  AvroJob.setOutputSchema(conf, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(conf, MapImpl.class);
  AvroJob.setCombinerClass(conf, ReduceImpl.class);
  AvroJob.setReducerClass(conf, ReduceImpl.class);

  String base = util.getDir().toString();
  FileInputFormat.setInputPaths(conf, new Path(base + "/in"));
  FileOutputFormat.setOutputPath(conf, new Path(base + "/out"));
  FileOutputFormat.setCompressOutput(conf, true);
  conf.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(conf);
  util.validateCountsFile();
}
public void testInputFormat() throws Exception { JobConf job = new JobConf(); WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest"); Schema subSchema = Schema.parse("{\"type\":\"record\"," + "\"name\":\"PairValue\","+ "\"fields\": [ " + "{\"name\":\"value\", \"type\":\"long\"}" + "]}"); AvroJob.setInputSchema(job, subSchema); AvroJob.setMapperClass(job, Counter.class); FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*")); job.setInputFormat(AvroTrevniInputFormat.class); job.setNumReduceTasks(0); // map-only job.setOutputFormat(NullOutputFormat.class); // ignore output total = 0; JobClient.runJob(job); assertEquals(WordCountUtil.TOTAL, total); }
/**
 * Runs the word-count map/reduce job with string input, Pair<Utf8,Long>
 * compressed output and job metadata, then validates the counts file
 * produced under {@code pathOut}.
 *
 * @param pathOut directory the job writes its part files to
 * @throws Exception if job setup or execution fails
 */
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  WordCountUtil.writeLinesFile(pathIn + "/lines.avro");

  // Clear stale output; delete(Path, true) is the non-deprecated
  // recursive form of FileSystem#delete.
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath, true);

  job.setJobName("wordcount");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);

  FileInputFormat.setInputPaths(job, new Path(pathIn));
  FileOutputFormat.setOutputPath(job, new Path(pathOut));
  FileOutputFormat.setCompressOutput(job, true);
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);
  WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro"));
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }
// Fragment of job setup: the output schema is the Weather record itself and
// the map/reduce classes are the sort implementations. NOTE(review): the
// enclosing method is not visible in this chunk — confirm against full source.
AvroJob.setOutputSchema(job, Weather.SCHEMA$); AvroJob.setMapperClass(job, SortMapper.class); AvroJob.setReducerClass(job, SortReducer.class);
/**
 * Word-count job that also registers three AvroMultipleOutputs named outputs
 * ("myavro" with a Pair schema, "myavro1" and "myavro2" with string schemas)
 * alongside the main uncompressed Pair<Utf8,Long> output, then validates the
 * main counts file.
 *
 * @param pathOut output directory for the job
 * @throws Exception if job setup or execution fails
 */
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  File fileIn = new File(pathIn, "lines.avro");

  // Clear stale output; delete(Path, true) is the non-deprecated
  // recursive form of FileSystem#delete.
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath, true);

  WordCountUtil.writeLinesFile(fileIn);

  job.setJobName("AvroMultipleOutputs");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);

  FileInputFormat.setInputPaths(job, pathIn);
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);

  AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class,
      new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);
  // java.io.File normalizes the child path, so the original's leading '/'
  // in "/part-00000.avro" was redundant.
  WordCountUtil.validateCountsFile(new File(outputPath.toString(), "part-00000.avro"));
}
/**
 * Creates a JobConf for a map-only job. The input schema is loaded
 * automatically from each input file.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param outputSchema schema of the mapper output
 * @return a configured JobConf
 * @throws IOException propagated from the no-arg createJobConf()
 * @throws URISyntaxException propagated from the no-arg createJobConf()
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass, Schema outputSchema)
    throws IOException, URISyntaxException {
  JobConf conf = createJobConf();
  conf.setNumReduceTasks(0); // map-only: the reduce phase never runs
  AvroJob.setMapperClass(conf, mapperClass);
  // Base AvroReducer is still registered, matching the other overloads,
  // even though no reducers execute.
  AvroJob.setReducerClass(conf, AvroReducer.class);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
/**
 * Creates a JobConf for a full map-reduce job, loading the input schema from
 * the input files.
 *
 * @param mapperClass AvroMapper subclass for the map phase
 * @param reducerClass AvroReducer subclass for the reduce phase
 * @param mapperOutputSchema mapper output schema; must be an
 *     org.apache.avro.mapred.Pair schema
 * @param outputSchema reducer output schema
 * @return a configured JobConf
 * @throws IOException propagated from the no-arg createJobConf()
 * @throws URISyntaxException propagated from the no-arg createJobConf()
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
    Class<? extends AvroReducer> reducerClass, Schema mapperOutputSchema, Schema outputSchema)
    throws IOException, URISyntaxException {
  JobConf conf = createJobConf();
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setReducerClass(conf, reducerClass);
  AvroJob.setOutputSchema(conf, outputSchema);
  return conf;
}
public static void main(String... args) throws Exception { JobConf job = new JobConf(); job.setJarByClass(SmallFilesMapReduce.class); Path input = new Path(args[0]); Path output = new Path(args[1]); output.getFileSystem(job).delete(output, true); AvroJob.setInputSchema(job, Stock.SCHEMA$); //<co id="ch03_avro_mr_comment1"/> AvroJob.setMapOutputSchema(job, Pair.getPairSchema(Stock.SCHEMA$, Schema.create(Schema.Type.NULL))); AvroJob.setOutputSchema(job, Stock.SCHEMA$); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); AvroJob.setMapperClass(job, Mapper.class); //<co id="ch03_smallfilemr_comment2"/> AvroJob.setReducerClass(job, Reducer.class); FileOutputFormat.setCompressOutput(job, true); AvroJob.setOutputCodec(job, SNAPPY_CODEC); JobClient.runJob(job); }
/**
 * Creates a JobConf for a map-only job with an explicitly supplied input
 * schema (instead of loading it from the input files).
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param inputSchema schema of the input data
 * @param outputSchema schema of the mapper output
 * @return a configured JobConf
 * @throws IOException propagated from the no-arg createJobConf()
 * @throws URISyntaxException propagated from the no-arg createJobConf()
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass, Schema inputSchema,
    Schema outputSchema) throws IOException, URISyntaxException {
  JobConf conf = createJobConf();
  conf.setNumReduceTasks(0); // map-only: the reduce phase never runs
  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  AvroJob.setMapperClass(conf, mapperClass);
  // Base AvroReducer is still registered, matching the other overloads,
  // even though no reducers execute.
  AvroJob.setReducerClass(conf, AvroReducer.class);
  return conf;
}
/**
 * Creates a JobConf for a full map-reduce job with an explicitly supplied
 * input schema.
 *
 * @param mapperClass AvroMapper subclass for the map phase
 * @param reducerClass AvroReducer subclass for the reduce phase
 * @param inputSchema schema of the input data
 * @param mapperOutputSchema mapper output schema; must be an
 *     org.apache.avro.mapred.Pair schema
 * @param outputSchema reducer output schema
 * @return a configured JobConf
 * @throws IOException propagated from the no-arg createJobConf()
 * @throws URISyntaxException propagated from the no-arg createJobConf()
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
    Class<? extends AvroReducer> reducerClass, Schema inputSchema, Schema mapperOutputSchema,
    Schema outputSchema) throws IOException, URISyntaxException {
  JobConf conf = createJobConf();
  AvroJob.setInputSchema(conf, inputSchema);
  AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
  AvroJob.setOutputSchema(conf, outputSchema);
  AvroJob.setMapperClass(conf, mapperClass);
  AvroJob.setReducerClass(conf, reducerClass);
  return conf;
}
// Wires the caller-supplied mapper, reducer, and combiner classes into the
// JobConf. NOTE(review): the enclosing method is not visible in this chunk.
AvroJob.setMapperClass(conf, mapperClass); AvroJob.setReducerClass(conf, reducerClass); AvroJob.setCombinerClass(conf, combinerClass);
// Wires the caller-supplied mapper, reducer, and combiner classes into the
// JobConf. NOTE(review): the enclosing method is not visible in this chunk.
AvroJob.setMapperClass(conf, mapperClass); AvroJob.setReducerClass(conf, reducerClass); AvroJob.setCombinerClass(conf, combinerClass);
public void testInputFormat() throws Exception { JobConf job = new JobConf(); Schema subSchema = Schema.parse("{\"type\":\"record\"," + "\"name\":\"PairValue\","+ "\"fields\": [ " + "{\"name\":\"value\", \"type\":\"long\"}" + "]}"); AvroJob.setInputSchema(job, subSchema); AvroJob.setMapperClass(job, Counter.class); FileInputFormat.setInputPaths(job, new Path(DIR + "/out/*")); job.setInputFormat(AvroTrevniInputFormat.class); job.setNumReduceTasks(0); // map-only job.setOutputFormat(NullOutputFormat.class); // ignore output total = 0; JobClient.runJob(job); assertEquals(WordCountUtil.TOTAL, total); }
// Registers the Mapper and Reducer implementations on the JobConf.
// NOTE(review): the enclosing method is not visible in this chunk.
AvroJob.setMapperClass(conf, Mapper.class); AvroJob.setReducerClass(conf, Reducer.class);
/**
 * Word-counts the generated lines file under DIR through
 * AvroTrevniOutputFormat (compressed output) and validates the counts file.
 *
 * @throws Exception if job setup or execution fails
 */
public void testOutputFormat() throws Exception {
  JobConf conf = new JobConf();
  WordCountUtil.writeLinesFile();

  // Schemas and map/combine/reduce wiring for the word-count pipeline.
  AvroJob.setInputSchema(conf, STRING);
  AvroJob.setOutputSchema(conf, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(conf, MapImpl.class);
  AvroJob.setCombinerClass(conf, ReduceImpl.class);
  AvroJob.setReducerClass(conf, ReduceImpl.class);

  FileInputFormat.setInputPaths(conf, new Path(DIR + "/in"));
  FileOutputFormat.setOutputPath(conf, new Path(DIR + "/out"));
  FileOutputFormat.setCompressOutput(conf, true);
  conf.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(conf);
  WordCountUtil.validateCountsFile();
}
// Registers the Mapper, Reducer-as-combiner, and Reducer implementations on
// the JobConf. NOTE(review): enclosing method is not visible in this chunk.
AvroJob.setMapperClass(conf, Mapper.class); AvroJob.setCombinerClass(conf, Reducer.class); AvroJob.setReducerClass(conf, Reducer.class);
// Fragment of job setup: input is a Pair<int, PageDetail> schema, with
// distinct Mapper/Combiner/Reducer classes. NOTE(review): the enclosing
// method is not visible in this chunk — confirm against full source.
AvroJob.setInputSchema(conf, Pair.getPairSchema(Schema.create(Type.INT),PageDetail.getClassSchema())); AvroJob.setMapperClass(conf, Mapper.class); AvroJob.setCombinerClass(conf, Combiner.class) ; AvroJob.setReducerClass(conf, Reducer.class);