/** Identity mapper: emits each Weather record as a key with a null value. */
@Override
public void map(Weather w, AvroCollector<Pair<Weather,Void>> collector, Reporter reporter)
    throws IOException {
  // The record itself is the key; Void carries no payload.
  Pair<Weather,Void> keyed = new Pair<>(w, (Void) null);
  collector.collect(keyed);
}
/** Splits each input line on whitespace and emits a (token, 1) pair per token. */
@Override
public void map(String text, AvroCollector<Pair<String,Long>> collector, Reporter reporter)
    throws IOException {
  StringTokenizer splitter = new StringTokenizer(text);
  while (splitter.hasMoreTokens()) {
    collector.collect(new Pair<>(splitter.nextToken(), 1L));
  }
}
}
/** Sums all counts for a word and emits a single (word, total) pair. */
@Override
public void reduce(String word, Iterable<Long> counts, AvroCollector<Pair<String,Long>> collector,
                   Reporter reporter) throws IOException {
  long total = 0;
  for (long c : counts) {
    total += c;
  }
  collector.collect(new Pair<>(word, total));
}
}
/** Sums all counts for a word and emits a single (word, total) pair. */
@Override
public void reduce(Utf8 word, Iterable<Long> counts, AvroCollector<Pair<Utf8, Long>> collector,
                   Reporter reporter) throws IOException {
  long total = 0;
  for (long c : counts) {
    total += c;
  }
  collector.collect(new Pair<>(word, total));
}
}
/**
 * Called with all map output values with a given key. By default, pairs
 * key with each value, collecting {@link Pair} instances.
 */
@SuppressWarnings("unchecked")
public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector, Reporter reporter) throws IOException {
  // Lazily build one reusable Pair with the job's output schema; reusing a
  // single mutable instance avoids an allocation per value.
  if (outputPair == null) outputPair = new Pair<>(AvroJob.getOutputSchema(getConf()));
  for (V value : values) {
    outputPair.set(key, value);
    // Unchecked cast: safe only because OUT is configured as the pair output
    // type for this job — hence the @SuppressWarnings above.
    collector.collect((OUT)outputPair);
  }
}
/** Converts each text line into an Avro (byte offset, line) pair wrapped for Hadoop output. */
public void map(LongWritable key, Text value,
                OutputCollector<AvroWrapper<Pair<Long,Utf8>>,NullWritable> out,
                Reporter reporter) throws IOException {
  Pair<Long,Utf8> offsetAndLine = new Pair<>(key.get(), new Utf8(value.toString()));
  out.collect(new AvroWrapper<>(offsetAndLine), NullWritable.get());
}
}
/** Tokenizes the input line on whitespace and emits a (word, 1) pair per token. */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  StringTokenizer words = new StringTokenizer(text.toString());
  while (words.hasMoreTokens()) {
    Utf8 word = new Utf8(words.nextToken());
    collector.collect(new Pair<>(word, 1L));
  }
}
}
/** Verifies that Pair refuses to infer a schema from an untyped empty collection. */
@Test
public void testCollectionFailure() throws Exception {
  try {
    new Pair("foo", new ArrayList());
    // Reaching this line means no exception was thrown; fail() raises an
    // AssertionError, which the catch below does not intercept.
    fail("Expected an AvroRuntimeException");
  } catch (AvroRuntimeException e) {
    assertTrue(e.getMessage().startsWith("Cannot infer schema"));
  }
}
/** Builds a container record holding a one-element optional array and emits it keyed by file offset. */
public void map(LongWritable key, Text value,
                OutputCollector<AvroWrapper<Pair<Long,GenericData.Record>>,NullWritable> out,
                Reporter reporter) throws IOException {
  // Single array element with its optional field populated.
  GenericData.Record element = new GenericData.Record(createInnerSchema("optional_field_1"));
  element.put("optional_field_1", 0L);
  GenericData.Array<GenericData.Record> optionals =
      new GenericData.Array<>(1, createArraySchema());
  optionals.add(element);
  // Place the array under the container's "Optional" field.
  GenericData.Record record = new GenericData.Record(createSchema());
  record.put("Optional", optionals);
  out.collect(new AvroWrapper<>(new Pair<>(key.get(), record)), NullWritable.get());
}
}
/** Emits a (word, 1) pair for every whitespace-delimited token in the line. */
@Override
public void map(Utf8 text, Collector<Pair<Utf8,Long>> collector) throws IOException {
  for (StringTokenizer st = new StringTokenizer(text.toString()); st.hasMoreTokens(); ) {
    collector.collect(new Pair<>(new Utf8(st.nextToken()), 1L));
  }
}
/**
 * Returns the next key/value pair, reusing {@code reuse} when it is non-null
 * (otherwise allocating a fresh Pair from {@code schema}).
 *
 * @throws NoSuchElementException if no further pair is available after {@code prepare()}.
 */
@Override public Pair<K,V> next(Pair<K,V> reuse) throws IOException {
  prepare();
  if (!hasNext()) throw new NoSuchElementException();
  Pair<K,V> result = reuse;
  if (result == null) result = new Pair<>(schema);
  // Convert the current raw key/value into the Avro pair.
  result.key(keyConverter.convert(key));
  reader.getCurrentValue(value);
  result.value(valConverter.convert(value));
  // swap key and spareKey — the just-consumed key buffer becomes the spare so
  // the next read does not overwrite a value still referenced by the caller.
  Writable k = key;
  key = spareKey;
  spareKey = k;
  // Mark the buffered record as consumed; prepare() must advance again.
  ready = false;
  return result;
}
/** Maps a names record to a joinable record keyed by its id; the balance slot is unset (-1). */
@Override
public void map(NamesRecord nameRecord, AvroCollector<Pair<KeyRecord, JoinableRecord>> collector,
                Reporter reporter) throws IOException {
  KeyRecord joinKey = new KeyRecord(nameRecord.id);
  JoinableRecord joinable =
      new JoinableRecord(nameRecord.getClass().getName(), nameRecord.id, nameRecord.name, -1L);
  collector.collect(new Pair<>(joinKey, joinable));
}
/** Maps a balances record to a joinable record keyed by its id; the name slot is left empty. */
@Override
public void map(BalancesRecord balanceRecord,
                AvroCollector<Pair<KeyRecord, JoinableRecord>> collector,
                Reporter reporter) throws IOException {
  KeyRecord joinKey = new KeyRecord(balanceRecord.id);
  JoinableRecord joinable = new JoinableRecord(
      balanceRecord.getClass().getName(), balanceRecord.id, "", balanceRecord.balance);
  collector.collect(new Pair<>(joinKey, joinable));
}
/** Emits (word, 1) to the main output, and the pair's string form to the "myavro2" named output. */
@Override
public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
    throws IOException {
  StringTokenizer words = new StringTokenizer(text.toString());
  while (words.hasMoreTokens()) {
    String word = words.nextToken();
    collector.collect(new Pair<>(new Utf8(word), 1L));
    // Side output receives the textual rendering of an equivalent pair.
    String serialized = new Pair<Utf8, Long>(new Utf8(word), 1L).toString();
    amos.getCollector("myavro2", reporter).collect(serialized);
  }
}
/** Emits (word, Count(1)) for each whitespace-delimited token, using reflect types. */
@Override
public void map(Text text, AvroCollector<Pair<Text,Count>> collector, Reporter reporter)
    throws IOException {
  StringTokenizer st = new StringTokenizer(text.toString());
  while (st.hasMoreTokens()) {
    String token = st.nextToken();
    collector.collect(new Pair<>(new Text(token), new Count(1L)));
  }
}
}
/** Sums counts for a word, fans the total out to several named outputs, then emits it normally. */
@Override
public void reduce(Utf8 word, Iterable<Long> counts, AvroCollector<Pair<Utf8, Long>> collector,
                   Reporter reporter) throws IOException {
  long total = 0;
  for (long c : counts) {
    total += c;
  }
  Pair<Utf8, Long> result = new Pair<>(word, total);
  // Named output "myavro" via its dedicated collector.
  amos.getCollector("myavro", reporter).collect(result);
  // Named output "myavro1" receives the pair's string rendering.
  amos.collect("myavro1", reporter, result.toString());
  // Explicit-schema variants writing to base file names "testavrofile" / "testavrofile1".
  Schema pairSchema = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();
  amos.collect("myavro", reporter, pairSchema, result, "testavrofile");
  amos.collect("myavro", reporter, Schema.create(Schema.Type.STRING), result.toString(), "testavrofile1");
  collector.collect(new Pair<>(word, total));
}
/** Runs a map-only (zero reducers) job and exercises the "myavro2" named output. */
@SuppressWarnings("deprecation")
public void testJobNoreducer() throws Exception {
  JobConf conf = new JobConf();
  conf.setNumReduceTasks(0);
  // Start from a clean output directory so reruns are deterministic.
  Path outDir = new Path(OUTPUT_DIR.getRoot().getPath());
  outDir.getFileSystem(conf).delete(outDir);
  WordCountUtil.writeLinesFile(new File(INPUT_DIR.getRoot(),"lines.avro"));
  conf.setJobName("AvroMultipleOutputs_noreducer");
  AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(conf, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(conf, MapImpl.class);
  FileInputFormat.setInputPaths(conf, new Path(INPUT_DIR.getRoot().toString()));
  FileOutputFormat.setOutputPath(conf, outDir);
  FileOutputFormat.setCompressOutput(conf, false);
  AvroMultipleOutputs.addNamedOutput(
      conf, "myavro2", AvroOutputFormat.class, Schema.create(Schema.Type.STRING));
  JobClient.runJob(conf);
}
/** Runs the full word-count pipeline (map, combine, reduce) and validates the counts file. */
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf conf = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  WordCountUtil.writeLinesFile(pathIn + "/lines.avro");
  // Remove any previous output so the job can create the directory itself.
  Path outDir = new Path(pathOut);
  outDir.getFileSystem(conf).delete(outDir);
  conf.setJobName("wordcount");
  AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(conf, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(conf, MapImpl.class);
  AvroJob.setCombinerClass(conf, ReduceImpl.class);
  AvroJob.setReducerClass(conf, ReduceImpl.class);
  FileInputFormat.setInputPaths(conf, new Path(pathIn));
  FileOutputFormat.setOutputPath(conf, new Path(pathOut));
  FileOutputFormat.setCompressOutput(conf, true);
  WordCountUtil.setMeta(conf);
  JobClient.runJob(conf);
  WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro"));
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }
/** Runs word count with three registered named outputs and validates the main counts file. */
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf conf = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  File linesFile = new File(pathIn, "lines.avro");
  // Remove stale output before writing fresh input lines.
  Path outDir = new Path(pathOut);
  outDir.getFileSystem(conf).delete(outDir);
  WordCountUtil.writeLinesFile(linesFile);
  conf.setJobName("AvroMultipleOutputs");
  AvroJob.setInputSchema(conf, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(conf, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(conf, MapImpl.class);
  AvroJob.setReducerClass(conf, ReduceImpl.class);
  FileInputFormat.setInputPaths(conf, pathIn);
  FileOutputFormat.setOutputPath(conf, outDir);
  FileOutputFormat.setCompressOutput(conf, false);
  // Register the three side outputs the mapper/reducer write to.
  AvroMultipleOutputs.addNamedOutput(
      conf, "myavro", AvroOutputFormat.class, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroMultipleOutputs.addNamedOutput(
      conf, "myavro1", AvroOutputFormat.class, Schema.create(Schema.Type.STRING));
  AvroMultipleOutputs.addNamedOutput(
      conf, "myavro2", AvroOutputFormat.class, Schema.create(Schema.Type.STRING));
  WordCountUtil.setMeta(conf);
  JobClient.runJob(conf);
  WordCountUtil.validateCountsFile(new File(outDir.toString(), "/part-00000.avro"));
}