/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the key schema of the
 * job's map output pair schema.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    // Nothing to derive a schema from; leave the field untouched.
    return;
  }
  schema = Pair.getKeySchema(AvroJob.getMapOutputSchema(conf));
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/**
 * Returns the map output deserializer for the requested wrapper class.
 * Defaults to the final output deserializer if no map output schema was
 * specified.
 *
 * <p>{@code AvroKey} types are read with the key schema of the map output
 * pair schema; every other type is read with its value schema. The datum
 * reader comes from the map output data model.
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);

  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(schema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the schema of the first
 * field of the map output key schema.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  // Fully qualified on purpose, as in the original: use Avro's mapred Pair.
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  schema = pairKeySchema.getFields().get(0).schema();
}
/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the schema of the first
 * field of the map output key schema.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  // Fully qualified on purpose, as in the original: use Avro's mapred Pair.
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  schema = pairKeySchema.getFields().get(0).schema();
}
/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the schema of the first
 * field of the map output key schema and sets {@code mode} from the shuffle
 * configuration.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  // Fully qualified on purpose, as in the original: use Avro's mapred Pair.
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  schema = pairKeySchema.getFields().get(0).schema();
  mode = AvroMode.fromShuffleConfiguration(conf);
}
/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the key schema of the
 * job's map output pair schema.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    // Nothing to derive a schema from; leave the field untouched.
    return;
  }
  schema = Pair.getKeySchema(AvroJob.getMapOutputSchema(conf));
}
/**
 * Stores the configuration through the superclass and, when a non-null
 * configuration is supplied, sets {@code schema} to the key schema of the
 * job's map output pair schema.
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    // Nothing to derive a schema from; leave the field untouched.
    return;
  }
  schema = Pair.getKeySchema(AvroJob.getMapOutputSchema(conf));
}
/**
 * Returns the map output deserializer for the requested wrapper class.
 * Defaults to the final output deserializer if no map output schema was
 * specified.
 *
 * <p>When {@code AvroJob.MAP_OUTPUT_IS_REFLECT} is set, the reader is obtained
 * from the {@code ReflectDataFactory} configured under
 * {@code crunch.reflectdatafactory}; otherwise a {@code SpecificDatumReader}
 * is used.
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  boolean isKey = AvroKey.class.isAssignableFrom(c);
  Configuration conf = getConf();

  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  boolean useReflection = conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false);
  DatumReader<T> datumReader;
  if (!useReflection) {
    datumReader = new SpecificDatumReader<T>(schema);
  } else {
    // Instantiate whichever factory class the configuration names.
    Class<?> factoryClass = conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class);
    ReflectDataFactory factory =
        (ReflectDataFactory) ReflectionUtils.newInstance(factoryClass, conf);
    datumReader = factory.getReader(schema);
  }
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/**
 * Returns the map output deserializer for the requested wrapper class.
 * Defaults to the final output deserializer if no map output schema was
 * specified.
 *
 * <p>When {@code AvroJob.MAP_OUTPUT_IS_REFLECT} is set, the reader is obtained
 * from the {@code ReflectDataFactory} configured under
 * {@code crunch.reflectdatafactory}; otherwise a {@code SpecificDatumReader}
 * is used.
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  boolean isKey = AvroKey.class.isAssignableFrom(c);
  Configuration conf = getConf();

  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  boolean useReflection = conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false);
  DatumReader<T> datumReader;
  if (!useReflection) {
    datumReader = new SpecificDatumReader<T>(schema);
  } else {
    // Instantiate whichever factory class the configuration names.
    Class<?> factoryClass = conf.getClass("crunch.reflectdatafactory", ReflectDataFactory.class);
    ReflectDataFactory factory =
        (ReflectDataFactory) ReflectionUtils.newInstance(factoryClass, conf);
    datumReader = factory.getReader(schema);
  }
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReflectDataFactory factory = Avros.getReflectDataFactory(conf); ReflectDatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReaderWriterFactory factory = AvroMode.fromShuffleConfiguration(conf); DatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReflectDataFactory factory = Avros.getReflectDataFactory(conf); ReflectDatumWriter<T> writer = factory.getWriter(); writer.setSchema(schema); return new AvroWrapperSerializer(writer); }
/**
 * Returns the map output deserializer for the requested wrapper class.
 * Defaults to the final output deserializer if no map output schema was
 * specified.
 *
 * <p>{@code AvroKey} types are read with the key schema of the map output
 * pair schema; every other type is read with its value schema. The datum
 * reader comes from the map output data model.
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);

  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(schema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/** Returns the specified map output deserializer. Defaults to the final * output deserializer if no map output schema was specified. */ public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) { // We need not rely on mapred.task.is.map here to determine whether map // output or final output is desired, since the mapreduce framework never // creates a deserializer for final output, only for map output. boolean isKey = AvroKey.class.isAssignableFrom(c); Schema schema = isKey ? Pair.getKeySchema(AvroJob.getMapOutputSchema(getConf())) : Pair.getValueSchema(AvroJob.getMapOutputSchema(getConf())); return new AvroWrapperDeserializer(new SpecificDatumReader<T>(schema), isKey); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // Here we must rely on mapred.task.is.map to tell whether the map output // or final output is needed. boolean isMap = getConf().getBoolean("mapred.task.is.map", false); Schema schema = !isMap ? AvroJob.getOutputSchema(getConf()) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(getConf())) : Pair.getValueSchema(AvroJob.getMapOutputSchema(getConf()))); return new AvroWrapperSerializer(new SpecificDatumWriter<T>(schema)); }
/**
 * Returns the map output deserializer for the requested wrapper class.
 * Defaults to the final output deserializer if no map output schema was
 * specified.
 *
 * <p>When {@code AvroJob.MAP_OUTPUT_IS_REFLECT} is set, the reader comes from
 * {@code AvroMode.REFLECT} with its factory taken from the configuration;
 * otherwise it comes from the mode derived from the shuffle configuration.
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  boolean isKey = AvroKey.class.isAssignableFrom(c);
  Configuration conf = getConf();

  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  boolean useReflection = conf.getBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, false);
  DatumReader<T> datumReader;
  if (!useReflection) {
    datumReader = AvroMode.fromShuffleConfiguration(conf).getReader(schema);
  } else {
    datumReader = AvroMode.REFLECT.withFactoryFromConfiguration(conf).getReader(schema);
  }
  return new AvroWrapperDeserializer(datumReader, isKey);
}
// Derive the key and value schemas from pairSchema, then create the Pair instance (typed on GenericRecord for both sides) from that same pairSchema.
keySchema = Pair.getKeySchema(pairSchema); valueSchema = Pair.getValueSchema(pairSchema); pair = new Pair<GenericRecord, GenericRecord>(pairSchema);