dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true) .setCustomInitializerDescriptor(descriptor).build(); } else { MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
} else { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath) .groupSplits(false).build();
} else { dsd = MRInputLegacy.createConfigBuilder(mapStageConf, TextInputFormat.class, inputPath).build();
} else { dsd = MRInputLegacy.createConfigBuilder(stage1Conf, TextInputFormat.class, inputPath) .groupSplits(false).build();
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true) .setCustomInitializerDescriptor(descriptor).build(); } else { MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();
} else { dataSource = MRInputLegacy.createConfigBuilder(mapStageConf, SleepInputFormat.class) .generateSplitsInAM(generateSplitsInAM).build();
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1.toUri().toString()) .groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build()); .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2.toUri().toString()) .groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build());
configBuilder.groupSplits( conf.getBoolean( FlowRuntimeProps.COMBINE_SPLITS, true ) ); configBuilder.groupSplits( false ); DataSourceDescriptor dataSourceDescriptor = configBuilder.build();
configBuilder.groupSplits( conf.getBoolean( FlowRuntimeProps.COMBINE_SPLITS, true ) ); configBuilder.groupSplits( false ); DataSourceDescriptor dataSourceDescriptor = configBuilder.build();
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, hashPath.toUri().toString()) .groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build()); .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, streamPath.toUri().toString()) .groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build());
inputConf.set(FileInputFormat.INPUT_DIR, inputPath); MRInput.MRInputConfigBuilder configurer = MRInput.createConfigBuilder(inputConf, null); DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();
.groupSplits(false).build()); Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName())); v2.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2) .groupSplits(false).build()); Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName())); v3.addDataSource(INPUT, MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3) .groupSplits(false).build()); CartesianProductConfig cartesianProductConfig; if (isPartitioned) {
TextInputFormat.class, inputPath).groupSplits(!disableSplitGrouping) .generateSplitsInAM(!isGenerateSplitInClient).build();
TextInputFormat.class, inputPath).groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build();
MRInput .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, lhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build()); setVertexExecutionContext(lhsVertex, getLhsExecutionContext()); MRInput .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, rhs.toUri().toString()).groupSplits(!isDisableSplitGrouping()) .generateSplitsInAM(!isGenerateSplitInClient()).build()); setVertexExecutionContext(rhsVertex, getRhsExecutionContext());
/**
 * Builds a {@link MultiMRInputConfigBuilder} used to configure a {@link MultiMRInput}.
 * <p>
 * The recommended approach is to supply every parameter here and then refine the Input through
 * the builder's methods.
 * <p>
 * Legacy applications that already hold a fully configured
 * {@link org.apache.hadoop.conf.Configuration} instance may pass null as the inputFormat.
 * <p>
 * This is typically paired with a custom {@link org.apache.tez.dag.api.VertexManagerPlugin} or
 * {@link org.apache.tez.runtime.api.InputInitializer} that generates the multiple inputs used
 * by each task. When neither is set up, this works the same as
 * {@link org.apache.tez.mapreduce.input.MRInput}.
 * <p>
 * Grouping of splits is disabled by default.
 *
 * @param conf        Configuration for the {@link MRInput}. This configuration instance will
 *                    be modified in place
 * @param inputFormat InputFormat derived class. This can be null. If the InputFormat specified
 *                    is null, the provided configuration should be complete.
 * @return {@link MultiMRInputConfigBuilder}
 */
public static MultiMRInputConfigBuilder createConfigBuilder(Configuration conf,
                                                            @Nullable Class<?> inputFormat) {
  MultiMRInputConfigBuilder multiBuilder = new MultiMRInputConfigBuilder(conf, inputFormat);
  // Record MultiMRInput as the input class and switch split grouping off up front;
  // the instance created above is what gets handed back to the caller.
  multiBuilder
      .setInputClassName(MultiMRInput.class.getName())
      .groupSplits(false);
  return multiBuilder;
}
/**
 * Builds an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder} for InputFormats
 * based on {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} or
 * {@link org.apache.hadoop.mapred.FileInputFormat}.
 * <p>
 * The recommended approach is to supply every parameter here and then refine the Input through
 * the builder's methods.
 * <p>
 * Legacy applications that already hold a fully configured {@link Configuration} instance may
 * pass null for both the inputFormat and the inputPath.
 *
 * @param conf        Configuration for the {@link MRInput}. This configuration instance will
 *                    be modified in place
 * @param inputFormat InputFormat derived class. This can be null. If the InputFormat specified
 *                    is null, the provided configuration should be complete.
 * @param inputPaths  Comma separated input paths
 * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 */
public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
                                                       @Nullable Class<?> inputFormat,
                                                       @Nullable String inputPaths) {
  MRInputConfigBuilder builder = new MRInputConfigBuilder(conf, inputFormat);
  // Input paths are applied only when the caller actually supplied them.
  return inputPaths == null ? builder : builder.setInputPaths(inputPaths);
}
/**
 * Builds an {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}.
 * <p>
 * The recommended approach is to supply every parameter here and then refine the Input through
 * the builder's methods.
 * <p>
 * Legacy applications that already hold a fully configured {@link Configuration} instance may
 * pass null as the inputFormat.
 *
 * @param conf        Configuration for the {@link MRInput}. This configuration instance will
 *                    be modified in place
 * @param inputFormat InputFormat derived class. This can be null. If the InputFormat specified
 *                    is null, the provided configuration should be complete.
 * @return {@link org.apache.tez.mapreduce.input.MRInput.MRInputConfigBuilder}
 */
public static MRInputConfigBuilder createConfigBuilder(Configuration conf,
                                                       @Nullable Class<?> inputFormat) {
  MRInputConfigBuilder builder = new MRInputConfigBuilder(conf, inputFormat);
  return builder;
}
dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(true) .setCustomInitializerDescriptor(descriptor).build(); } else { MultiMRInput.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build(); } else { dataSource = MRInputLegacy.createConfigBuilder(conf, inputFormatClass).groupSplits(false).build();