public Class<? extends InputFormat<?, ?>> getInputFormatClass() {
  return bundle.getFormatClass();
}
@Override
public String toString() {
  return new StringBuilder()
      .append(formatBundle.getFormatClass().getSimpleName())
      .append("(")
      .append(path)
      .append(")")
      .toString();
}
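These accessors only read the format class back off a bundle. For context, a minimal sketch of the other direction, building a FormatBundle for an input format and applying it to a job; TextInputFormat and the extra conf key are illustrative choices, not taken from the snippets here:

FormatBundle<TextInputFormat> bundle = FormatBundle.forInput(TextInputFormat.class)
    .set("mapreduce.input.fileinputformat.split.maxsize", "134217728"); // illustrative extra setting
Job job = Job.getInstance(new Configuration());
job.setInputFormatClass(bundle.getFormatClass()); // unwrap the bundled format class
bundle.configure(job.getConfiguration());         // copy the bundle's key/value pairs onto the job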
private static void configureJob(String namedOutput, Job job, OutputConfig outConfig)
    throws IOException {
  job.getConfiguration().set(BASE_OUTPUT_NAME, namedOutput);
  job.setOutputFormatClass(outConfig.bundle.getFormatClass());
  job.setOutputKeyClass(outConfig.keyClass);
  job.setOutputValueClass(outConfig.valueClass);
  outConfig.bundle.configure(job.getConfiguration());
}
@Override
@SuppressWarnings("unchecked")
public void configureSource(Job job, int inputId) throws IOException {
  Configuration conf = job.getConfiguration();
  // An id of -1 indicates that this is the only input, so use it directly.
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(inputBundle.getFormatClass());
    inputBundle.configure(conf);
  } else {
    // There are multiple inputs for this mapper, so register it via CrunchInputs;
    // a fake path is needed to make it play well with other file-based inputs.
    Path dummy = new Path("/kafka/" + inputId);
    CrunchInputs.addInputPath(job, dummy, inputBundle, inputId);
  }
}
@Override
public void configureSource(Job job, int inputId) throws IOException {
  TableMapReduceUtil.addDependencyJars(job);
  Configuration conf = job.getConfiguration();
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      ResultSerialization.class.getName());
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(inputBundle.getFormatClass());
    inputBundle.configure(conf);
  } else {
    Path dummy = new Path("/hbase/" + table);
    CrunchInputs.addInputPath(job, dummy, inputBundle, inputId);
  }
}
protected void configureForMapReduce(Job job, Class keyClass, Class valueClass,
    FormatBundle formatBundle, Path outputPath, String name) {
  try {
    FileOutputFormat.setOutputPath(job, outputPath);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  if (name == null) {
    job.setOutputFormatClass(formatBundle.getFormatClass());
    formatBundle.configure(job.getConfiguration());
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
  } else {
    CrunchOutputs.addNamedOutput(job, name, formatBundle, keyClass, valueClass);
  }
}
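The method above picks between a single anonymous output and a named multi-output. A sketch of the named-output branch from the caller's side; the output format, output name, and key/value classes are illustrative only:

FormatBundle<SequenceFileOutputFormat> outBundle =
    FormatBundle.forOutput(SequenceFileOutputFormat.class);
// Register the bundle under a name so several outputs can share one job.
CrunchOutputs.addNamedOutput(job, "out0", outBundle, Text.class, LongWritable.class);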
private TaskAttemptContext getContext(String nameOutput) throws IOException {
  TaskAttemptContext taskContext = taskContextCache.get(nameOutput);
  if (taskContext != null) {
    return taskContext;
  }
  // The following trick leverages the instantiation of a record writer via
  // the job, thus supporting arbitrary output formats.
  OutputConfig outConfig = namedOutputs.get(nameOutput);
  Configuration conf = new Configuration(baseContext.getConfiguration());
  Job job = new Job(conf);
  job.getConfiguration().set("crunch.namedoutput", nameOutput);
  job.setOutputFormatClass(outConfig.bundle.getFormatClass());
  job.setOutputKeyClass(outConfig.keyClass);
  job.setOutputValueClass(outConfig.valueClass);
  outConfig.bundle.configure(job.getConfiguration());
  taskContext = TaskAttemptContextFactory.create(
      job.getConfiguration(), baseContext.getTaskAttemptID());
  taskContextCache.put(nameOutput, taskContext);
  return taskContext;
}
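A context cached this way is typically consumed by instantiating the output format reflectively and asking it for a record writer. A hedged sketch of that consuming step, reusing the outConfig and taskContext names from above (exception handling elided):

// Instantiate the bundled output format against the per-output configuration,
// then obtain a writer bound to the cached task attempt context.
OutputFormat format = ReflectionUtils.newInstance(
    outConfig.bundle.getFormatClass(), taskContext.getConfiguration());
RecordWriter writer = format.getRecordWriter(taskContext);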
@Override
public void configureSource(Job job, int inputId) throws IOException {
  if (inputId == -1) {
    FileInputFormat.addInputPath(job, path);
    job.setInputFormatClass(inputBundle.getFormatClass());
    inputBundle.configure(job.getConfiguration());
  } else {
    CrunchInputs.addInputPath(job, path, inputBundle, inputId);
  }
}
@Override
public void configureSource(Job job, int inputId) throws IOException {
  Configuration jobConf = job.getConfiguration();
  if (hcatConf == null) {
    hcatConf = configureHCatFormat(jobConf, bundle, database, table, filter);
  }
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(bundle.getFormatClass());
    bundle.configure(jobConf);
  } else {
    Path dummy = new Path("/hcat/" + database + "/" + table);
    CrunchInputs.addInputPath(job, dummy, bundle, inputId);
  }
}
@Override
@SuppressWarnings("unchecked")
public void configureSource(Job job, int inputId) throws IOException {
  Configuration conf = job.getConfiguration();
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(formatBundle.getFormatClass());
    formatBundle.configure(conf);
  } else {
    Path dummy = new Path("/view/" + view.getDataset().getName());
    CrunchInputs.addInputPath(job, dummy, formatBundle, inputId);
  }
}
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> splits = Lists.newArrayList();
  Configuration base = job.getConfiguration();
  Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = CrunchInputs.getFormatNodeMap(job);
  // For each registered InputFormat, configure a copy of the job and collect its splits.
  for (Map.Entry<FormatBundle, Map<Integer, List<Path>>> entry : formatNodeMap.entrySet()) {
    FormatBundle inputBundle = entry.getKey();
    Configuration conf = new Configuration(base);
    inputBundle.configure(conf);
    Job jobCopy = new Job(conf);
    InputFormat<?, ?> format = (InputFormat<?, ?>) ReflectionUtils.newInstance(
        inputBundle.getFormatClass(), jobCopy.getConfiguration());
    for (Map.Entry<Integer, List<Path>> nodeEntry : entry.getValue().entrySet()) {
      Integer nodeIndex = nodeEntry.getKey();
      List<Path> paths = nodeEntry.getValue();
      FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));
      // Get splits for each input path and tag with InputFormat
      // and Mapper types by wrapping in a TaggedInputSplit.
      List<InputSplit> pathSplits = format.getSplits(jobCopy);
      for (InputSplit pathSplit : pathSplits) {
        splits.add(new CrunchInputSplit(pathSplit, inputBundle.getFormatClass(),
            nodeIndex, jobCopy.getConfiguration()));
      }
    }
  }
  return splits;
}
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> splits = Lists.newArrayList();
  Configuration base = job.getConfiguration();
  Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = CrunchInputs.getFormatNodeMap(job);
  // For each registered InputFormat, configure a copy of the job and collect its splits.
  for (Map.Entry<FormatBundle, Map<Integer, List<Path>>> entry : formatNodeMap.entrySet()) {
    FormatBundle inputBundle = entry.getKey();
    Configuration conf = new Configuration(base);
    inputBundle.configure(conf);
    Job jobCopy = new Job(conf);
    InputFormat<?, ?> format = (InputFormat<?, ?>) ReflectionUtils.newInstance(
        inputBundle.getFormatClass(), jobCopy.getConfiguration());
    if (format instanceof FileInputFormat
        && !conf.getBoolean(RuntimeParameters.DISABLE_COMBINE_FILE, true)) {
      format = new CrunchCombineFileInputFormat<Object, Object>(jobCopy);
    }
    for (Map.Entry<Integer, List<Path>> nodeEntry : entry.getValue().entrySet()) {
      Integer nodeIndex = nodeEntry.getKey();
      List<Path> paths = nodeEntry.getValue();
      FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));
      // Get splits for each input path and tag with InputFormat
      // and Mapper types by wrapping in a TaggedInputSplit.
      List<InputSplit> pathSplits = format.getSplits(jobCopy);
      for (InputSplit pathSplit : pathSplits) {
        splits.add(new CrunchInputSplit(pathSplit, inputBundle, nodeIndex,
            jobCopy.getConfiguration()));
      }
    }
  }
  return splits;
}
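Note that split combining in this variant is opt-in: the getBoolean lookup defaults to true, so CrunchCombineFileInputFormat is only substituted when the flag is explicitly cleared. A minimal sketch of enabling it on a job configuration:

Configuration conf = new Configuration();
// The lookup above falls back to true (combining disabled) when the key is
// unset, so combining must be switched on explicitly.
conf.setBoolean(RuntimeParameters.DISABLE_COMBINE_FILE, false);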
@Override
@SuppressWarnings("unchecked")
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
  Preconditions.checkNotNull(name, "Output name should not be null"); // see CRUNCH-82
  Converter converter = getConverter(ptype);
  Class<?> keyClass = converter.getKeyClass();
  Class<?> valueClass = Void.class;
  CrunchOutputs.addNamedOutput(job, name, formatBundle, keyClass, valueClass);
  job.setOutputFormatClass(formatBundle.getFormatClass());
  formatBundle.configure(job.getConfiguration());
}
@Override
public Iterator<T> read(FileSystem fs, Path path) {
  final Configuration conf = new Configuration(fs.getConf());
  bundle.configure(conf);
  ptype.initialize(conf);
  final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
  final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try {
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, path);
    return Iterators.concat(Lists.transform(fmt.getSplits(job),
        new Function<InputSplit, Iterator<T>>() {
          @Override
          public Iterator<T> apply(InputSplit split) {
            try {
              RecordReader reader = fmt.createRecordReader(split, ctxt);
              reader.initialize(split, ctxt);
              return new RecordReaderIterator<T>(reader, ptype);
            } catch (Exception e) {
              LOG.error("Error reading split: " + split, e);
              throw new CrunchRuntimeException(e);
            }
          }
        }).iterator());
  } catch (Exception e) {
    LOG.error("Error reading path: " + path, e);
    throw new CrunchRuntimeException(e);
  }
}
@Override
public Iterator<HCatRecord> iterator() {
  try {
    Job job = Job.getInstance(bundle.configure(conf));
    final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
    final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    return Iterators.concat(Lists.transform(fmt.getSplits(job),
        new Function<InputSplit, Iterator<HCatRecord>>() {
          @Override
          public Iterator<HCatRecord> apply(InputSplit split) {
            RecordReader reader = null;
            try {
              reader = fmt.createRecordReader(split, ctxt);
              reader.initialize(split, ctxt);
            } catch (IOException | InterruptedException e) {
              throw new CrunchRuntimeException(e);
            }
            return new HCatRecordReaderIterator(reader);
          }
        }).iterator());
  } catch (Exception e) {
    throw new CrunchRuntimeException(e);
  }
}
CrunchOutputs.OutputConfig outConfig =
    CrunchOutputs.getNamedOutputs(job.getConfiguration()).get("out0");
job.setOutputFormatClass(outConfig.bundle.getFormatClass());
job.setOutputKeyClass(outConfig.keyClass);
job.setOutputValueClass(outConfig.valueClass);