Codota Logo
GuaguaInputSplit
Code IndexAdd Codota to your IDE (free)

How to use
GuaguaInputSplit
in
ml.shifu.guagua.hadoop.io

Best Java code snippets using ml.shifu.guagua.hadoop.io.GuaguaInputSplit (Showing top 20 results out of 315)

  • Add the Codota plugin to your IDE and get smart completions
private void myMethod () {
Dictionary d =
  • Codota Iconnew Hashtable()
  • Codota IconBundle bundle;bundle.getHeaders()
  • Codota Iconnew Properties()
  • Smart code suggestions by Codota
}
origin: ml.shifu/guagua-mapreduce-examples

/**
 * Builds the guagua input splits for the job.
 *
 * <p>
 * The splits returned by the parent input format are repeated {@code NNConstants.NN_TEST_SCALE} times (default 1).
 * Each split whose path passes {@code isNotPigOrHadoopMetaFile} is wrapped in its own worker
 * {@link GuaguaInputSplit}. One extra master split, carrying no file split, is appended at the end, and the
 * resulting worker count is stored in the job configuration under {@code GuaguaConstants.GUAGUA_WORKER_NUMBER}.
 *
 * @param job
 *          the job context providing the configuration
 * @return all worker splits followed by a single master split
 * @throws IOException
 *           if the parent input format fails to compute splits
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  List<InputSplit> splits = super.getSplits(job);
  // The scale factor does not change while looping, so read it from the configuration only once.
  int testScale = job.getConfiguration().getInt(NNConstants.NN_TEST_SCALE, 1);
  List<InputSplit> newSplits = new ArrayList<InputSplit>();
  for(int i = 0; i < testScale; i++) {
    for(InputSplit inputSplit: splits) {
      FileSplit fileSplit = (FileSplit) inputSplit;
      if(isNotPigOrHadoopMetaFile(fileSplit.getPath())) {
        newSplits.add(new GuaguaInputSplit(false, new FileSplit[] { fileSplit }));
      }
    }
  }
  // The master split is always the last element and has no file split attached.
  newSplits.add(new GuaguaInputSplit(true, (FileSplit) null));
  int mapperSize = newSplits.size();
  LOG.info("inputs size including master: {}", mapperSize);
  LOG.debug("input splits inclduing: {}", newSplits);
  // Workers are all mappers except the single master.
  job.getConfiguration().set(GuaguaConstants.GUAGUA_WORKER_NUMBER, String.valueOf(mapperSize - 1));
  return newSplits;
}
origin: ml.shifu/guagua-yarn

/**
 * Serializes this split: first the master flag, then, for a non-master split only, the number of file splits
 * followed by each file split in order.
 *
 * @param out
 *          the sink to write to
 * @throws IOException
 *           if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeBoolean(this.isMaster());
  if(this.isMaster()) {
    // Nothing beyond the flag is written for the master split.
    return;
  }
  FileSplit[] fileSplits = this.getFileSplits();
  out.writeInt(fileSplits.length);
  for(FileSplit fileSplit: fileSplits) {
    fileSplit.write(out);
  }
}
origin: ShifuML/shifu

/**
 * Appends cross validation file splits to the non-master training splits, in place.
 *
 * <p>
 * For the i-th group returned by {@code getCrossValidationSplits}, its file splits are appended after the file
 * splits of the i-th non-master training split. The split's extensions are then set to a flag array that is
 * {@code false} for the original (training) entries and {@code true} for the appended (validation) entries.
 *
 * @param trainingSplit
 *          training splits, possibly including master splits which are left untouched
 * @param context
 *          the job context handed to {@code getCrossValidationSplits}
 * @throws IOException
 *           if the cross validation splits cannot be computed
 */
protected void addCrossValidationDataset(List<InputSplit> trainingSplit, JobContext context) throws IOException {
  // Keep only worker splits; the master split carries no data.
  List<InputSplit> workerSplits = new ArrayList<InputSplit>();
  for(InputSplit candidate: trainingSplit) {
    if(!((GuaguaInputSplit) candidate).isMaster()) {
      workerSplits.add(candidate);
    }
  }

  List<List<FileSplit>> csSplits = this.getCrossValidationSplits(context, workerSplits.size());
  for(int groupIndex = 0; groupIndex < csSplits.size(); groupIndex++) {
    List<FileSplit> validationFiles = csSplits.get(groupIndex);
    GuaguaInputSplit worker = (GuaguaInputSplit) workerSplits.get(groupIndex);

    FileSplit[] trainingFiles = worker.getFileSplits();
    int trainingSize = trainingFiles.length;
    FileSplit[] merged = (FileSplit[]) ArrayUtils.addAll(trainingFiles,
        validationFiles.toArray(new FileSplit[0]));
    worker.setFileSplits(merged);

    // Entries at or beyond the original training size are the appended validation files.
    Boolean[] validationFlags = new Boolean[merged.length];
    for(int pos = 0; pos < merged.length; pos++) {
      validationFlags[pos] = pos >= trainingSize;
    }
    worker.setExtensions(validationFlags);
  }

  LOG.info("Training input split size is: {}.", trainingSplit.size());
  LOG.info("Validation input split size is {}.", csSplits.size());
}
origin: ShifuML/guagua

/**
 * Restores this split from its serialized form: the master flag first and, for a non-master split only, the
 * number of file splits followed by each file split.
 *
 * @param in
 *          the source to read from
 * @throws IOException
 *           if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  this.setMaster(in.readBoolean());
  if(isMaster()) {
    // The master split has no further payload.
    return;
  }
  int count = in.readInt();
  FileSplit[] restored = new FileSplit[count];
  for(int idx = 0; idx < count; idx++) {
    // Create an empty placeholder and let it populate itself from the stream.
    FileSplit fileSplit = new FileSplit(null, 0, 0, (String[]) null);
    fileSplit.readFields(in);
    restored[idx] = fileSplit;
  }
  this.setFileSplits(restored);
}
origin: ShifuML/guagua

@Override
public void readFields(DataInput in) throws IOException {
  this.setMaster(in.readBoolean());
  if(!isMaster()) {
    int len = in.readInt();
    FileSplit[] splits = new FileSplit[len];
      splits[i].readFields(in);
    this.setFileSplits(splits);
    int extLen = in.readInt();
    if(extLen > 0) {
      this.setExtensions(exts);
origin: ml.shifu/guagua-mapreduce

/**
 * Initializes the guagua service for this mapper.
 *
 * <p>
 * The role (master or worker) is taken from the task's {@link GuaguaInputSplit}. A worker additionally converts
 * every Hadoop file split into a {@code GuaguaFileSplit}, copying the matching extension when one exists at the
 * same index, and hands the list to its service. In both roles the service then receives the job id and task
 * partition from the configuration, is initialized with the converted properties and is started.
 *
 * @param context
 *          the mapper context supplying the input split and the configuration
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
  GuaguaInputSplit guaguaSplit = (GuaguaInputSplit) context.getInputSplit();
  this.setMaster(guaguaSplit.isMaster());
  if(!this.isMaster()) {
    context.setStatus("Worker initializing ...");
    this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());
    List<GuaguaFileSplit> workerSplits = new LinkedList<GuaguaFileSplit>();
    FileSplit[] fileSplits = guaguaSplit.getFileSplits();
    for(int idx = 0; idx < fileSplits.length; idx++) {
      FileSplit fileSplit = fileSplits[idx];
      GuaguaFileSplit converted = new GuaguaFileSplit(fileSplit.getPath().toString(), fileSplit.getStart(),
          fileSplit.getLength());
      // Extensions are optional and may be shorter than the file split array.
      boolean hasExtension = guaguaSplit.getExtensions() != null && idx < guaguaSplit.getExtensions().length;
      if(hasExtension) {
        converted.setExtension(guaguaSplit.getExtensions()[idx]);
      }
      workerSplits.add(converted);
    }
    this.getGuaguaService().setSplits(workerSplits);
  } else {
    context.setStatus("Master initializing ...");
    this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
  }

  // Common wiring for both roles.
  Properties props = replaceConfToProps(context.getConfiguration());
  String appId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID);
  this.getGuaguaService().setAppId(appId);
  String containerId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION);
  this.getGuaguaService().setContainerId(containerId);
  this.getGuaguaService().init(props);
  this.getGuaguaService().start();
}
origin: ml.shifu/guagua-mapreduce

/**
 * Data locality support: collects the hosts of every non-null file split into one array.
 *
 * @return the concatenated locations of all file splits, or an empty array when there are no file splits
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
  FileSplit[] fileSplits = this.getFileSplits();
  if(fileSplits == null || fileSplits.length == 0) {
    return new String[0];
  }
  List<String> hosts = new ArrayList<String>();
  for(int idx = 0; idx < fileSplits.length; idx++) {
    FileSplit current = fileSplits[idx];
    if(current == null) {
      continue;
    }
    hosts.addAll(Arrays.asList(current.getLocations()));
  }
  return hosts.toArray(new String[0]);
}
origin: ml.shifu/guagua-mapreduce

@Override
public void readFields(DataInput in) throws IOException {
  this.setMaster(in.readBoolean());
  if(!isMaster()) {
    int len = in.readInt();
    FileSplit[] splits = new FileSplit[len];
      splits[i].readFields(in);
    this.setFileSplits(splits);
    int extLen = in.readInt();
    if(extLen > 0) {
      this.setExtensions(exts);
origin: ml.shifu/guagua-yarn

/**
 * Reads the serialized state of this split. The stream holds the master flag and, when the split is not the
 * master, a count followed by that many file splits.
 *
 * @param in
 *          the source to read from
 * @throws IOException
 *           if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  this.setMaster(in.readBoolean());
  if(isMaster()) {
    // Only the flag is present for the master split.
    return;
  }
  int total = in.readInt();
  FileSplit[] loaded = new FileSplit[total];
  int next = 0;
  while(next < total) {
    // Start from an empty file split and let it fill itself from the stream.
    loaded[next] = new FileSplit(null, 0, 0, (String[]) null);
    loaded[next].readFields(in);
    next++;
  }
  this.setFileSplits(loaded);
}
origin: ShifuML/guagua

/**
 * Prepares this mapper as either the guagua master or a guagua worker, depending on the flag carried by its
 * {@link GuaguaInputSplit}, then configures and starts the chosen service.
 *
 * <p>
 * A worker also translates each Hadoop file split (path, start, length) into a {@code GuaguaFileSplit}, attaching
 * the extension found at the same index when the extensions array exists and is long enough.
 *
 * @param context
 *          the mapper context supplying the input split and the configuration
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
  GuaguaInputSplit split = (GuaguaInputSplit) context.getInputSplit();
  this.setMaster(split.isMaster());
  if(this.isMaster()) {
    context.setStatus("Master initializing ...");
    this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
  } else {
    context.setStatus("Worker initializing ...");
    this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());
    List<GuaguaFileSplit> guaguaFileSplits = new LinkedList<GuaguaFileSplit>();
    FileSplit[] hadoopSplits = split.getFileSplits();
    int position = 0;
    for(FileSplit hadoopSplit: hadoopSplits) {
      GuaguaFileSplit guaguaFileSplit = new GuaguaFileSplit(hadoopSplit.getPath().toString(),
          hadoopSplit.getStart(), hadoopSplit.getLength());
      // Only copy an extension when one is available for this position.
      if(split.getExtensions() != null && position < split.getExtensions().length) {
        guaguaFileSplit.setExtension(split.getExtensions()[position]);
      }
      guaguaFileSplits.add(guaguaFileSplit);
      position++;
    }
    this.getGuaguaService().setSplits(guaguaFileSplits);
  }

  // Service identity and properties come from the job configuration in both roles.
  Properties serviceProps = replaceConfToProps(context.getConfiguration());
  this.getGuaguaService().setAppId(context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID));
  this.getGuaguaService().setContainerId(
      context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION));
  this.getGuaguaService().init(serviceProps);
  this.getGuaguaService().start();
}
origin: ShifuML/guagua

/**
 * Data locality support: returns the hosts of all file splits held by this split.
 *
 * @return every location reported by each non-null file split, in order; empty when no file splits are set
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
  FileSplit[] fileSplits = this.getFileSplits();
  // A missing array is treated exactly like an empty one.
  int count = fileSplits == null ? 0 : fileSplits.length;
  List<String> hosts = new ArrayList<String>();
  for(int idx = 0; idx < count; idx++) {
    if(fileSplits[idx] != null) {
      hosts.addAll(Arrays.asList(fileSplits[idx].getLocations()));
    }
  }
  return hosts.toArray(new String[0]);
}
origin: ShifuML/guagua

/**
 * Writes the master flag and, when this split is not the master, the file split count followed by every file
 * split.
 *
 * @param out
 *          the sink to write to
 * @throws IOException
 *           if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeBoolean(this.isMaster());
  if(!this.isMaster()) {
    FileSplit[] toWrite = this.getFileSplits();
    int count = toWrite.length;
    out.writeInt(count);
    int written = 0;
    while(written < count) {
      toWrite[written].write(out);
      written++;
    }
  }
}
origin: ml.shifu/guagua-yarn

/**
 * Combines the given splits into groups and wraps each group in one worker {@link GuaguaInputSplit}.
 *
 * <p>
 * Copy from pig implementation, need to check this code logic.
 *
 * @param newSplits
 *          the splits to combine; every element is expected to be a {@code FileSplit}
 * @param combineSize
 *          the size handed to {@code getCombineGuaguaSplits} to drive the grouping
 * @return one non-master {@link GuaguaInputSplit} per combined group
 * @throws IOException
 *           if combining the splits fails
 * @throws GuaguaRuntimeException
 *           if the thread is interrupted while combining; the interrupt flag is restored first
 */
public static List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
    throws IOException {
  List<List<InputSplit>> combinePigSplits;
  try {
    combinePigSplits = getCombineGuaguaSplits(newSplits, combineSize);
  } catch (InterruptedException e) {
    // Restore the interrupt status so code further up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    throw new GuaguaRuntimeException(e);
  }
  // Build the result in a fresh list rather than reassigning the parameter.
  List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
  for(List<InputSplit> inputSplits: combinePigSplits) {
    FileSplit[] fss = new FileSplit[inputSplits.size()];
    for(int i = 0; i < inputSplits.size(); i++) {
      fss[i] = (FileSplit) (inputSplits.get(i));
    }
    combinedSplits.add(new GuaguaInputSplit(false, fss));
  }
  return combinedSplits;
}
origin: ml.shifu/guagua-yarn

/**
 * Data locality support: gathers the hosts of all file splits.
 *
 * @return the locations of every non-null file split, or an empty array when no file splits are present
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
  FileSplit[] fileSplits = this.getFileSplits();
  boolean nothingToReport = fileSplits == null || fileSplits.length == 0;
  if(nothingToReport) {
    return new String[0];
  }
  List<String> hosts = new ArrayList<String>();
  for(FileSplit candidate: fileSplits) {
    if(candidate == null) {
      // Skip holes in the array.
      continue;
    }
    List<String> splitHosts = Arrays.asList(candidate.getLocations());
    hosts.addAll(splitHosts);
  }
  return hosts.toArray(new String[0]);
}
origin: ml.shifu/guagua-mapreduce

/**
 * Returns the size of this split. For the master split <code>Long.MAX_VALUE</code> is used as its length to make
 * it the first task of the Hadoop job, which is convenient for users checking the master in the Hadoop UI. For a
 * worker split the lengths of all file splits are summed.
 *
 * @return <code>Long.MAX_VALUE</code> for the master, otherwise the total length of all file splits
 */
@Override
public long getLength() throws IOException, InterruptedException {
  if(isMaster()) {
    return Long.MAX_VALUE;
  }
  FileSplit[] fileSplits = this.getFileSplits();
  long total = 0;
  for(int idx = 0; idx < fileSplits.length; idx++) {
    total += fileSplits[idx].getLength();
  }
  return total;
}
origin: ml.shifu/guagua-mapreduce

/**
 * Groups the given splits via {@code getCombineGuaguaSplits} and turns each group into one worker
 * {@link GuaguaInputSplit}. If the grouping is interrupted, the interrupt flag is restored and a
 * {@code GuaguaRuntimeException} is thrown.
 *
 * <p>
 * Copy from pig implementation, need to check this code logic.
 *
 * @param newSplits
 *          the splits to combine; each element is cast to {@code FileSplit}
 * @param combineSize
 *          the size passed through to {@code getCombineGuaguaSplits}
 * @return one non-master {@link GuaguaInputSplit} per group
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
    throws IOException {
  List<List<InputSplit>> groups;
  try {
    groups = getCombineGuaguaSplits(newSplits, combineSize);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new GuaguaRuntimeException(e);
  }

  List<InputSplit> combined = new ArrayList<InputSplit>();
  for(List<InputSplit> group: groups) {
    FileSplit[] files = new FileSplit[group.size()];
    int pos = 0;
    for(InputSplit member: group) {
      files[pos++] = (FileSplit) member;
    }
    combined.add(new GuaguaInputSplit(false, files));
  }
  return combined;
}
origin: ShifuML/guagua

/**
 * Data locality support: returns all hosts for all file splits.
 *
 * @return hosts of each non-null file split appended in order; an empty array if there are no file splits
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
  FileSplit[] fileSplits = this.getFileSplits();
  List<String> hosts = new ArrayList<String>();
  if(fileSplits != null) {
    for(FileSplit fileSplit: fileSplits) {
      if(fileSplit != null) {
        String[] splitHosts = fileSplit.getLocations();
        hosts.addAll(Arrays.asList(splitHosts));
      }
    }
  }
  // With no file splits the list is empty, which yields an empty array.
  return hosts.toArray(new String[0]);
}
origin: ml.shifu/guagua-yarn

/**
 * Length of this split. The master split reports <code>Long.MAX_VALUE</code> so that it becomes the first task of
 * the Hadoop job, which makes the master easy to find in the Hadoop UI. A worker split reports the sum of the
 * lengths of its file splits.
 *
 * @return <code>Long.MAX_VALUE</code> for the master, otherwise the summed file split lengths
 */
@Override
public long getLength() throws IOException, InterruptedException {
  if(!isMaster()) {
    long sum = 0;
    FileSplit[] parts = this.getFileSplits();
    int idx = 0;
    while(idx < parts.length) {
      sum += parts[idx].getLength();
      idx++;
    }
    return sum;
  }
  return Long.MAX_VALUE;
}
origin: ShifuML/guagua

/**
 * Combines the given splits through {@code getCombineGuaguaSplits} and wraps every resulting group in a single
 * non-master {@link GuaguaInputSplit}. An interruption during combining re-sets the thread's interrupt flag and
 * is rethrown as a {@code GuaguaRuntimeException}.
 *
 * <p>
 * Copy from pig implementation, need to check this code logic.
 *
 * @param newSplits
 *          the splits to combine; each element is cast to {@code FileSplit}
 * @param combineSize
 *          the size passed through to {@code getCombineGuaguaSplits}
 * @return the combined worker splits, one per group
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
    throws IOException {
  List<List<InputSplit>> combinedGroups;
  try {
    combinedGroups = getCombineGuaguaSplits(newSplits, combineSize);
  } catch (InterruptedException interrupted) {
    Thread.currentThread().interrupt();
    throw new GuaguaRuntimeException(interrupted);
  }

  List<InputSplit> result = new ArrayList<InputSplit>();
  for(int groupIndex = 0; groupIndex < combinedGroups.size(); groupIndex++) {
    List<InputSplit> group = combinedGroups.get(groupIndex);
    int groupSize = group.size();
    FileSplit[] groupFiles = new FileSplit[groupSize];
    for(int member = 0; member < groupSize; member++) {
      groupFiles[member] = (FileSplit) group.get(member);
    }
    result.add(new GuaguaInputSplit(false, groupFiles));
  }
  return result;
}
origin: ml.shifu/guagua-yarn

GuaguaInputSplit inputSplit = (GuaguaInputSplit) (this.inputSplits.get(currentPartition - 1));
String host = null;
FileSplit[] fileSplits = inputSplit.getFileSplits();
if(fileSplits != null) {
  try {
ml.shifu.guagua.hadoop.ioGuaguaInputSplit

Javadoc

InputSplit implementation in guagua for Hadoop MapReduce job.

If GuaguaInputSplit#isMaster is true for a mapper, that mapper is the master; so far, #fileSplits is null for the master.

For a worker, the input #fileSplits are included; a FileSplit array is used so that guagua can combine several FileSplits in one task.

Most used methods

  • <init>
    Constructor with #isMaster and #fileSplits settings.
  • getFileSplits
  • isMaster
  • setFileSplits
  • setExtensions
  • setMaster
  • getExtensions

Popular in Java

  • Start an intent from android
  • setScale (BigDecimal)
  • getOriginalFilename (MultipartFile)
    Return the original filename in the client's filesystem.This may contain path information depending
  • getExternalFilesDir (Context)
  • FlowLayout (java.awt)
    A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
  • EOFException (java.io)
    Thrown when a program encounters the end of a file or stream during an input operation.
  • Permission (java.security)
    Abstract class for representing access to a system resource. All permissions have a name (whose inte
  • Reference (javax.naming)
  • Filter (javax.servlet)
    A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
  • XPath (javax.xml.xpath)
    XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Codota Logo
  • Products

    Search for Java codeSearch for JavaScript codeEnterprise
  • IDE Plugins

    IntelliJ IDEAWebStormAndroid StudioEclipseVisual Studio CodePyCharmSublime TextPhpStormVimAtomGoLandRubyMineEmacsJupyter
  • Company

    About UsContact UsCareers
  • Resources

    FAQBlogCodota Academy Plugin user guide Terms of usePrivacy policyJava Code IndexJavascript Code Index
Get Codota for your IDE now