/**
 * Returns the repository that owns this job's output dataset.
 * <p>
 * When the repository supports temporary datasets, the returned instance is
 * a temporary repository scoped to this job's dataset name so task output
 * can be staged there and merged on commit.
 */
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo =
      DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (!(repo instanceof TemporaryDatasetRepositoryAccessor)) {
    return repo;
  }
  // Stage job output in a temporary repository under the target's namespace.
  String namespace = load(jobContext).getDataset().getNamespace();
  return ((TemporaryDatasetRepositoryAccessor) repo)
      .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
}
/**
 * Creates a record writer for this task attempt.
 * <p>
 * Writes go to a per-task-attempt view when supported (so failed attempts
 * can be discarded); otherwise directly to the target view. When the
 * dataset is partitioned and {@code KITE_PARTITION_DIR} is set, output is
 * restricted to the partition named by that directory.
 *
 * @param taskAttemptContext the context for the current task attempt
 * @return a writer for entities of type {@code E}
 * @throws UnsupportedOperationException if a partition directory is
 *     configured but the target is not a {@code FileSystemDataset}
 */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<E, Void> getRecordWriter(TaskAttemptContext taskAttemptContext) {
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(taskAttemptContext);
  View<E> target = load(taskAttemptContext);
  View<E> working;
  if (usePerTaskAttemptDatasets(target, conf)) {
    working = loadOrCreateTaskAttemptView(taskAttemptContext);
  } else {
    working = target;
  }

  boolean copyRecords = conf.getBoolean(KITE_COPY_RECORDS, false);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (working.getDataset().getDescriptor().isPartitioned() &&
      partitionDir != null) {
    // NOTE(review): the partitioned check is on working but the instanceof
    // check and the partition lookup use target — confirm this asymmetry is
    // intentional when per-task-attempt datasets are in use.
    if (!(target instanceof FileSystemDataset)) {
      throw new UnsupportedOperationException("Partitions only supported for " +
          "FileSystemDataset. Dataset: " + target);
    }
    FileSystemDataset fsDataset = (FileSystemDataset) target;
    PartitionKey key = fsDataset.keyFromDirectory(new Path(partitionDir));
    if (key != null && !key.getValues().isEmpty()) {
      working = fsDataset.getPartition(key, true);
    }
  }
  // Both the partitioned and non-partitioned paths previously returned an
  // identical writer from separate branches; folded into a single return.
  return new DatasetRecordWriter<E>(working, copyRecords);
}
/**
 * Resolves the dataset repository for this job's output URI, switching to a
 * job-scoped temporary repository when the underlying repository supports
 * staging via {@code TemporaryDatasetRepositoryAccessor}.
 */
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository outputRepo =
      DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (outputRepo instanceof TemporaryDatasetRepositoryAccessor) {
    Dataset<Object> targetDataset = load(jobContext).getDataset();
    TemporaryDatasetRepositoryAccessor accessor =
        (TemporaryDatasetRepositoryAccessor) outputRepo;
    outputRepo = accessor.getTemporaryRepository(
        targetDataset.getNamespace(), getJobDatasetName(jobContext));
  }
  return outputRepo;
}
/**
 * Creates a record writer for this task attempt.
 * <p>
 * Writes go to a per-task-attempt view when supported (so failed attempts
 * can be discarded); otherwise directly to the target view. When the
 * dataset is partitioned and {@code KITE_PARTITION_DIR} is set, output is
 * restricted to the partition named by that directory.
 *
 * @param taskAttemptContext the context for the current task attempt
 * @return a writer for entities of type {@code E}
 * @throws UnsupportedOperationException if a partition directory is
 *     configured but the target is not a {@code FileSystemDataset}
 */
@Override
@SuppressWarnings("unchecked")
public RecordWriter<E, Void> getRecordWriter(TaskAttemptContext taskAttemptContext) {
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(taskAttemptContext);
  View<E> target = load(taskAttemptContext);
  View<E> working;
  if (usePerTaskAttemptDatasets(target)) {
    working = loadOrCreateTaskAttemptView(taskAttemptContext);
  } else {
    working = target;
  }

  boolean copyRecords = conf.getBoolean(KITE_COPY_RECORDS, false);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (working.getDataset().getDescriptor().isPartitioned() &&
      partitionDir != null) {
    // NOTE(review): the partitioned check is on working but the instanceof
    // check and the partition lookup use target — confirm this asymmetry is
    // intentional when per-task-attempt datasets are in use.
    if (!(target instanceof FileSystemDataset)) {
      throw new UnsupportedOperationException("Partitions only supported for " +
          "FileSystemDataset. Dataset: " + target);
    }
    FileSystemDataset fsDataset = (FileSystemDataset) target;
    PartitionKey key = fsDataset.keyFromDirectory(new Path(partitionDir));
    if (key != null && !key.getValues().isEmpty()) {
      working = fsDataset.getPartition(key, true);
    }
  }
  // Both the partitioned and non-partitioned paths previously returned an
  // identical writer from separate branches; folded into a single return.
  return new DatasetRecordWriter<E>(working, copyRecords);
}
/**
 * Selects the output committer for this task attempt: a merging committer
 * when per-task-attempt datasets are in use, otherwise a no-op committer.
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) {
  Configuration conf =
      Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
  DefaultConfiguration.init(conf);
  View<E> view = load(taskAttemptContext);
  if (usePerTaskAttemptDatasets(view, conf)) {
    // Per-attempt output must be merged into the job dataset on commit.
    return new MergeOutputCommitter<E>();
  }
  return new NullOutputCommitter();
}
/**
 * Returns the committer used to finalize this task attempt's output:
 * a merge committer when writes are staged per task attempt, otherwise a
 * committer that does nothing.
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) {
  Configuration conf =
      Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
  DefaultConfiguration.init(conf);
  View<E> outputView = load(taskAttemptContext);
  boolean stagedPerAttempt = usePerTaskAttemptDatasets(outputView);
  if (stagedPerAttempt) {
    return new MergeOutputCommitter<E>();
  }
  return new NullOutputCommitter();
}
@Override public void checkOutputSpecs(JobContext jobContext) { // The committer setup will fail if the output dataset does not exist View<E> target = load(jobContext); Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext); switch (conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT)) { case APPEND: break; case OVERWRITE: // if the merge won't use replace, then delete the existing data if (!canReplace(target)) { target.deleteAll(); } break; default: case DEFAULT: boolean isReady = false; if (target instanceof Signalable) { isReady = ((Signalable)target).isReady(); } if (isReady || !target.isEmpty()) { throw new DatasetException( "View is not empty or has been signaled as ready: " + target); } break; } }
/**
 * Validates that the job's output view can be written to, honoring the
 * write mode configured under {@code KITE_WRITE_MODE}.
 * <p>
 * APPEND requires no check; OVERWRITE deletes existing data when the merge
 * cannot replace it; DEFAULT (also the fall-through for any other mode)
 * fails if the view is non-empty or has been signaled as ready.
 *
 * @param jobContext the job being validated
 * @throws DatasetException in DEFAULT mode when the view already has data
 *     or has been signaled ready
 */
@Override
public void checkOutputSpecs(JobContext jobContext) {
  // The committer setup will fail if the output dataset does not exist
  View<E> target = load(jobContext);
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  switch (conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT)) {
    case APPEND:
      break;
    case OVERWRITE:
      // if the merge won't use replace, then delete the existing data
      if (!canReplace(target)) {
        target.deleteAll();
      }
      break;
    // default precedes DEFAULT so unrecognized modes share the strict check
    default:
    case DEFAULT:
      boolean isReady = false;
      if (target instanceof Signalable) {
        isReady = ((Signalable)target).isReady();
      }
      if (isReady || !target.isEmpty()) {
        throw new DatasetException(
            "View is not empty or has been signaled as ready: " + target);
      }
      break;
  }
}
/**
 * Loads the temporary job dataset, creating it if necessary.
 * <p>
 * The job dataset may already exist if the ApplicationMaster was restarted;
 * a compatible leftover dataset is reused so prior task output survives.
 *
 * @param jobContext the job whose temporary dataset is needed
 * @return the temporary dataset for this job
 */
@SuppressWarnings("unchecked")
private static <E> Dataset<E> loadOrCreateJobDataset(JobContext jobContext) {
  Dataset<Object> dataset = load(jobContext).getDataset();
  String jobDatasetName = getJobDatasetName(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  if (repo.exists(TEMP_NAMESPACE, jobDatasetName)) {
    Dataset<E> tempDataset = repo.load(TEMP_NAMESPACE, jobDatasetName,
        DatasetKeyOutputFormat.<E>getType(jobContext));
    try {
      Compatibility.checkCompatible(dataset.getDescriptor(),
          tempDataset.getDescriptor());
      return tempDataset;
    } catch (RuntimeException ex) {
      // swallow: an incompatible leftover dataset is deliberately ignored
      // and a fresh one is created below.
      // NOTE(review): the incompatible dataset still exists at this point,
      // so the create() below may fail — confirm whether it should be
      // deleted first.
    }
  }
  return repo.create(TEMP_NAMESPACE, jobDatasetName,
      copy(dataset.getDescriptor()),
      DatasetKeyOutputFormat.<E>getType(jobContext));
}
/** * The job dataset may already exist if the ApplicationMaster was restarted */ @SuppressWarnings("unchecked") private static <E> Dataset<E> loadOrCreateJobDataset(JobContext jobContext) { Dataset<Object> dataset = load(jobContext).getDataset(); String jobDatasetName = getJobDatasetName(jobContext); DatasetRepository repo = getDatasetRepository(jobContext); if (repo.exists(TEMP_NAMESPACE, jobDatasetName)) { Dataset<E> tempDataset = repo.load(TEMP_NAMESPACE, jobDatasetName, DatasetKeyOutputFormat.<E>getType(jobContext)); try { Compatibility.checkCompatible(dataset.getDescriptor(), tempDataset.getDescriptor()); return tempDataset; } catch (RuntimeException ex) { // swallow } } return repo.create(TEMP_NAMESPACE, jobDatasetName, copy(dataset.getDescriptor()), DatasetKeyOutputFormat.<E>getType(jobContext)); }