public FileSystemViewKeyInputFormat(FileSystemDataset<E> dataset,
                                    Configuration conf) {
  this.dataset = dataset;
  this.view = null;
  LOG.debug("Dataset: {}", dataset);
  Format format = dataset.getDescriptor().getFormat();
  setConfigProperties(conf, format, dataset.getSchema(), dataset.getType());
}
public FileSystemViewKeyInputFormat(FileSystemView<E> view,
                                    Configuration conf) {
  this.dataset = (FileSystemDataset<E>) view.getDataset();
  this.view = view;
  LOG.debug("View: {}", view);
  Format format = dataset.getDescriptor().getFormat();
  setConfigProperties(conf, format, view.getSchema(), view.getType());
}
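Only one of the two constructors runs for a given job: the dataset form leaves `view` null, while the view form keeps the view so readers can filter entities later. A minimal sketch of the choice a caller might make, assuming `view` is a `View<Record>` loaded elsewhere and `conf` is the job's `Configuration` (this wiring is an assumption, not the class's actual caller):

  // Hedged sketch: select the constructor based on whether the view
  // covers the whole dataset or restricts it.
  FileSystemViewKeyInputFormat<Record> format;
  if (view instanceof FileSystemDataset) {
    // whole dataset: no per-entity filtering needed
    format = new FileSystemViewKeyInputFormat<Record>(
        (FileSystemDataset<Record>) view, conf);
  } else {
    // restricted view: keep it for later filtering
    format = new FileSystemViewKeyInputFormat<Record>(
        (FileSystemView<Record>) view, conf);
  }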
/**
 * Adds configuration for {@code DatasetKeyOutputFormat} to write to the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    FileSystemDataset dataset = (FileSystemDataset) view;
    conf.set(KITE_PARTITION_DIR,
        String.valueOf(dataset.getDescriptor().getLocation()));
  }
  withType(view.getType());
  return writeTo(view.getUri());
}
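Callers normally reach this builder through the output format's static `configure` entry point; a short usage sketch in the usual Kite style (the `users` view is assumed to be loaded already):

  Job job = Job.getInstance(new Configuration());
  job.setOutputFormatClass(DatasetKeyOutputFormat.class);

  // Route the job's output records into the dataset or view:
  DatasetKeyOutputFormat.configure(job).writeTo(users);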
@Override
public void merge(FileSystemDataset<E> update) {
  DatasetDescriptor updateDescriptor = update.getDescriptor();
@Override
@SuppressWarnings({"unchecked", "deprecation"})
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  Job job = new Job(conf);
  Format format = dataset.getDescriptor().getFormat();

  if (setInputPaths(jobContext, job)) {
    if (Formats.AVRO.equals(format)) {
      AvroJob.setInputKeySchema(job, dataset.getDescriptor().getSchema());
      AvroCombineInputFormat<E> delegate = new AvroCombineInputFormat<E>();
      return delegate.getSplits(jobContext);
    } else if (Formats.PARQUET.equals(format)) {
      AvroParquetCombineInputFormat delegate = new AvroParquetCombineInputFormat();
      return delegate.getSplits(jobContext);
    } else if (Formats.JSON.equals(format)) {
      return new JSONInputFormat().getSplits(jobContext);
    } else if (Formats.CSV.equals(format)) {
      // raw CSVInputFormat produces an unchecked warning, suppressed above
      return new CSVInputFormat().getSplits(jobContext);
    } else if (Formats.INPUTFORMAT.equals(format)) {
      return InputFormatUtil.newInputFormatInstance(dataset.getDescriptor())
          .getSplits(jobContext);
    } else {
      throw new UnsupportedOperationException(
          "Not a supported format: " + format);
    }
  } else {
    return ImmutableList.of();
  }
}
@SuppressWarnings("unchecked") private RecordReader<E, Void> createUnfilteredRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { Format format = dataset.getDescriptor().getFormat(); if (Formats.AVRO.equals(format)) { return new AvroKeyReaderWrapper(new AvroCombineInputFormat<E>()); } else if (Formats.PARQUET.equals(format)) { return new ValueReaderWrapper(new AvroParquetCombineInputFormat()); } else if (Formats.JSON.equals(format)) { JSONInputFormat<E> delegate = new JSONInputFormat<E>(); delegate.setView(view != null ? view : dataset); return delegate.createRecordReader(inputSplit, taskAttemptContext); } else if (Formats.CSV.equals(format)) { CSVInputFormat<E> delegate = new CSVInputFormat<E>(); delegate.setView(view != null ? view : dataset); return delegate.createRecordReader(inputSplit, taskAttemptContext); } else if (Formats.INPUTFORMAT.equals(format)) { return InputFormatUtil.newRecordReader(dataset.getDescriptor()); } else { throw new UnsupportedOperationException( "Not a supported format: " + format); } }
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "BC_UNCONFIRMED_CAST_OF_RETURN_VALUE",
    justification = "Writer is known to be IncrementalWriter")
public FileSystemWriter.IncrementalWriter<E> load(StorageKey key)
    throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());
  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
  Path partition = convert.fromKey(key);
  FileSystemWriter<E> writer = FileSystemWriter.newWriter(
      dataset.getFileSystem(),
      new Path(dataset.getDirectory(), partition),
      conf.getRollIntervalMillis(), conf.getTargetFileSize(),
      dataset.getDescriptor(), view.getAccessor().getWriteSchema());

  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(
        dataset.getNamespace(), dataset.getName(), partition.toString());
  }

  // initialize the writer after calling the listener
  // this lets the listener decide if and how to create the
  // partition directory
  writer.initialize();

  return (FileSystemWriter.IncrementalWriter<E>) writer;
}
@Override
public FileSystemWriter<E> load(StorageKey key) throws Exception {
  Preconditions.checkState(view.getDataset() instanceof FileSystemDataset,
      "FileSystemWriters cannot create writer for " + view.getDataset());
  FileSystemDataset dataset = (FileSystemDataset) view.getDataset();
  Path partition = convert.fromKey(key);
  FileSystemWriter<E> writer = FileSystemWriter.newWriter(
      dataset.getFileSystem(),
      new Path(dataset.getDirectory(), partition),
      conf.getRollIntervalMillis(), conf.getTargetFileSize(),
      dataset.getDescriptor(), view.getAccessor().getWriteSchema());

  PartitionListener listener = dataset.getPartitionListener();
  if (listener != null) {
    listener.partitionAdded(
        dataset.getNamespace(), dataset.getName(), partition.toString());
  }

  // initialize the writer after calling the listener
  // this lets the listener decide if and how to create the
  // partition directory
  writer.initialize();

  return writer;
}
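Both `load` overrides have the shape of a Guava `CacheLoader`, which suggests the partitioned writer keeps one open writer per `StorageKey` and creates them on demand. A minimal sketch of that wiring, assuming the `load(...)` above lives in a `CacheLoader` subclass (the `WriterCacheLoader` name and the size limit are assumptions):

  import com.google.common.cache.CacheBuilder;
  import com.google.common.cache.LoadingCache;

  // Hypothetical wiring: one writer per partition key, created on demand.
  LoadingCache<StorageKey, FileSystemWriter<E>> writers =
      CacheBuilder.newBuilder()
          .maximumSize(10) // assumed cap on concurrently open writers
          .build(new WriterCacheLoader<E>(view, conf)); // the load(...) above

  // Writing an entity then becomes a cache lookup plus a write:
  writers.getUnchecked(key).write(entity);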
@Test
public void testUnpartitionedReplace() {
  // recreate temporary without a partition strategy
  Datasets.delete("dataset:file:/tmp/datasets/temporary");
  DatasetDescriptor descriptor = new DatasetDescriptor
      .Builder(unpartitioned.getDescriptor())
      .location((URI) null) // clear the location
      .build();
  temporary = Datasets.create("dataset:file:/tmp/datasets/temporary",
      descriptor, TestRecord.class);

  Assert.assertTrue("Should allow replacing an unpartitioned dataset",
      unpartitioned.canReplace(unpartitioned));

  // make sure there are multiple files
  writeTestRecords(unpartitioned);
  writeTestRecords(unpartitioned);
  writeTestRecords(temporary);
  writeTestRecords(temporary);

  Set<String> originalFiles = Sets.newHashSet(
      Iterators.transform(unpartitioned.pathIterator(), new GetFilename()));
  Set<String> replacementFiles = Sets.newHashSet(
      Iterators.transform(temporary.pathIterator(), new GetFilename()));
  Assert.assertFalse("Sanity check", originalFiles.equals(replacementFiles));

  unpartitioned.replace(unpartitioned, temporary);

  Set<String> replacedFiles = Sets.newHashSet(
      Iterators.transform(unpartitioned.pathIterator(), new GetFilename()));
  Assert.assertEquals("Should contain the replacement files",
      replacementFiles, replacedFiles);
}
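The `canReplace`/`replace` pair exercised above comes from a replacement contract that `FileSystemDataset` implements. Its shape, inferred only from these calls (treat the interface name, generics, and javadoc as assumptions, not confirmed API):

  // Hedged sketch, inferred from usage in the test above.
  public interface Replaceable<V> {
    /** Returns true if {@code part} of this dataset may be swapped out. */
    boolean canReplace(V part);
    /** Substitutes {@code replacement}'s data files for {@code part}'s. */
    void replace(V part, V replacement);
  }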
    .build();

Assert.assertTrue("Dataset is partitioned", ds.getDescriptor()
    .isPartitioned());
Assert.assertEquals(partitionStrategy, ds.getDescriptor()
    .getPartitionStrategy());
@Test
public void testWriteAndRead() throws IOException {
  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("test")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schemaUri(USER_SCHEMA_URL)
          .format(format)
          .compressionType(compressionType)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  Assert.assertFalse("Dataset is not partitioned", ds.getDescriptor()
      .isPartitioned());

  writeTestUsers(ds, 10);
  checkTestUsers(ds, 10);
}
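`writeTestUsers` and `checkTestUsers` are test helpers not shown in this excerpt. A plausible sketch of the writer half, assuming the user schema has `username` and `email` fields (both field names are assumptions) and using Avro's `GenericRecordBuilder`:

  // Hypothetical helper: write `count` generic user records to the dataset.
  private void writeTestUsers(Dataset<Record> ds, int count) throws IOException {
    DatasetWriter<Record> writer = null;
    try {
      writer = ds.newWriter();
      for (int i = 0; i < count; i++) {
        Record user = new GenericRecordBuilder(ds.getDescriptor().getSchema())
            .set("username", "test-" + i)   // assumed field
            .set("email", "email-" + i)     // assumed field
            .build();
        writer.write(user);
      }
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }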