/**
 * Get the {@link PartitionStrategy}, if this dataset is partitioned. Calling
 * this method on a non-partitioned dataset is an error. Instead, use the
 * {@link #isPartitioned()} method prior to invocation.
 */
public PartitionStrategy getPartitionStrategy() {
  Preconditions.checkState(isPartitioned(),
      "Attempt to retrieve the partition strategy on a non-partitioned descriptor:%s",
      this);
  return partitionStrategy;
}
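// Illustrative usage sketch (not part of the original source; the helper name
// describePartitioning is hypothetical). It shows the guard recommended by the
// Javadoc above: check isPartitioned() before calling getPartitionStrategy(),
// which otherwise fails the precondition with an IllegalStateException.
static String describePartitioning(DatasetDescriptor descriptor) {
  if (descriptor.isPartitioned()) {
    // safe: the guard guarantees a strategy is present
    return "partitioned: " + descriptor.getPartitionStrategy();
  } else {
    return "not partitioned";
  }
}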
public void addExistingPartitions() {
  if (partitionListener != null && descriptor.isPartitioned()) {
    for (Iterator<Path> i = dirIterator(); i.hasNext(); ) {
      Path partition = i.next();
      LOG.info("Adding partition {}", partition);
      partitionListener.partitionAdded(namespace, name, partition.toString());
    }
  }
}
@Override @SuppressWarnings("deprecation") public void dropPartition(PartitionKey key) { Preconditions.checkState(descriptor.isPartitioned(), "Attempt to drop a partition on a non-partitioned dataset (name:%s)", name); Preconditions.checkNotNull(key, "Partition key may not be null"); LOG.debug("Dropping partition with key:{} dataset:{}", key, name); Path partitionDirectory = toDirectoryName(directory, key); try { if (!fileSystem.delete(partitionDirectory, true)) { throw new IOException("Partition directory " + partitionDirectory + " for key " + key + " does not exist"); } } catch (IOException e) { throw new DatasetIOException("Unable to locate or drop dataset partition directory " + partitionDirectory, e); } }
PathIterator pathIterator() {
  if (dataset.getDescriptor().isPartitioned()) {
    return new PathIterator(fs, root, partitionIterator());
  } else {
    return new PathIterator(fs, root, null);
  }
}
@Override
public void setConf(Configuration configuration) {
  conf = configuration;
  View<E> view = load(configuration);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (view.getDataset().getDescriptor().isPartitioned() && partitionDir != null) {
    delegate = getDelegateInputFormatForPartition(view.getDataset(), partitionDir, conf);
  } else {
    delegate = getDelegateInputFormat(view, conf);
  }
}
private static <E> View<E> loadOrCreateTaskAttemptView(TaskAttemptContext taskContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(taskContext);
  Map<String, String> uriOptions = Registration.lookupDatasetUri(
      URI.create(URI.create(conf.get(KITE_OUTPUT_URI)).getSchemeSpecificPart())).second();
  Dataset<E> dataset = loadOrCreateTaskAttemptDataset(taskContext);

  if (dataset instanceof AbstractDataset) {
    DatasetDescriptor descriptor = dataset.getDescriptor();
    Schema schema = descriptor.getSchema();
    PartitionStrategy strategy = null;
    if (descriptor.isPartitioned()) {
      strategy = descriptor.getPartitionStrategy();
    }
    Constraints constraints = Constraints.fromQueryMap(schema, strategy, uriOptions);
    return ((AbstractDataset<E>) dataset).filter(constraints);
  } else {
    return dataset;
  }
}
public static void checkPartitionedBy(DatasetDescriptor descriptor, String fieldName) {
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Descriptor %s is not partitioned", descriptor);
  Preconditions.checkArgument(
      Accessor.getDefault().hasPartitioner(descriptor.getPartitionStrategy(), fieldName),
      "Descriptor %s is not partitioned by '%s'", descriptor, fieldName);
}
public Builder(Dataset dataset) {
  if (!dataset.getDescriptor().isPartitioned()) {
    throw new DatasetException("Dataset is not partitioned");
  }
  this.strategy = dataset.getDescriptor().getPartitionStrategy();
  this.values = Maps.newHashMap();
}
/**
 * Returns an iterator that provides all leaf-level directories in this view.
 *
 * @return leaf-directory iterator
 */
Iterator<Path> dirIterator() {
  if (dataset.getDescriptor().isPartitioned()) {
    return Iterators.transform(partitionIterator(), new Function<StorageKey, Path>() {
      @Override
      @edu.umd.cs.findbugs.annotations.SuppressWarnings(
          value = "NP_PARAMETER_MUST_BE_NONNULL_BUT_MARKED_AS_NULLABLE",
          justification = "False positive, initialized above as non-null.")
      public Path apply(@Nullable StorageKey key) {
        return new Path(root, key.getPath());
      }
    });
  } else {
    return Iterators.singletonIterator(root);
  }
}
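// Illustrative sketch (not part of the original source; assumes it lives in the
// same class so the package-private dirIterator() above is accessible). It logs
// every leaf directory, which is the root itself for a non-partitioned dataset
// and one directory per partition otherwise.
void logLeafDirectories() {
  for (Iterator<Path> dirs = dirIterator(); dirs.hasNext(); ) {
    LOG.info("Leaf directory: {}", dirs.next());
  }
}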
@SuppressWarnings("unchecked") private static <E, V extends View<E>> V view(Dataset<E> dataset, Map<String, String> uriOptions) { if (dataset instanceof AbstractDataset) { DatasetDescriptor descriptor = dataset.getDescriptor(); Schema schema = descriptor.getSchema(); PartitionStrategy strategy = null; if (descriptor.isPartitioned()) { strategy = descriptor.getPartitionStrategy(); } Constraints constraints = Constraints.fromQueryMap( schema, strategy, uriOptions); return (V) ((AbstractDataset) dataset).filter(constraints); } else { return (V) dataset; } } }
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) {
  // the SchemaManager stores schemas, so this embeds the column mapping and
  // partition strategy in the schema. the result is parsed by
  // AvroKeyEntitySchemaParser
  Schema schema = descriptor.getSchema();
  if (descriptor.isColumnMapped()) {
    schema = ColumnMappingParser
        .embedColumnMapping(schema, descriptor.getColumnMapping());
  }
  if (descriptor.isPartitioned()) {
    schema = PartitionStrategyParser
        .embedPartitionStrategy(schema, descriptor.getPartitionStrategy());
  }
  return schema;
}
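// Illustrative sketch (not part of the original source; the record and field
// names are hypothetical). A descriptor built with a partition strategy takes
// the isPartitioned() branch above, so the strategy is embedded into the schema
// returned by getEmbeddedSchema and can be recovered later by the parser named
// in the comment.
static Schema buildEmbeddedUserSchema() {
  Schema userSchema = SchemaBuilder.record("User").fields()
      .requiredString("username")
      .requiredLong("created_at")
      .endRecord();
  PartitionStrategy byUser = new PartitionStrategy.Builder()
      .hash("username", 16)
      .build();
  DatasetDescriptor partitioned = new DatasetDescriptor.Builder()
      .schema(userSchema)
      .partitionStrategy(byUser)
      .build();
  return getEmbeddedSchema(partitioned);
}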
@Override
public DatasetWriter<E> newWriter() {
  checkSchemaForWrite();
  AbstractDatasetWriter<E> writer;
  if (dataset.getDescriptor().isPartitioned()) {
    writer = PartitionedDatasetWriter.newWriter(this);
  } else {
    writer = FileSystemWriter.newWriter(
        fs, root, -1, -1 /* get from descriptor */,
        dataset.getDescriptor(), this.getAccessor().getWriteSchema());
  }
  writer.initialize();
  return writer;
}
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Loading dataset: {}", name);

  DatasetDescriptor descriptor = metadataProvider.load(namespace, name);

  FileSystemDataset<E> ds = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(descriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();

  LOG.debug("Loaded dataset:{}", ds);

  return ds;
}
boolean deleteAllUnsafe(boolean useTrash) {
  boolean deleted = false;
  if (dataset.getDescriptor().isPartitioned()) {
    for (StorageKey key : partitionIterator()) {
      deleted = (useTrash
          ? FileSystemUtil.cleanlyMoveToTrash(fs, root, key.getPath())
          : FileSystemUtil.cleanlyDelete(fs, root, key.getPath())) || deleted;
      if (listener != null) {
        // the relative path is the partition name, so we can simply delete it
        // in Hive
        listener.partitionDeleted(dataset.getNamespace(), dataset.getName(),
            key.getPath().toString());
      }
    }
  } else {
    for (Path path : pathIterator()) {
      deleted = (useTrash
          ? FileSystemUtil.cleanlyMoveToTrash(fs, root, path)
          : FileSystemUtil.cleanlyDelete(fs, root, path)) || deleted;
    }
  }
  return deleted;
}
@Test
public void testMultipleAvroFilesInOneFolder() throws Exception {
  File folder = temp.newFolder("a/b/c/d/e");
  Path root = new Path(temp.getRoot().toURI());
  FileSystem fs = LocalFileSystem.getInstance();

  // create two Avro files in parent
  Path parent = new Path(folder.toURI());
  createAvroUserFile(fs, parent);
  createAvroUserFile(fs, parent);

  DatasetDescriptor descriptor = Iterables.getOnlyElement(
      FileSystemUtil.findPotentialDatasets(fs, root));

  Assert.assertFalse("Should not flag at mixed depth",
      descriptor.hasProperty("kite.filesystem.mixed-depth"));
  Assert.assertEquals("Should be directly under parent",
      parent.toUri(), descriptor.getLocation());
  Assert.assertEquals("Should use user schema",
      USER_SCHEMA, descriptor.getSchema());
  Assert.assertEquals("Should have Avro format",
      Formats.AVRO, descriptor.getFormat());
  Assert.assertFalse("Should not be partitioned", descriptor.isPartitioned());
}
@Test
public void testMultipleParquetFilesInOneFolder() throws Exception {
  File folder = temp.newFolder("a/b/c/d/e");
  Path root = new Path(temp.getRoot().toURI());
  FileSystem fs = LocalFileSystem.getInstance();

  // create two Parquet files in parent
  Path parent = new Path(folder.toURI());
  createParquetEventFile(fs, parent);
  createParquetEventFile(fs, parent);

  DatasetDescriptor descriptor = Iterables.getOnlyElement(
      FileSystemUtil.findPotentialDatasets(fs, root));

  Assert.assertFalse("Should not flag at mixed depth",
      descriptor.hasProperty("kite.filesystem.mixed-depth"));
  Assert.assertEquals("Should be directly under parent",
      parent.toUri(), descriptor.getLocation());
  Assert.assertEquals("Should use event schema",
      EVENT_SCHEMA, descriptor.getSchema());
  Assert.assertEquals("Should have Parquet format",
      Formats.PARQUET, descriptor.getFormat());
  Assert.assertFalse("Should not be partitioned", descriptor.isPartitioned());
}
@Test
public void testWriteAndRead() throws IOException {
  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("test")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schemaUri(USER_SCHEMA_URL)
          .format(format)
          .compressionType(compressionType)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  Assert.assertFalse("Dataset is not partitioned",
      ds.getDescriptor().isPartitioned());

  writeTestUsers(ds, 10);
  checkTestUsers(ds, 10);
}