/** * Returns whether the value of the descriptor property is {@code true}. * * @param property a String property name * @param descriptor a {@link DatasetDescriptor} * @return {@code true} if set and "true", {@code false} otherwise. */ public static boolean isEnabled(String property, DatasetDescriptor descriptor) { if (descriptor.hasProperty(property)) { // return true if and only if the property value is "true" return Boolean.valueOf(descriptor.getProperty(property)); } return false; }
/** * Returns whether the value of the descriptor property is {@code false}. * * @param property a String property name * @param descriptor a {@link DatasetDescriptor} * @return {@code true} if set and "false", {@code false} otherwise. */ public static boolean isDisabled(String property, DatasetDescriptor descriptor) { if (descriptor.hasProperty(property)) { // return true if and only if the property value is "false" return !Boolean.valueOf(descriptor.getProperty(property)); } return false; }
// Builds a reader over the given path, seeding its Configuration from the
// file system and overlaying every descriptor property.
public InputFormatReader(FileSystem fs, Path path, DatasetDescriptor descriptor) {
  this.fs = fs;
  this.path = path;
  this.descriptor = descriptor;
  this.state = ReaderWriterState.NEW;

  // expose the descriptor's custom properties through the job Configuration
  this.conf = new Configuration(fs.getConf());
  for (String propertyName : descriptor.listProperties()) {
    conf.set(propertyName, descriptor.getProperty(propertyName));
  }

  this.attemptContext = Hadoop.TaskAttemptContext.ctor.newInstance(conf, FAKE_ID);
}
// Builds CSV parsing settings from descriptor properties. Each setting falls
// back from the current property name to the deprecated ("OLD_") name, then
// to a compile-time default, via coalesce().
// NOTE(review): this constructor is truncated in this view; the tail of the
// body (lines-to-skip parsing onward) is outside the visible range.
private CSVProperties(DatasetDescriptor descriptor) {
  // character set used to decode the file
  this.charset = coalesce(
      descriptor.getProperty(CHARSET_PROPERTY),
      descriptor.getProperty(OLD_CHARSET_PROPERTY),
      DEFAULT_CHARSET);
  // field separator
  this.delimiter = coalesce(
      descriptor.getProperty(DELIMITER_PROPERTY),
      descriptor.getProperty(OLD_DELIMITER_PROPERTY),
      DEFAULT_DELIMITER);
  // quote character wrapping fields
  this.quote = coalesce(
      descriptor.getProperty(QUOTE_CHAR_PROPERTY),
      descriptor.getProperty(OLD_QUOTE_CHAR_PROPERTY),
      DEFAULT_QUOTE);
  // escape character inside quoted fields
  this.escape = coalesce(
      descriptor.getProperty(ESCAPE_CHAR_PROPERTY),
      descriptor.getProperty(OLD_ESCAPE_CHAR_PROPERTY),
      DEFAULT_ESCAPE);
  // explicit header line, if provided (no deprecated alias)
  this.header = descriptor.getProperty(HEADER_PROPERTY);
  // whether the first data line is a header row
  this.useHeader = Boolean.parseBoolean(coalesce(
      descriptor.getProperty(HAS_HEADER_PROPERTY),
      DEFAULT_HAS_HEADER));
  final String linesToSkipString = coalesce(
      descriptor.getProperty(LINES_TO_SKIP_PROPERTY),
      descriptor.getProperty(OLD_LINES_TO_SKIP_PROPERTY));
  int lines = DEFAULT_LINES_TO_SKIP;
  if (linesToSkipString != null) {
/**
 * Applies the replication-scope descriptor property, if present, to the column.
 *
 * @param column the column family descriptor to configure
 * @param descriptor a {@link DatasetDescriptor} that may carry {@code REPLICATION_ID_PROP}
 * @return the same {@code column}, for chaining
 * @throws IllegalArgumentException if the property value is not a valid integer
 */
private HColumnDescriptor configure(HColumnDescriptor column, DatasetDescriptor descriptor) {
  if (descriptor.hasProperty(REPLICATION_ID_PROP)) {
    String value = descriptor.getProperty(REPLICATION_ID_PROP);
    try {
      // parseInt avoids the needless Integer boxing of Integer.valueOf and
      // throws the same NumberFormatException on malformed input
      column.setScope(Integer.parseInt(value));
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException(
          "Invalid replication scope: " + value, e);
    }
  }
  return column;
}
/**
 * Applies the replication-scope descriptor property, if present, to the column.
 *
 * @param column the column family descriptor to configure
 * @param descriptor a {@link DatasetDescriptor} that may carry {@code REPLICATION_ID_PROP}
 * @return the same {@code column}, for chaining
 * @throws IllegalArgumentException if the property value is not a valid integer
 */
private HColumnDescriptor configure(HColumnDescriptor column, DatasetDescriptor descriptor) {
  if (descriptor.hasProperty(REPLICATION_ID_PROP)) {
    String value = descriptor.getProperty(REPLICATION_ID_PROP);
    try {
      // parseInt avoids the needless Integer boxing of Integer.valueOf and
      // throws the same NumberFormatException on malformed input
      column.setScope(Integer.parseInt(value));
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException(
          "Invalid replication scope: " + value, e);
    }
  }
  return column;
}
/** * Returns the value of the property parsed as a long, or the default value. * <p> * If the value cannot be parsed as a long, this will return the default * value. * * @param prop a String property name * @param descriptor a {@link DatasetDescriptor} * @param defaultValue default value if prop is not present or is invalid * @return the value of prop parsed as a long or the default value */ public static long getLong(String prop, DatasetDescriptor descriptor, long defaultValue) { if (descriptor.hasProperty(prop)) { String asString = descriptor.getProperty(prop); try { return Long.parseLong(asString); } catch (NumberFormatException e) { // return the default value } } return defaultValue; }
private static void addPropertiesForDescriptor(Table table, DatasetDescriptor descriptor) { // copy custom properties to the table if (!descriptor.listProperties().isEmpty()) { for (String property : descriptor.listProperties()) { // no need to check the reserved list, those are not set on descriptors table.getParameters().put(property, descriptor.getProperty(property)); } // set which properties are custom and should be set on descriptors table.getParameters().put(CUSTOM_PROPERTIES_PROPERTY_NAME, NAME_JOINER.join(descriptor.listProperties())); } }
private static void addPropertiesForDescriptor(Table table, DatasetDescriptor descriptor) { // copy custom properties to the table if (!descriptor.listProperties().isEmpty()) { for (String property : descriptor.listProperties()) { // no need to check the reserved list, those are not set on descriptors table.getParameters().put(property, descriptor.getProperty(property)); } // set which properties are custom and should be set on descriptors table.getParameters().put(CUSTOM_PROPERTIES_PROPERTY_NAME, NAME_JOINER.join(descriptor.listProperties())); } }
// Builds a writer for the given destination directory, copying descriptor
// properties into the Configuration and choosing direct vs temp-file writes.
private FileSystemWriter(FileSystem fs, Path path, long rollIntervalMillis,
    long targetFileSize, DatasetDescriptor descriptor, Schema writerSchema) {
  Preconditions.checkNotNull(fs, "File system is not defined");
  Preconditions.checkNotNull(path, "Destination directory is not defined");
  Preconditions.checkNotNull(descriptor, "Descriptor is not defined");

  this.fs = fs;
  this.directory = path;
  this.rollIntervalMillis = rollIntervalMillis;
  this.targetFileSize = targetFileSize;
  this.descriptor = descriptor;
  this.conf = new Configuration(fs.getConf());
  this.state = ReaderWriterState.NEW;
  this.schema = writerSchema;

  // surface file format settings (custom descriptor properties) in the Configuration
  for (String propertyName : descriptor.listProperties()) {
    conf.set(propertyName, descriptor.getProperty(propertyName));
  }

  // Skip the temp-file-then-rename step when the file system cannot rename
  // efficiently, and write the file directly instead (performance).
  this.useTempPath = FileSystemUtil.supportsRename(fs.getUri(), conf);
}
// Reflectively instantiates the FileInputFormat class named by the
// descriptor's INPUT_FORMAT_CLASS_PROP property.
public static <K, V> FileInputFormat<K, V> newInputFormatInstance(
    DatasetDescriptor descriptor) {
  String className = descriptor.getProperty(INPUT_FORMAT_CLASS_PROP);
  DynConstructors.Ctor<FileInputFormat<K, V>> ctor =
      new DynConstructors.Builder()
          .impl(className)
          .build();
  return ctor.newInstance();
}
// Builds a RecordReader that surfaces either the key or the value half of the
// underlying InputFormat's pairs, selected by INPUT_FORMAT_RECORD_PROP
// (defaults to VALUE when unset).
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value="DM_CONVERT_CASE", justification="For record types only")
public static <E> RecordReader<E, Void> newRecordReader(
    DatasetDescriptor descriptor) {
  String typeString = descriptor.getProperty(INPUT_FORMAT_RECORD_PROP);
  RecordType type = RecordType.VALUE;
  if (typeString != null) {
    // default-locale toUpperCase is deliberately suppressed above;
    // RecordType constant names are plain ASCII so this is safe
    type = RecordType.valueOf(typeString.trim().toUpperCase());
  }
  if (type == RecordType.KEY) {
    FileInputFormat<E, Object> format = newInputFormatInstance(descriptor);
    return new KeyReaderWrapper<E>(format);
  } else if (type == RecordType.VALUE) {
    FileInputFormat<Object, E> format = newInputFormatInstance(descriptor);
    return new ValueReaderWrapper<E>(format);
  } else {
    // unreachable unless a new RecordType constant is added without
    // updating this dispatch
    throw new DatasetException("[BUG] Invalid record type: " + type);
  }
}
} // closes the enclosing class (class header is outside this view)
// Logs a human-readable summary of the dataset: URI, schema, partition
// strategy, column mapping, and any custom properties.
private static void printInfo(Logger console, Dataset<?> dataset) {
  DatasetDescriptor descriptor = dataset.getDescriptor();
  // strip embedded mapping/strategy annotations from the schema before printing
  String schema = ColumnMappingParser.removeEmbeddedMapping(
      PartitionStrategyParser.removeEmbeddedStrategy(descriptor.getSchema()))
      .toString(true);
  Collection<String> properties = descriptor.listProperties();

  console.info("\nDataset \"{}\":", dataset.getName());
  console.info("\tURI: \"{}\"", dataset.getUri());
  console.info("\tSchema: {}", indent(schema));
  if (descriptor.isPartitioned()) {
    console.info("\tPartition strategy: {}",
        indent(descriptor.getPartitionStrategy().toString(true)));
  } else {
    console.info("\tNot partitioned");
  }
  if (descriptor.isColumnMapped()) {
    console.info("\tColumn mapping: {}",
        indent(descriptor.getColumnMapping().toString(true)));
  }
  if (!properties.isEmpty()) {
    StringBuilder propText = new StringBuilder();
    for (String name : properties) {
      propText.append("\n\t\t").append(name).append("=")
          .append(descriptor.getProperty(name));
    }
    console.info("\tProperties:{}", propText.toString());
  }
}
// copy one descriptor property into the Properties object
// NOTE(review): fragment — the enclosing loop/method is outside this view
properties.setProperty(property, descriptor.getProperty(property));
// Verifies that updating an "old layout" dataset persists a new property
// without migrating its metadata to the new location.
@Test
public void testUpdate() throws IOException {
  // add a property to the existing descriptor
  DatasetDescriptor updated = new DatasetDescriptor.Builder(descriptor)
      .property("parquet.block.size", "1024")
      .build();
  DatasetDescriptor saved = provider.update("default", "old_2", updated);
  Assert.assertNotNull("Should find saved metadata", saved);
  Assert.assertEquals("Should update old dataset successfully",
      updated.getProperty("parquet.block.size"),
      saved.getProperty("parquet.block.size"));
  // reload from the provider to confirm the change reached disk
  DatasetDescriptor loaded = provider.load("default", "old_2");
  Assert.assertNotNull("Should find saved metadata", loaded);
  Assert.assertEquals("Should make changes on disk",
      updated.getProperty("parquet.block.size"),
      loaded.getProperty("parquet.block.size"));
  // updating must not move metadata out of the old location
  Assert.assertFalse("Should not move metadata to new location",
      local.exists(new Path(root, new Path("default", "old_2"))));
}
} // closes the enclosing test class (class header is outside this view)
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }
// Verifies that a custom descriptor property survives create and load:
// present, correct value, and listed by name on both descriptors.
@Test
public void testCustomProperties() {
  final String propName = "my.custom.property";
  final String propValue = "string";
  DatasetDescriptor descriptorWithProp =
      new DatasetDescriptor.Builder(testDescriptor)
          .property(propName, propValue)
          .build();

  DatasetDescriptor created = provider.create(NAMESPACE, NAME, descriptorWithProp);
  Assert.assertTrue("Should have custom property",
      created.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, created.getProperty(propName));
  Assert.assertTrue("List should contain property name",
      created.listProperties().contains(propName));

  DatasetDescriptor loaded = provider.load(NAMESPACE, NAME);
  Assert.assertTrue("Should have custom property",
      loaded.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, loaded.getProperty(propName));
  // Fix: the original re-asserted on `created` here (copy-paste error), so the
  // loaded descriptor's property list was never actually verified.
  Assert.assertTrue("List should contain property name",
      loaded.listProperties().contains(propName));
}