/**
 * Extracts the dataset name from a dataset or view URI.
 *
 * @param uri The dataset or view URI
 * @return The dataset name
 */
private static String uriToName(URI uri) {
  // Strip the outer scheme, then look the inner URI up in the registry;
  // the match metadata carries the dataset name under the "dataset" key.
  URI datasetUri = URI.create(uri.getRawSchemeSpecificPart());
  return Registration.lookupDatasetUri(datasetUri).second().get("dataset");
}
protected static Configuration getConfiguration(String configFiles) { Configuration conf = DefaultConfiguration.get(); if (configFiles == null || configFiles.isEmpty()) { return conf; } for (String file : COMMA.split(configFiles)) { // process each resource only once if (conf.getResource(file) == null) { // use Path instead of String to get the file from the FS conf.addResource(new Path(file)); } } return conf; }
@Override
public void process(InputStream in) throws IOException {
  // Infer an Avro schema from the JSON input stream, using the configured
  // record name and sampling the configured number of records; property
  // values are evaluated with expression language against the flow file.
  Schema as = JsonUtil.inferSchema(
      in,
      context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue(),
      context.getProperty(NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(inputFlowFile).asInteger());
  // Publish the schema text (pretty-printed when configured) for use
  // outside this callback.
  avroSchema.set(as.toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
} });
// Build the CSV parsing options from processor properties, evaluating
// expression language against the incoming flow file.
CSVProperties props = new CSVProperties.Builder()
    .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue())
    .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
    .quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue())
    .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue())
    .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean())
    .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger())
    .build();
// Resolve the target Avro schema; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
final Schema schema;
try {
  schema = getSchema(schemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + schemaProperty);
@OnScheduled
protected void setDefaultConfiguration(ProcessContext context) throws IOException {
  // Resolve the configured configuration files (expression language is
  // evaluated first) and install the result as the shared default.
  String configFiles =
      context.getProperty(CONF_XML_FILES).evaluateAttributeExpressions().getValue();
  DefaultConfiguration.set(getConfiguration(configFiles));
}
@Override
public TimeDomain load(Pair<PartitionStrategy, String> entry) {
  // Build a TimeDomain from the cache key's (strategy, source name) pair.
  PartitionStrategy strategy = entry.first();
  String sourceName = entry.second();
  return new TimeDomain(strategy, sourceName);
} });
/**
 * Assembles the {@link MarkerRange} described by this builder.
 *
 * @return a MarkerRange bounded by the configured start and end markers,
 *         when present
 */
public MarkerRange build() {
  // Start from an unbounded range and narrow each configured end.
  MarkerRange range = new MarkerRange(comparator);
  if (start != null) {
    range = range.from(start.build());
  }
  if (end != null) {
    range = range.to(end.build());
  }
  return range;
} }
@Override
@SuppressWarnings("unchecked")
public MarkerRange.Builder update(MarkerRange.Builder current, Object item) {
  // Extend both ends of the range with each partition field's value for item.
  for (FieldPartitioner fp : fields) {
    String fieldName = fp.getName();
    Object fieldValue = fp.apply(item);
    current.addToStart(fieldName, fieldValue);
    current.addToEnd(fieldName, fieldValue);
  }
  return current;
} }
public JSONFileReader(FileSystem fileSystem, Path path, EntityAccessor<E> accessor) {
  // Record where to read from, then derive the read schema and data model
  // from the accessor. The reader starts in the NEW state.
  this.fs = fileSystem;
  this.path = path;
  this.state = ReaderWriterState.NEW;
  this.schema = accessor.getReadSchema();
  this.model = DataModelUtil.getDataModelForType(accessor.getType());
}
/**
 * Parses a column-mapping descriptor from a file containing JSON.
 *
 * @param file a File holding the mapping descriptor in JSON format
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(File file) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(file));
}
/**
 * Parses a partition strategy from a file containing JSON.
 *
 * @param file a File holding the PartitionStrategy in JSON format
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(File file) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(file));
}
@Override
public void process(InputStream in) throws IOException {
  try (DataFileStream<Record> stream = new DataFileStream<>(
      in, AvroUtil.newDatumReader(schema, Record.class))) {
    // Fail fast if the file's embedded schema cannot be read as the
    // target schema.
    IncompatibleSchemaException.check(
        SchemaValidationUtil.canRead(stream.getSchema(), schema),
        "Incompatible file schema %s, expected %s",
        stream.getSchema(), schema);
    long written = 0L;
    try (DatasetWriter<Record> writer = target.newWriter()) {
      // Copy every record from the Avro file into the target dataset.
      for (Record record : stream) {
        writer.write(record);
        written += 1;
      }
    } finally {
      // Count in a finally so partially-written records are still reported:
      // the writes cannot be rolled back.
      session.adjustCounter("Stored records", written,
          true /* cannot roll back the write */);
    }
  }
} });
/**
 * Returns a list of field names from the schema that contain characters that
 * are known to be incompatible with some projects, such as Hive or HBase.
 *
 * @param schema a {@link org.apache.avro.Schema} to check
 * @return a {@link java.util.List} of incompatible field names
 */
private static List<String> getIncompatibleNames(Schema schema) {
  // Walk the whole schema once; the visitor accumulates offending names.
  NameValidation collector = new NameValidation();
  SchemaUtil.visit(schema, collector);
  return collector.getIncompatibleNames();
}
public Predicate<Marker> project(Predicate<Long> predicate) {
  // Only In and Range predicates can be projected onto markers; anything
  // else is signalled as unsupported by returning null.
  if (predicate instanceof In) {
    return new TimeSetPredicate((In<Long>) predicate);
  }
  if (predicate instanceof Range) {
    return new TimeRangePredicate((Range<Long>) predicate);
  }
  return null;
}
// Resolve the reader (input) schema; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
try {
  inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + inputSchemaProperty);
// Resolve the writer (output) schema the same way.
// NOTE(review): this catch body is also truncated in this excerpt.
try {
  outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + outputSchemaProperty);
/**
 * Parses a column-mapping descriptor given as a JSON string.
 *
 * @param mappingDescriptor the mapping descriptor as a JSON string
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(String mappingDescriptor) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(mappingDescriptor));
}
/**
 * Parses a partition strategy given as a JSON string.
 *
 * @param json the PartitionStrategy as a JSON string
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(String json) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(json));
}
// Resolve the Avro schema named by schemaProperty against the default
// configuration; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
final Schema schema;
try {
  schema = getSchema(schemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + schemaProperty);
/**
 * Parses a column-mapping descriptor from an input stream containing JSON.
 *
 * @param in an InputStream holding the mapping descriptor in JSON format
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(InputStream in) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(in));
}
/**
 * Parses a partition strategy from an input stream containing JSON.
 *
 * @param in an InputStream holding the PartitionStrategy in JSON format
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(InputStream in) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(in));
}