/**
 * Extracts the dataset name from a dataset or view URI.
 *
 * @param uri The dataset or view URI
 * @return The dataset name
 */
private static String uriToName(URI uri) {
  // Strip the outer scheme, then look the inner URI up in the registry;
  // the match metadata carries the dataset name under the "dataset" key.
  URI datasetUri = URI.create(uri.getRawSchemeSpecificPart());
  return Registration.lookupDatasetUri(datasetUri).second().get("dataset");
}
protected static Configuration getConfiguration(String configFiles) { Configuration conf = DefaultConfiguration.get(); if (configFiles == null || configFiles.isEmpty()) { return conf; } for (String file : COMMA.split(configFiles)) { // process each resource only once if (conf.getResource(file) == null) { // use Path instead of String to get the file from the FS conf.addResource(new Path(file)); } } return conf; }
@Override
public void process(InputStream in) throws IOException {
  // Infer an Avro schema from the JSON input stream, using the configured
  // record name and sampling the configured number of records; property
  // values are evaluated with expression language against the flow file.
  Schema as = JsonUtil.inferSchema(
      in,
      context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue(),
      context.getProperty(NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(inputFlowFile).asInteger());
  // Publish the schema text (pretty-printed when configured) for use
  // outside this callback.
  avroSchema.set(as.toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
} });
// Build the CSV parsing options from processor properties, evaluating
// expression language against the incoming flow file.
CSVProperties props = new CSVProperties.Builder()
    .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue())
    .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
    .quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue())
    .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue())
    .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean())
    .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger())
    .build();
// Resolve the target Avro schema; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
final Schema schema;
try {
  schema = getSchema(schemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + schemaProperty);
@OnScheduled
protected void setDefaultConfiguration(ProcessContext context) throws IOException {
  // Resolve the configured configuration files (expression language is
  // evaluated first) and install the result as the shared default.
  String configFiles =
      context.getProperty(CONF_XML_FILES).evaluateAttributeExpressions().getValue();
  DefaultConfiguration.set(getConfiguration(configFiles));
}
@Override
public TimeDomain load(Pair<PartitionStrategy, String> entry) {
  // Build a TimeDomain from the cache key's (strategy, source name) pair.
  PartitionStrategy strategy = entry.first();
  String sourceName = entry.second();
  return new TimeDomain(strategy, sourceName);
} });
/**
 * Assembles the {@link MarkerRange} described by this builder.
 *
 * @return a MarkerRange bounded by the configured start and end markers,
 *         when present
 */
public MarkerRange build() {
  // Start from an unbounded range and narrow each configured end.
  MarkerRange range = new MarkerRange(comparator);
  if (start != null) {
    range = range.from(start.build());
  }
  if (end != null) {
    range = range.to(end.build());
  }
  return range;
} }
@Override
@SuppressWarnings("unchecked")
public MarkerRange.Builder update(MarkerRange.Builder current, Object item) {
  // Extend both ends of the range with each partition field's value for item.
  for (FieldPartitioner fp : fields) {
    String fieldName = fp.getName();
    Object fieldValue = fp.apply(item);
    current.addToStart(fieldName, fieldValue);
    current.addToEnd(fieldName, fieldValue);
  }
  return current;
} }
public JSONFileReader(FileSystem fileSystem, Path path, EntityAccessor<E> accessor) {
  // Record where to read from, then derive the read schema and data model
  // from the accessor. The reader starts in the NEW state.
  this.fs = fileSystem;
  this.path = path;
  this.state = ReaderWriterState.NEW;
  this.schema = accessor.getReadSchema();
  this.model = DataModelUtil.getDataModelForType(accessor.getType());
}
/**
 * Parses a column-mapping descriptor from a file containing JSON.
 *
 * @param file a File holding the mapping descriptor in JSON format
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(File file) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(file));
}
/**
 * Parses a partition strategy from a file containing JSON.
 *
 * @param file a File holding the PartitionStrategy in JSON format
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(File file) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(file));
}
@Override
public void process(InputStream in) throws IOException {
  try (DataFileStream<Record> stream = new DataFileStream<>(
      in, AvroUtil.newDatumReader(schema, Record.class))) {
    // Fail fast if the file's embedded schema cannot be read as the
    // target schema.
    IncompatibleSchemaException.check(
        SchemaValidationUtil.canRead(stream.getSchema(), schema),
        "Incompatible file schema %s, expected %s",
        stream.getSchema(), schema);
    long written = 0L;
    try (DatasetWriter<Record> writer = target.newWriter()) {
      // Copy every record from the Avro file into the target dataset.
      for (Record record : stream) {
        writer.write(record);
        written += 1;
      }
    } finally {
      // Count in a finally so partially-written records are still reported:
      // the writes cannot be rolled back.
      session.adjustCounter("Stored records", written,
          true /* cannot roll back the write */);
    }
  }
} });
/**
 * Returns a list of field names from the schema that contain characters that
 * are known to be incompatible with some projects, such as Hive or HBase.
 *
 * @param schema a {@link org.apache.avro.Schema} to check
 * @return a {@link java.util.List} of incompatible field names
 */
private static List<String> getIncompatibleNames(Schema schema) {
  // Walk the whole schema once; the visitor accumulates offending names.
  NameValidation collector = new NameValidation();
  SchemaUtil.visit(schema, collector);
  return collector.getIncompatibleNames();
}
public Predicate<Marker> project(Predicate<Long> predicate) {
  // Only In and Range predicates can be projected onto markers; anything
  // else is signalled as unsupported by returning null.
  if (predicate instanceof In) {
    return new TimeSetPredicate((In<Long>) predicate);
  }
  if (predicate instanceof Range) {
    return new TimeRangePredicate((Range<Long>) predicate);
  }
  return null;
}
// Resolve the reader (input) schema; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
try {
  inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + inputSchemaProperty);
// Resolve the writer (output) schema the same way.
// NOTE(review): this catch body is also truncated in this excerpt.
try {
  outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + outputSchemaProperty);
/**
 * Parses a column-mapping descriptor given as a JSON string.
 *
 * @param mappingDescriptor the mapping descriptor as a JSON string
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(String mappingDescriptor) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(mappingDescriptor));
}
/**
 * Parses a partition strategy given as a JSON string.
 *
 * @param json the PartitionStrategy as a JSON string
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(String json) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(json));
}
// Resolve the Avro schema named by schemaProperty against the default
// configuration; a missing schema is logged here.
// NOTE(review): the catch body continues beyond this excerpt.
final Schema schema;
try {
  schema = getSchema(schemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
  getLogger().error("Cannot find schema: " + schemaProperty);
/**
 * Parses a column-mapping descriptor from an input stream containing JSON.
 *
 * @param in an InputStream holding the mapping descriptor in JSON format
 * @return the parsed {@link ColumnMapping}
 */
public static ColumnMapping parse(InputStream in) {
  // Delegate JSON reading to JsonUtil, then build the mapping from the tree.
  return buildColumnMapping(JsonUtil.parse(in));
}
/**
 * Parses a partition strategy from an input stream containing JSON.
 *
 * @param in an InputStream holding the PartitionStrategy in JSON format
 * @return the parsed {@link PartitionStrategy}
 */
public static PartitionStrategy parse(InputStream in) {
  // Delegate JSON reading to JsonUtil, then build the strategy from the tree.
  return buildPartitionStrategy(JsonUtil.parse(in));
}