org.kitesdk.data.spi.filesystem.CSVUtil java code examples

  /**
   * Infers a schema from the CSV content of {@code in} through
   * {@code CSVUtil.inferSchema} and stores its string rendering in
   * {@code avroSchema}.
   *
   * The record name is read from the RECORD_NAME property, evaluated against
   * the incoming flow file; the PRETTY_AVRO_OUTPUT property value is passed
   * to the schema's {@code toString}.
   *
   * @param in stream handed to the schema inference call
   * @throws IOException declared for failures raised while inferring the schema
   */
  @Override
  public void process(InputStream in) throws IOException {
    final String recordName = context.getProperty(RECORD_NAME)
        .evaluateAttributeExpressions(inputFlowFile)
        .getValue();
    final String schemaText = CSVUtil.inferSchema(recordName, in, props)
        .toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean());
    avroSchema.set(schemaText);
  }
});

                    boolean makeNullable)
 throws IOException {
CSVReader reader = newReader(incoming, props);
 header = newParser(props).parseLine(props.header);
 line = reader.readNext();
 Preconditions.checkNotNull(line, "No content to infer schema");
  if (i < line.length) {
   if (types[i] == null) {
    types[i] = inferFieldType(line[i]);
    if (types[i] != null) {
    .doc("Type inferred from '" + sample(values[i]) + "'")
    .type(schema(types[i], false)).noDefault();
 } else {
  SchemaBuilder.GenericDefault<Schema> defaultBuilder = fieldAssembler.name(fieldName)
    .doc("Type inferred from '" + sample(values[i]) + "'")
    .type(schema(types[i], makeNullable || foundNull));
  if (makeNullable || foundNull) {
   fieldAssembler = defaultBuilder.withDefault(null);

/**
 * Verifies that {@code CSVUtil.sample} returns printable input unchanged:
 * upper-case letters, lower-case letters, digits and punctuation.
 */
@Test
public void testSamplePrintableCharactersNotChanged() {
 // Complete A-Z alphabet so that every upper-case letter is exercised.
 String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
 Assert.assertEquals("Upper case letters shouldn't be removed",
   upper, CSVUtil.sample(upper));
 String lower = "abcdefghijklmnopqrstuvwxyz";
 Assert.assertEquals("Lower case letters shouldn't be removed",
   lower, CSVUtil.sample(lower));
 String numbers = "0123456789";
 Assert.assertEquals("Numbers shouldn't be removed",
   numbers, CSVUtil.sample(numbers));
 // Space plus the punctuation characters that must pass through as-is.
 String punctuation = " _-~+!@#$%^&*(){}[]<>,.?:;`'\"/\\|";
 Assert.assertEquals("Punctuation shouldn't be removed",
   punctuation, CSVUtil.sample(punctuation));
}

.inferNullableSchema(
  recordName, open(samplePaths.get(0)), props, required)
.toString(!minimize);

this.reader = CSVUtil.newReader(incoming, props);
 try {
  header = Lists.newArrayList(
    CSVUtil.newParser(props).parseLine(props.header));
 } catch (IOException e) {
  throw new DatasetIOException(

public CSVRecordParser(CSVProperties props, Schema schema, Class<E> type,
            @Nullable List<String> header) {
 this.parser = CSVUtil.newParser(props);
 this.builder = new CSVRecordBuilder<E>(
   DataModelUtil.getReaderSchema(type, schema),
   type, getHeader(props, header));
}

/**
 * Infers a schema for the given CSV input by delegating to
 * {@code inferSchemaInternal} with its final boolean argument set to
 * {@code false}.
 *
 * @param name           name passed through to the inference routine
 * @param incoming       CSV input stream
 * @param props          CSV properties
 * @param requiredFields field names passed through as required
 * @return the schema produced by {@code inferSchemaInternal}
 * @throws IOException propagated from {@code inferSchemaInternal}
 */
public static Schema inferSchema(String name, InputStream incoming,
                 CSVProperties props,
                 Set<String> requiredFields)
  throws IOException {
 // NOTE(review): the flag presumably controls whether fields are made
 // nullable - confirm against inferSchemaInternal.
 final boolean makeNullable = false;
 return inferSchemaInternal(
   name, incoming, props, requiredFields, makeNullable);
}

/**
 * Creates the output stream at {@code path}, overwriting any existing file,
 * and wraps it in a CSV writer configured from {@code props}.
 *
 * @throws IOException if the stream cannot be created
 */
@Override
public void open() throws IOException {
 final boolean overwrite = true;
 outgoing = fs.create(path, overwrite);
 writer = CSVUtil.newWriter(outgoing, props);
}

 /**
  * Checks that {@code CSVUtil.sample} treats a null String the same way
  * {@code String.valueOf} does.
  */
 @Test
 public void testSampleNull() {
  // Typed as String so the same overloads are chosen as for a real value.
  String missingValue = null;
  final String expected = String.valueOf(missingValue);
  Assert.assertEquals("Should handle null like String.valueOf",
    expected, CSVUtil.sample(missingValue));
 }
}

/**
 * Checks that a dotted record name is split into the schema name
 * ("TestRecord") and namespace ("com.example").
 */
@Test
public void testSchemaNamespace() throws Exception {
 InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
 CSVProperties withHeader = new CSVProperties.Builder().hasHeader().build();

 Schema inferred = CSVUtil.inferNullableSchema(
   "com.example.TestRecord", csvInput, withHeader);

 Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
 Assert.assertEquals("Should set namespace",
   "com.example", inferred.getNamespace());
}

 /**
  * Resolves the header to use for CSV records.
  *
  * An explicit {@code header} wins. Otherwise, if {@code props.header} is
  * set, it is parsed into a new list with a parser built from {@code props}.
  *
  * @param props  CSV properties that may carry a header line
  * @param header explicit header, may be null
  * @return the resolved header, or null when neither source provides one
  * @throws DatasetIOException if parsing {@code props.header} fails
  */
 public static List<String> getHeader(CSVProperties props,
                    @Nullable List<String> header) {
  if (header != null) {
   return header;
  }
  if (props.header == null) {
   // Nothing supplied by the caller or by the properties.
   return null;
  }
  try {
   return Lists.newArrayList(
     CSVUtil.newParser(props).parseLine(props.header));
  } catch (IOException e) {
   throw new DatasetIOException(
     "Failed to parse header from properties: " + props.header, e);
  }
 }
}

/**
 * Infers a schema for the given CSV input by delegating to
 * {@code inferSchemaInternal} with its final boolean argument set to
 * {@code true}.
 *
 * @param name           name passed through to the inference routine
 * @param incoming       CSV input stream
 * @param props          CSV properties
 * @param requiredFields field names passed through as required
 * @return the schema produced by {@code inferSchemaInternal}
 * @throws IOException propagated from {@code inferSchemaInternal}
 */
public static Schema inferNullableSchema(String name, InputStream incoming,
                     CSVProperties props,
                     Set<String> requiredFields)
  throws IOException {
 // NOTE(review): the flag presumably makes non-required fields nullable -
 // confirm against inferSchemaInternal.
 final boolean makeNullable = true;
 return inferSchemaInternal(
   name, incoming, props, requiredFields, makeNullable);
}

  /**
   * Reads CSV content from {@code in}, infers a schema for it with
   * {@code CSVUtil.inferSchema}, and publishes the schema's string form
   * through {@code avroSchema}.
   *
   * The RECORD_NAME property (evaluated against the incoming flow file)
   * supplies the record name; the PRETTY_AVRO_OUTPUT property value is
   * handed to the schema's {@code toString}.
   *
   * @param in stream handed to the schema inference call
   * @throws IOException declared for failures raised while inferring the schema
   */
  @Override
  public void process(InputStream in) throws IOException {
    final String recordName = context.getProperty(RECORD_NAME)
        .evaluateAttributeExpressions(inputFlowFile)
        .getValue();
    final String renderedSchema = CSVUtil.inferSchema(recordName, in, props)
        .toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean());
    avroSchema.set(renderedSchema);
  }
});

/**
 * Checks that {@code CSVUtil.sample} renders the snowflake character
 * (U+2744) as a '.'.
 */
@Test
public void testUnicodeRemoved() {
 final String input = "Unicode snowflake: \u2744";
 final String expected = "Unicode snowflake: .";
 Assert.assertEquals("Should remove unicode",
   expected, CSVUtil.sample(input));
}

@Test
public void testNullableSchemaInference() throws Exception {
 InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8"));
 Schema schema = CSVUtil.inferNullableSchema("TestRecord", stream,
   new CSVProperties.Builder().hasHeader().build(),
   ImmutableSet.of("float"));

 /**
  * Runs {@code CSVUtil.inferSchema} over {@code csvLines} with a header and
  * "nullable_string" as the only required field. Any {@link IOException} is
  * rethrown as a {@link RuntimeException} because {@code run()} cannot
  * declare checked exceptions.
  */
 @Override
 public void run() {
  try {
   ByteArrayInputStream csvInput =
     new ByteArrayInputStream(csvLines.getBytes("utf8"));
   CSVProperties withHeader =
     new CSVProperties.Builder().hasHeader().build();
   ImmutableSet<String> required = ImmutableSet.of("nullable_string");
   CSVUtil.inferSchema("TestRecord", csvInput, withHeader, required);
  } catch (IOException e) {
   throw new RuntimeException("Schema inference threw IOException", e);
  }
 }
});

/**
 * Checks that {@code CSVUtil.sample} shortens a long value; the expected
 * result is the first 50 characters of the input URL.
 */
@Test
public void testSampleTruncated() {
 final String commitUrl = "https://github.com/kite-sdk/kite/commit/" +
   "bbe3e917875e879ca58b8afe90efa96cdd4691d1";
 final String expected =
   "https://github.com/kite-sdk/kite/commit/bbe3e91787";
 Assert.assertEquals("Should truncate long values",
   expected, CSVUtil.sample(commitUrl));
}

@Test
public void testNullableSchemaInferenceWithoutHeader() throws Exception {
 InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8"));
 Schema schema = CSVUtil.inferNullableSchema("TestRecord", stream,
   new CSVProperties.Builder().build(),
   ImmutableSet.of("long", "field_1"));

 /**
  * Runs {@code CSVUtil.inferSchema} over {@code csvLines} with a header and
  * "nullable_long" as the only required field. Any {@link IOException} is
  * rethrown as a {@link RuntimeException} because {@code run()} cannot
  * declare checked exceptions.
  */
 @Override
 public void run() {
  try {
   ByteArrayInputStream csvInput =
     new ByteArrayInputStream(csvLines.getBytes("utf8"));
   CSVProperties withHeader =
     new CSVProperties.Builder().hasHeader().build();
   ImmutableSet<String> required = ImmutableSet.of("nullable_long");
   CSVUtil.inferSchema("TestRecord", csvInput, withHeader, required);
  } catch (IOException e) {
   throw new RuntimeException("Schema inference threw IOException", e);
  }
 }
});

/**
 * Infers a schema from input whose second column has no example value.
 * The first line is skipped and the next is the header; column "one" is
 * expected to be a long and column "two" a nullable string.
 */
@Test
public void testSchemaInferenceMissingExample() throws Exception {
 InputStream csvInput = new ByteArrayInputStream(
   "\none,two\n34,\n".getBytes("utf8"));
 CSVProperties skipOneWithHeader =
   new CSVProperties.Builder().linesToSkip(1).hasHeader().build();

 Schema inferred =
   CSVUtil.inferSchema("TestRecord", csvInput, skipOneWithHeader);

 // Both header columns must be present before their types are compared.
 Assert.assertNotNull(inferred.getField("one"));
 Assert.assertNotNull(inferred.getField("two"));
 Assert.assertEquals("Should infer a long",
   schema(Schema.Type.LONG), inferred.getField("one").schema());
 Assert.assertEquals("Should default to a string",
   nullable(Schema.Type.STRING), inferred.getField("two").schema());
}

Most used methods

inferNullableSchema
inferSchema
sample
inferFieldType
inferSchemaInternal
newParser
newReader
newWriter
schema
Create a Schema for the given type. If the type is null, the schema will be a nullable String. If is

Popular in Java

Creating JSON documents from java classes using gson
getSharedPreferences (Context)
onCreateOptionsMenu (Activity)
scheduleAtFixedRate (Timer)
InetAddress (java.net)
An Internet Protocol (IP) address. This can be either an IPv4 address or an IPv6 address, and in pra
MessageDigest (java.security)
Uses a one-way hash function to turn an arbitrary number of bytes into a fixed-length byte sequence.
DateFormat (java.text)
Formats or parses dates and times. This class provides factories for obtaining instances configured f
IsNull (org.hamcrest.core)
Is the value null?
Table (com.google.common.collect)
A collection that associates an ordered pair of keys, called a row key and a column key, with a sing
Notification (javax.management)
From CI to AI: The AI layer in your organization

How to use CSVUtil in org.kitesdk.data.spi.filesystem

Best Java code snippets using org.kitesdk.data.spi.filesystem.CSVUtil (Showing top 20 results out of 315)

How to use
CSVUtil
in
org.kitesdk.data.spi.filesystem