/**
 * Builds one row whose layout follows {@code requiredSchema}.
 *
 * <p>A column named {@code "i"} receives {@code start}, a column named {@code "j"} receives
 * {@code -start}; any other column keeps the array's default {@code null}.
 *
 * @return a {@link GenericInternalRow} wrapping the populated values
 */
@Override
public InternalRow get() {
  final int columnCount = requiredSchema.size();
  final Object[] rowValues = new Object[columnCount];
  for (int col = 0; col < columnCount; col++) {
    // Look the column name up once per position instead of once per comparison.
    final String columnName = requiredSchema.apply(col).name();
    if ("i".equals(columnName)) {
      rowValues[col] = start;
    } else if ("j".equals(columnName)) {
      rowValues[col] = -start;
    }
  }
  return new GenericInternalRow(rowValues);
}
/**
 * Produces a row shaped like {@code requiredSchema}: {@code start} goes into the column named
 * {@code "i"}, {@code -start} into the column named {@code "j"}, and every other slot is left
 * {@code null}.
 *
 * @return the populated values wrapped in a {@link GenericInternalRow}
 */
@Override
public InternalRow get() {
  final Object[] out = new Object[requiredSchema.size()];
  for (int pos = 0; pos < out.length; pos++) {
    final String name = requiredSchema.apply(pos).name();
    if ("i".equals(name)) {
      out[pos] = start;
      continue;
    }
    if ("j".equals(name)) {
      out[pos] = -start;
    }
  }
  return new GenericInternalRow(out);
}
// Returns whether fieldNames contains the given field's name; the boolean result is boxed
// because the declared return type is Object.
// NOTE(review): the trailing "}).toSeq();" closes an enclosing expression (apparently an
// anonymous class passed to a call) that begins outside this excerpt -- confirm against the full file.
@Override public Object apply(StructField field) { return fieldNames.contains(field.name()); } }).toSeq();
public SparkSourceDataConverter(@NonNull final StructType inputSchema, @NonNull final Schema outputSchema, @NonNull final Configuration conf, @NonNull final Set<String> requiredKeys, @NonNull final ErrorExtractor errorExtractor) { super(conf, errorExtractor); this.fields = Arrays.stream(inputSchema.fields()) .filter(f -> !f.name().startsWith("_")).toArray(StructField[]::new); this.jsonOutputSchema = outputSchema.toString(); this.requiredKeys = requiredKeys; }
/**
 * Orders two rows by the date stored as a string in this model's field.
 *
 * <p>Both values are read as strings under {@code field.name()} and parsed with {@code format}.
 *
 * @param first  the left-hand row
 * @param second the right-hand row
 * @return the result of comparing the first row's parsed date to the second's
 * @throws RuntimeException wrapping the {@link ParseException} if either value cannot be parsed
 */
@Override
public int compare(Row first, Row second) {
  try {
    final Date lhs = format.parse(first.<String>getAs(field.name()));
    final Date rhs = format.parse(second.<String>getAs(field.name()));
    return lhs.compareTo(rhs);
  } catch (ParseException parseFailure) {
    throw new RuntimeException(parseFailure);
  }
}
/**
 * Compares two rows chronologically using the string-encoded date held in this model's field.
 *
 * @param first  row supplying the left operand
 * @param second row supplying the right operand
 * @return negative, zero or positive as defined by {@link Date#compareTo(Date)}
 * @throws RuntimeException if {@code format} rejects either string (the {@link ParseException}
 *                          is kept as the cause)
 */
@Override
public int compare(Row first, Row second) {
  final String column = field.name();
  final Date earlierCandidate;
  final Date laterCandidate;
  try {
    earlierCandidate = format.parse(first.<String>getAs(column));
    laterCandidate = format.parse(second.<String>getAs(column));
  } catch (ParseException cause) {
    throw new RuntimeException(cause);
  }
  return earlierCandidate.compareTo(laterCandidate);
}
/**
 * Returns the result of setting this model's field on the row to the formatted
 * {@code farFuture} value.
 *
 * @param row the row to update
 * @return whatever {@code RowUtils.set} returns for the field and the formatted value
 */
@Override
public Row setFarFutureTime(Row row) {
  final String timeColumn = field.name();
  return RowUtils.set(row, timeColumn, format.format(farFuture));
}
/**
 * Appends this model's field to the row with a {@code null} value.
 *
 * @param row the row to extend
 * @return whatever {@code RowUtils.append} returns for the field's name, data type and {@code null}
 */
@Override
public Row appendFields(Row row) {
  final String timeColumn = field.name();
  return RowUtils.append(row, timeColumn, field.dataType(), null);
}
/**
 * Extracts this model's field from the row and wraps it in a new row built with
 * {@code getSchema()}.
 *
 * @param row the row to read the value from
 * @return a {@code RowWithSchema} holding the value read for this model's field
 */
@Override
public Row getTime(Row row) {
  final String timeColumn = field.name();
  return new RowWithSchema(getSchema(), RowUtils.get(row, timeColumn));
}
/**
 * Fills null fields of {@code into} with the corresponding non-null values of {@code from},
 * but only when the carry-forward option is present in the config and set to true.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying replacement values
 * @return {@code into} unchanged when carry-forward is off; otherwise the row after filling
 */
private Row carryForwardWhenNull(Row into, Row from) {
  final boolean carryForwardEnabled =
      config.hasPath(CARRY_FORWARD_CONFIG_NAME) && config.getBoolean(CARRY_FORWARD_CONFIG_NAME);
  if (!carryForwardEnabled) {
    return into;
  }

  Row merged = into;
  // The field list is taken from the incoming row's schema, once, before any replacement.
  for (StructField column : into.schema().fields()) {
    final String columnName = column.name();
    final boolean missingInTarget = RowUtils.get(merged, columnName) == null;
    if (missingInTarget && RowUtils.get(from, columnName) != null) {
      merged = RowUtils.set(merged, columnName, RowUtils.get(from, columnName));
    }
  }
  return merged;
}
/**
 * Converts a Spark {@code StructType} into an {@code AnalyticsSchema}.
 *
 * <p>Each field becomes a {@code ColumnDefinition} carrying the field's name and the column type
 * that {@code AnalyticsCommonUtils.stringToColumnType} returns for the field's data type name.
 * The second constructor argument of the resulting schema is always an empty string list.
 *
 * @param schema the Spark schema to convert
 * @return the equivalent analytics schema, with columns in the schema's field order
 */
public static AnalyticsSchema analyticsSchemaFromStructType(StructType schema) {
  List<ColumnDefinition> columns = new ArrayList<>();
  for (StructField column : schema.fields()) {
    String typeName = column.dataType().typeName();
    AnalyticsSchema.ColumnType columnType = AnalyticsCommonUtils.stringToColumnType(typeName);
    columns.add(new ColumnDefinition(column.name(), columnType));
  }
  return new AnalyticsSchema(columns, Collections.<String>emptyList());
}
/**
 * A cell holding an empty map must still produce one struct field named "map" whose data type
 * is a {@code MapType}.
 */
@Test
public void testGetStructTypeEmptyMap() {
  Map<Integer, Integer> emptyMap = new HashMap<>();
  Cells cells = new Cells("namespace", Cell.create("map", emptyMap));

  StructField[] derivedFields = CellsUtils.getStructTypeFromCells(cells).fields();

  assertEquals(derivedFields.length, cells.size());
  StructField mapField = derivedFields[0];
  assertEquals(mapField.name(), "map");
  assertTrue(mapField.dataType() instanceof MapType);
}
/**
 * Copies values from {@code from} into the null fields of {@code into}, provided
 * {@code doesCarryForward()} reports true.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying replacement values
 * @return {@code into} unchanged when carry-forward is off; otherwise the row after filling
 */
private Row carryForwardWhenNull(Row into, Row from) {
  if (!doesCarryForward()) {
    return into;
  }

  Row filled = into;
  for (StructField column : into.schema().fields()) {
    final String name = column.name();
    if (RowUtils.get(filled, name) != null) {
      // Target already has a value; nothing to carry forward.
      continue;
    }
    if (RowUtils.get(from, name) == null) {
      // Source has nothing to offer for this field either.
      continue;
    }
    filled = RowUtils.set(filled, name, RowUtils.get(from, name));
  }
  return filled;
}
/**
 * Builds a time row holding the value immediately before the given row's time.
 *
 * <p>The time is read as a {@code Long} from this model's field and decremented by one.
 *
 * @param row the row to read the current time from
 * @return a {@code RowWithSchema} built with {@code getSchema()} and the decremented value
 */
@Override
public Row getPrecedingTime(Row row) {
  final long currentTime = row.<Long>getAs(field.name());
  return new RowWithSchema(getSchema(), currentTime - 1);
}
}
/** Subsetting a three-field schema by all three names must keep every field, in order. */
@Test
public void testSubsetSchemaAllFields() {
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  StructType subset = RowUtils.subsetSchema(schema, Lists.newArrayList("field1", "field2", "field3"));

  StructField[] kept = subset.fields();
  assertEquals(kept.length, 3);
  String[] expectedNames = {"field1", "field2", "field3"};
  for (int idx = 0; idx < expectedNames.length; idx++) {
    assertEquals(kept[idx].name(), expectedNames[idx]);
  }
}
/** Subtracting an empty name list from a schema must leave all three fields, in order. */
@Test
public void testSubtractSchemaNoFields() {
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  StructType remaining = RowUtils.subtractSchema(schema, Lists.<String>newArrayList());

  StructField[] remainingFields = remaining.fields();
  assertEquals(remainingFields.length, 3);
  assertEquals(remainingFields[0].name(), "field1");
  assertEquals(remainingFields[1].name(), "field2");
  assertEquals(remainingFields[2].name(), "field3");
}
/** Subsetting by "field1" and "field3" must keep exactly those two fields, in schema order. */
@Test
public void testSubsetSchemaSomeFields() {
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  StructType subset = RowUtils.subsetSchema(schema, Lists.newArrayList("field1", "field3"));

  StructField[] kept = subset.fields();
  assertEquals(kept.length, 2);
  assertEquals(kept[0].name(), "field1");
  assertEquals(kept[1].name(), "field3");
}
/** Subtracting "field1" and "field3" from the schema must leave only "field2". */
@Test
public void testSubtractSchemaSomeFields() {
  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true),
      DataTypes.createStructField("field2", DataTypes.IntegerType, true),
      DataTypes.createStructField("field3", DataTypes.FloatType, true)));

  StructType remaining = RowUtils.subtractSchema(schema, Lists.newArrayList("field1", "field3"));

  StructField[] remainingFields = remaining.fields();
  assertEquals(remainingFields.length, 1);
  assertEquals(remainingFields[0].name(), "field2");
}