/**
 * Reports whether the schema of the provided row contains the mutation type field.
 *
 * @param row the row whose schema field names are inspected
 * @return true if any field is named {@code MutationType.MUTATION_TYPE_FIELD_NAME},
 *         false otherwise
 */
public static boolean hasMutationTypeField(Row row) {
  String[] names = row.schema().fieldNames();
  boolean found = false;
  // Stop scanning as soon as the first match is seen.
  for (int i = 0; i < names.length && !found; i++) {
    found = names[i].equals(MutationType.MUTATION_TYPE_FIELD_NAME);
  }
  return found;
}
/**
 * Collects the column families of the columns that correspond to the fields of the row.
 *
 * <p>Each field name of the row's schema is looked up in {@code columns}. The family of
 * every definition found is added to the result, except the family named "rowkey".
 * Fields that have no entry in {@code columns} are skipped.
 *
 * @param row the row whose schema field names are looked up
 * @return the distinct column families referenced by the row, excluding "rowkey"
 */
private Set<String> getColumnFamilies(Row row) {
  Set<String> families = Sets.newHashSet();
  for (String fieldName : row.schema().fieldNames()) {
    ColumnDef def = columns.get(fieldName);
    // A field without a column definition has no family to contribute; dereferencing
    // the missing definition would otherwise fail with a NullPointerException.
    if (def == null) {
      continue;
    }
    if (!def.cf.equals("rowkey")) {
      families.add(def.cf);
    }
  }
  return families;
}
/**
 * Determines whether the fields of the row are exactly the leading key columns.
 *
 * <p>The distinct field names of the row are compared, as a set, with the first
 * {@code n} entries of {@code keyColumns}, where {@code n} is the number of distinct
 * field names in the row.
 *
 * @param row the row whose schema field names are compared with the key columns
 * @return true if the row's field names equal a prefix of {@code keyColumns};
 *         false otherwise, including when the row has more fields than there are
 *         key columns
 */
private boolean filtersRowKeyPrefix(Row row) {
  Set<String> rowColumnNames = Sets.newHashSet(row.schema().fieldNames());
  // A row with more fields than key columns cannot be a key prefix, and asking for a
  // sub-list longer than the list would throw IndexOutOfBoundsException.
  if (rowColumnNames.size() > keyColumns.size()) {
    return false;
  }
  Set<String> prefixColumnNames = Sets.newHashSet(keyColumns.subList(0, rowColumnNames.size()));
  return rowColumnNames.equals(prefixColumnNames);
}
/**
 * Determines whether the row provides a field for every key column.
 *
 * @param row the row whose schema field names are checked
 * @return true if every entry of {@code keyColumns} is a field name of the row,
 *         false as soon as one key column is missing
 */
private boolean filtersEntireRowKey(Row row) {
  // The field names do not change while iterating, so fetch them once rather than
  // once per key column.
  String[] rowFieldNames = row.schema().fieldNames();
  for (String keyColumn : keyColumns) {
    if (!Arrays.asList(rowFieldNames).contains(keyColumn)) {
      return false;
    }
  }
  return true;
}
/**
 * Renders the row as text of the form {@code /name=value/name=value...}, with one
 * {@code /name=value} segment per schema field in schema order, and encodes that
 * text as UTF-8.
 *
 * @param row the row to serialize
 * @return the UTF-8 bytes of the rendered text
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private byte[] serializeRow(Row row) throws IOException {
  StringBuilder text = new StringBuilder();
  for (StructField column : row.schema().fields()) {
    String columnName = column.name();
    text.append("/").append(columnName).append("=").append(RowUtils.get(row, columnName));
  }
  return text.toString().getBytes(Charsets.UTF_8);
}
/**
 * Creates a copy of the row without the named field, dropping both the field from
 * the schema and the value at the same position.
 *
 * @param row the row to copy
 * @param fieldName the name of the field to leave out
 * @return a new {@code RowWithSchema} holding the remaining fields and values
 */
public static Row remove(Row row, String fieldName) {
  List<StructField> keptFields = Lists.newArrayList(row.schema().fields());
  // Primitive int on purpose: this selects List.remove(int index), not remove(Object).
  int position = row.fieldIndex(fieldName);
  keptFields.remove(position);
  StructField[] keptFieldArray = keptFields.toArray(new StructField[keptFields.size()]);
  StructType keptSchema = new StructType(keptFieldArray);

  List<Object> keptValues = Lists.newArrayList(RowUtils.valuesFor(row));
  keptValues.remove(position);

  return new RowWithSchema(keptSchema, keptValues.toArray());
}
/**
 * Streams the names of the row's fields that should become properties: names that
 * {@code ReservedPropertyNames} does not contain and whose value in the row is not null.
 *
 * @param row the row whose schema field names are filtered
 * @return a stream of the qualifying field names, in schema order
 */
private Stream<String> getPropertyNames(final Row row) {
  final String[] columnNames = row.schema().fieldNames();
  return Stream.of(columnNames)
      .filter(name -> !ReservedPropertyNames.contains(name)
          && !row.isNullAt(row.fieldIndex(name)));
}
}
/**
 * Extracts the key of the arrived row and records how long the extraction took.
 *
 * <p>The key schema is derived from the first row seen and reused afterwards. The
 * elapsed time, converted from nanoseconds to seconds, is added to the
 * {@code ACCUMULATOR_SECONDS_EXTRACTING_KEYS} accumulator.
 *
 * @param arrived the row to extract the key from
 * @return the subset of the arrived row described by the key schema
 * @throws Exception declared by the signature
 */
@Override
public Row call(Row arrived) throws Exception {
  final long begunAt = System.nanoTime();

  if (null == schema) {
    schema = RowUtils.subsetSchema(arrived.schema(), keyFieldNames);
  }
  final Row extractedKey = RowUtils.subsetRow(arrived, schema);

  final long finishedAt = System.nanoTime();
  final double elapsedSeconds = (finishedAt - begunAt) / 1000.0 / 1000.0 / 1000.0;
  accumulators.getDoubleAccumulators().get(ACCUMULATOR_SECONDS_EXTRACTING_KEYS).add(
      elapsedSeconds);

  return extractedKey;
}
}
/**
 * Builds a {@code MUST_PASS_ALL} filter list with one equality filter per column
 * that is present in the row.
 *
 * <p>A column contributes a filter only when its family is not "rowkey", its key is
 * one of the row's field names, and its value converts to a non-null byte array.
 *
 * @param row the row supplying the column values to match
 * @return the filter list; it has no filters when no column qualifies
 */
private FilterList getColumnValueFilters(Row row) {
  FilterList valueFilters = new FilterList(Operator.MUST_PASS_ALL);
  Set<String> presentNames = Sets.newHashSet(row.schema().fieldNames());

  for (Map.Entry<String, ColumnDef> entry : columns.entrySet()) {
    ColumnDef def = entry.getValue();
    if (def.cf.equals("rowkey") || !presentNames.contains(entry.getKey())) {
      continue;
    }

    byte[] encoded = getColumnValueAsBytes(def.name, def.type, row);
    if (encoded == null) {
      continue;
    }

    valueFilters.addFilter(new SingleColumnValueFilter(
        Bytes.toBytes(def.cf),
        Bytes.toBytes(def.name),
        CompareFilter.CompareOp.EQUAL,
        encoded
    ));
  }

  return valueFilters;
}
/**
 * Creates a copy of the row in which the value of the named field is replaced.
 *
 * <p>The position of the field is resolved once, before copying, so an unknown field
 * name is rejected by {@code row.fieldIndex} even when the row has no fields.
 *
 * @param row the row to copy
 * @param fieldName the name of the field whose value is replaced
 * @param replacement the new value for the field
 * @return a new {@code RowWithSchema} with the same schema and the replaced value
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // The index does not change while copying, so look it up once, not once per field.
  int targetIndex = row.fieldIndex(fieldName);
  int fieldCount = row.schema().fields().length;

  Object[] values = new Object[row.length()];
  for (int i = 0; i < fieldCount; i++) {
    values[i] = (i == targetIndex) ? replacement : row.get(i);
  }

  return new RowWithSchema(row.schema(), values);
}
/**
 * Checks whether the row has, for every field of the filter, a value equal to the
 * filter's value for that field.
 *
 * <p>The comparison is null-safe: a null row value matches only a null filter value.
 *
 * @param row the row being tested
 * @param filter the row whose fields and values must all be matched
 * @return true if every filter field matches, false at the first mismatch
 */
private boolean matchesValueFilter(Row row, Row filter) {
  for (String filterFieldName : filter.schema().fieldNames()) {
    Object rowValue = row.get(row.fieldIndex(filterFieldName));
    Object filterValue = RowUtils.get(filter, filterFieldName);

    // Calling equals on a null row value would throw a NullPointerException.
    boolean sameValue = (rowValue == null)
        ? filterValue == null
        : rowValue.equals(filterValue);
    if (!sameValue) {
      return false;
    }
  }

  return true;
}
/**
 * Fills the null fields of {@code into} with the corresponding non-null values of
 * {@code from}, when carry-forward is switched on in the configuration.
 *
 * <p>Carry-forward is on only if {@code CARRY_FORWARD_CONFIG_NAME} is present in the
 * configuration and set to true; otherwise {@code into} is returned as is.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying the values to carry forward
 * @return {@code into} itself, or a row derived from it with the filled fields
 */
private Row carryForwardWhenNull(Row into, Row from) {
  boolean enabled = config.hasPath(CARRY_FORWARD_CONFIG_NAME)
      && config.getBoolean(CARRY_FORWARD_CONFIG_NAME);
  if (!enabled) {
    return into;
  }

  Row merged = into;
  for (StructField field : into.schema().fields()) {
    String name = field.name();
    if (RowUtils.get(merged, name) != null) {
      continue;
    }
    // The source row is consulted only for fields that are null in the target.
    Object carried = RowUtils.get(from, name);
    if (carried != null) {
      merged = RowUtils.set(merged, name, carried);
    }
  }

  return merged;
}
/**
 * Creates a copy of the row with one extra field added after the existing ones.
 *
 * @param row the row to extend
 * @param fieldName the name of the new field
 * @param fieldType the data type of the new field
 * @param value the value of the new field
 * @return a new {@code RowWithSchema} whose schema and values each end with the new entry
 */
public static Row append(Row row, String fieldName, DataType fieldType, Object value) {
  StructType widenedSchema = row.schema().add(fieldName, fieldType);
  return new RowWithSchema(widenedSchema, ObjectArrays.concat(valuesFor(row), value));
}
/**
 * Builds a Kudu scanner over the table that applies one in-list predicate per filter field.
 *
 * <p>The field names come from the schema of the first filter. For each field, the
 * values of that field across all filters form the in-list. When accumulators are
 * available, the scanner count is incremented by one and the scanned-filter count by
 * the number of filters.
 *
 * @param filters the filter rows; at least one is required and the first must carry a schema
 * @param table the Kudu table to scan
 * @return the built scanner
 * @throws KuduException declared by the signature
 * @throws RuntimeException if no filter is provided or the first filter has no schema
 */
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
  List<Row> filterRows = Lists.newArrayList(filters);

  if (filterRows.isEmpty()) {
    throw new RuntimeException("Kudu existing filter was not provided.");
  }
  Row firstFilter = filterRows.get(0);
  if (firstFilter.schema() == null) {
    throw new RuntimeException("Kudu existing filter did not contain a schema.");
  }

  if (hasAccumulators()) {
    accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
    accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED)
        .add(filterRows.size());
  }

  KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);

  for (String fieldName : firstFilter.schema().fieldNames()) {
    ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);

    // Gather this field's value from every filter row.
    List<Object> acceptedValues = Lists.newArrayList();
    for (Row filterRow : filterRows) {
      acceptedValues.add(RowUtils.get(filterRow, fieldName));
    }

    builder = builder.addPredicate(KuduPredicate.newInListPredicate(columnSchema, acceptedValues));
  }

  return builder.build();
}
@Override public Iterator<Row> call(Row row) throws Exception { // Retrieve the Command pipeline via ThreadLocal Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId); if (null == pipeline) { pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true); } // Convert each Row into a Record StructType inputSchema = row.schema(); if (null == inputSchema) { throw new RuntimeException("Row does not have an associated StructType schema"); } Record inputRecord = new Record(); String[] fieldNames = inputSchema.fieldNames(); // TODO : Confirm nested object conversion for (int i = 0; i < fieldNames.length; i++) { inputRecord.put(fieldNames[i], row.get(i)); } // Process each Record via the Command pipeline List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty); // Convert each Record into a new Row List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size()); for (Record record : outputRecords) { outputRows.add(MorphlineUtils.convertToRow(outputSchema, record)); } return outputRows.iterator(); } };
/**
 * Converts a row into an {@code Edge} or an {@code Entity}.
 *
 * <p>An edge is built when the row has a source column holding a non-null value;
 * the matched vertex is taken from its column when that column exists and is non-null.
 * Otherwise an entity is built, using the vertex column if the row has one and the
 * id column if not. Every name returned by {@code getPropertyNames} is then copied
 * onto the element as a property.
 *
 * @param row the row to convert
 * @return the edge or entity built from the row
 */
@Override
public Element _apply(final Row row) {
  final String group = row.getAs(SchemaToStructTypeConverter.GROUP);
  final String[] columns = row.schema().fieldNames();

  final Object source;
  if (ArrayUtils.contains(columns, SchemaToStructTypeConverter.SRC_COL_NAME)) {
    source = row.getAs(SchemaToStructTypeConverter.SRC_COL_NAME);
  } else {
    source = null;
  }

  final Element element;
  if (null == source) {
    final Object vertex;
    if (ArrayUtils.contains(columns, SchemaToStructTypeConverter.VERTEX_COL_NAME)) {
      vertex = row.getAs(SchemaToStructTypeConverter.VERTEX_COL_NAME);
    } else {
      vertex = row.getAs(SchemaToStructTypeConverter.ID);
    }
    element = new Entity(group, vertex);
  } else {
    final Object destination = row.getAs(SchemaToStructTypeConverter.DST_COL_NAME);
    final boolean directed = row.getAs(SchemaToStructTypeConverter.DIRECTED_COL_NAME);

    MatchedVertex matchedVertex = null;
    if (ArrayUtils.contains(columns, SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME)) {
      final String matchedVertexStr = row.getAs(SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME);
      if (null != matchedVertexStr) {
        matchedVertex = MatchedVertex.valueOf(matchedVertexStr);
      }
    }
    element = new Edge(group, source, destination, directed, matchedVertex, null);
  }

  getPropertyNames(row).forEach(name -> element.putProperty(name, row.getAs(name)));

  return element;
}
/**
 * Fills the null fields of {@code into} with the corresponding non-null values of
 * {@code from}, when {@code doesCarryForward()} reports that carry-forward is on.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying the values to carry forward
 * @return {@code into} itself, or a row derived from it with the filled fields
 */
private Row carryForwardWhenNull(Row into, Row from) {
  if (doesCarryForward()) {
    // The fields are taken from the row as it was passed in; later replacements of
    // `into` keep the same set of fields to visit.
    StructField[] fields = into.schema().fields();
    for (int i = 0; i < fields.length; i++) {
      String name = fields[i].name();
      boolean targetIsNull = RowUtils.get(into, name) == null;
      if (targetIsNull && RowUtils.get(from, name) != null) {
        into = RowUtils.set(into, name, RowUtils.get(from, name));
      }
    }
  }
  return into;
}
/**
 * Verifies that {@code PlannerUtils.appendMutationTypeField} returns a row whose
 * schema equals {@code schemaWithMT} when given a row built on {@code schemaWithoutMT}.
 */
@Test
public void testAppendMutationTypeField() {
  Row rowWithout = new RowWithSchema(schemaWithoutMT, "hello");

  Row rowWith = PlannerUtils.appendMutationTypeField(rowWithout);

  // Expected value first, actual second, so failure messages label them correctly.
  assertEquals(schemaWithMT, rowWith.schema());
}
/**
 * Verifies that {@code PlannerUtils.removeMutationTypeField} returns a row whose
 * schema equals {@code schemaWithoutMT} when given a row built on {@code schemaWithMT}.
 */
@Test
public void testRemoveMutationTypeField() {
  Row rowWith = new RowWithSchema(schemaWithMT, "hello", MutationType.DELETE.toString());

  Row rowWithout = PlannerUtils.removeMutationTypeField(rowWith);

  // Expected value first, actual second, so failure messages label them correctly.
  assertEquals(schemaWithoutMT, rowWithout.schema());
}
/**
 * Checks {@code RowUtils.toRowValue} for a nested map type (string keys mapping to a
 * map of string keys to integers) when the input is a mocked {@code Row}.
 *
 * <p>Expected result: an outer map with the single key "outer" whose value is the inner
 * map {"field1" = 1, "field2" = 2}.
 *
 * <p>Recorded mock behavior: {@code inputRow.schema()} is set up with
 * {@code returns(outerSchema, innerSchema)} (presumably consecutive return values, outer
 * first - confirm against the JMockit documentation); the outer schema has the one
 * field "outer" and the inner schema has the fields "field1" and "field2";
 * {@code inputRow.get(0)} is set up with {@code returns(inputRow, 1)} and
 * {@code inputRow.get(1)} with the result 2.
 *
 * <p>NOTE(review): the assertion message says "list of values" although the compared
 * values are maps.
 */
@Test public void testToRowValueMapRowNested( final @Mocked Row inputRow, final @Mocked StructType innerSchema, final @Mocked StructType outerSchema ) { DataType field = DataTypes.createMapType(DataTypes.StringType, DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true) ); Map<Object, Object> expectedInnerMap = Maps.newHashMap(); expectedInnerMap.put("field1", 1); expectedInnerMap.put("field2", 2); Map<Object, Object> expectedOuterMap = Maps.newHashMap(); expectedOuterMap.put("outer", expectedInnerMap); new Expectations() {{ inputRow.schema(); returns(outerSchema, innerSchema); outerSchema.fieldNames(); result = new String[] {"outer"}; innerSchema.fieldNames(); result = new String[] {"field1", "field2"}; inputRow.get(0); returns(inputRow, 1); inputRow.get(1); result = 2; }}; assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field)); }