String fieldName = field.getName(); if (excelColumnValueMap.containsKey(fieldName)) { builder.convertAndSet(fieldName, excelColumnValueMap.get(fieldName)); } else { builder.set(fieldName, NULL); builder.set(FILE, new Path(fileName).getName()); builder.set(SHEET, sheetName); emitter.emit(builder.build()); case WRITE_ERROR_DATASET: StructuredRecord.Builder errorRecordBuilder = StructuredRecord.builder(errorRecordSchema); errorRecordBuilder.set(KEY, fileName + "_" + sheetName + "_" + excelRecord[0]); errorRecordBuilder.set(FILE, fileName); errorRecordBuilder.set(SHEET, sheetName); errorRecordBuilder.set(RECORD, inputValue); Table errorTable = batchRuntimeContext.getDataset(excelInputreaderConfig.errorDatasetName); errorTable.write(errorRecordBuilder.build()); break; default:
builder.set(field.getName(), fromRowValue(row.getList(idx), fieldSchema, fieldPath)); } else if (fieldSchema.getType() == Schema.Type.MAP) { builder.set(field.getName(), fromRowValue(row.getJavaMap(idx), fieldSchema, fieldPath)); } else { Object fieldValue = row.get(idx); fieldValue = ((Timestamp) fieldValue).getTime(); builder.set(field.getName(), fromRowValue(fieldValue, fieldSchema, fieldPath)); return builder.build();
builder.set(field.getName(), fromRowValue(row.getList(idx), fieldSchema, fieldPath)); } else if (fieldSchema.getType() == Schema.Type.MAP) { builder.set(field.getName(), fromRowValue(row.getJavaMap(idx), fieldSchema, fieldPath)); } else { Object fieldValue = row.get(idx); fieldValue = ((Timestamp) fieldValue).getTime(); builder.set(field.getName(), fromRowValue(fieldValue, fieldSchema, fieldPath)); return builder.build();
builder.set(field.getName(), fromRowValue(row.getList(idx), fieldSchema, fieldPath)); } else if (fieldSchema.getType() == Schema.Type.MAP) { builder.set(field.getName(), fromRowValue(row.getJavaMap(idx), fieldSchema, fieldPath)); } else { Object fieldValue = row.get(idx); fieldValue = ((Timestamp) fieldValue).getTime(); builder.set(field.getName(), fromRowValue(fieldValue, fieldSchema, fieldPath)); return builder.build();
Object decodedObj = decode(name, value, field.getSchema()); if (decodedObj instanceof LocalDate) { builder.setDate(name, (LocalDate) decodedObj); } else if (decodedObj instanceof LocalTime) { builder.setTime(name, (LocalTime) decodedObj); } else if (decodedObj instanceof ZonedDateTime) { builder.setTimestamp(name, (ZonedDateTime) decodedObj); } else { builder.set(name, decodedObj); return builder.build();
Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of()); StructuredRecord output = StructuredRecord.builder(DEFAULT_SCHEMA) .set("ts", event.getTimestamp()) .set("headers", headers) .set("body", event.getBody()) .build(); emitter.emit(output); } else { builder.set("ts", key instanceof LongWritable ? ((LongWritable) key).get() : (Long) key); builder.set("headers", headers); builder.set(fieldName, record.get(fieldName)); emitter.emit(builder.build());
if (inField.getName().equals(config.lookupKey)) { if (lookedUpValue instanceof String) { outputBuilder.set(config.destinationField, lookedUpValue); } else { outputBuilder.set(Bytes.toString(entry.getKey()), Bytes.toString(entry.getValue())); outputBuilder.set(inField.getName(), input.get(inField.getName())); emitter.emit(outputBuilder.build());
/**
 * Selects records out of one group and emits them.
 *
 * <p>When no filter function is configured, the first record of the group is emitted as-is.
 * Otherwise every record of the group is passed to the selection function obtained from the
 * filter function, and each record it selects is copied field by field into a record built
 * on {@code getOutputSchema(...)} before being emitted.
 *
 * @param groupKey key of the group; not read by this method
 * @param iterator records belonging to the group
 * @param emitter receives the emitted records
 * @throws IllegalArgumentException if the field named by the filter function is not present
 *     in the schema of the first record
 * @throws Exception declared by the overridden method
 */
@Override
public void aggregate(StructuredRecord groupKey, Iterator<StructuredRecord> iterator,
                      Emitter<StructuredRecord> emitter) throws Exception {
  if (!iterator.hasNext()) {
    return;
  }

  StructuredRecord firstRecord = iterator.next();
  if (filterFunction == null) {
    emitter.emit(firstRecord);
    return;
  }

  // The selection function is typed from the filter field's schema, taken from the first record.
  Schema.Field firstField = firstRecord.getSchema().getField(filterFunction.getField());
  if (firstField == null) {
    // Fail with a descriptive error instead of a bare NullPointerException.
    throw new IllegalArgumentException(
      "Filter field '" + filterFunction.getField() + "' does not exist in the input record schema.");
  }

  SelectionFunction selectionFunction = filterFunction.getSelectionFunction(firstField.getSchema());
  selectionFunction.beginFunction();
  selectionFunction.operateOn(firstRecord);
  while (iterator.hasNext()) {
    selectionFunction.operateOn(iterator.next());
  }

  List<StructuredRecord> outputRecords = selectionFunction.getSelectedRecords();
  for (StructuredRecord outputRecord : outputRecords) {
    Schema outputSchema = getOutputSchema(outputRecord.getSchema());
    StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
    for (Schema.Field field : outputRecord.getSchema().getFields()) {
      builder.set(field.getName(), outputRecord.get(field.getName()));
    }
    emitter.emit(builder.build());
  }
}
@Override public void transform(StructuredRecord valueIn, Emitter<StructuredRecord> emitter) { Schema inputSchema = valueIn.getSchema(); Schema outputSchema = getOutputSchema(inputSchema); StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema); for (Schema.Field inputField : inputSchema.getFields()) { String inputFieldName = inputField.getName(); if (!fieldsToKeep.isEmpty() && !fieldsToKeep.contains(inputFieldName)) { continue; } else if (fieldsToDrop.contains(inputFieldName)) { continue; } // get the corresponding output field name String outputFieldName = fieldsToRename.get(inputFieldName); if (outputFieldName == null) { outputFieldName = inputFieldName; } Schema.Field outputField = outputSchema.getField(outputFieldName); Object inputVal = valueIn.get(inputFieldName); // if we need to convert the value, convert it. otherwise just pass the value through if (fieldsToConvert.containsKey(inputFieldName)) { convertAndSet(builder, outputFieldName, inputVal, inputField.getSchema(), outputField.getSchema()); } else { builder.set(outputFieldName, inputVal); } } emitter.emit(builder.build()); }
@Override public StructuredRecord read(Row row, Schema sourceSchema) throws IOException { Preconditions.checkArgument(sourceSchema.getType() == Schema.Type.RECORD, "Source schema must be a record."); initializeRead(sourceSchema); StructuredRecord.Builder builder = StructuredRecord.builder(schema); // if one of the fields should come from the row key, add it. if (rowFieldName != null) { builder.set(rowFieldName, rowKeyFunction.convert(row.getRow())); } // go through the Row columns and add their values to the record try { for (Schema.Field sourceField : sourceSchema.getFields()) { String sourceFieldName = sourceField.getName(); Schema.Field targetField = schema.getField(sourceFieldName); // the Row may contain more fields than our target schema. Skip those fields that are not in the target schema, // as well as the row key field since it comes from the row key and not the columns. if (targetField == null || targetField.getName().equals(rowFieldName)) { advanceField(); continue; } builder.set(sourceFieldName, read(row, sourceField.getSchema(), targetField.getSchema(), type)); } return builder.build(); } catch (Exception e) { throw propagate(e); } }
@Override public StructuredRecord read(Row row, Schema sourceSchema) throws IOException { Preconditions.checkArgument(sourceSchema.getType() == Schema.Type.RECORD, "Source schema must be a record."); initializeRead(sourceSchema); StructuredRecord.Builder builder = StructuredRecord.builder(schema); // if one of the fields should come from the row key, add it. if (rowFieldName != null) { builder.set(rowFieldName, rowKeyFunction.convert(row.getRow())); } // go through the Row columns and add their values to the record try { for (Schema.Field sourceField : sourceSchema.getFields()) { String sourceFieldName = sourceField.getName(); Schema.Field targetField = schema.getField(sourceFieldName); // the Row may contain more fields than our target schema. Skip those fields that are not in the target schema, // as well as the row key field since it comes from the row key and not the columns. if (targetField == null || targetField.getName().equals(rowFieldName)) { advanceField(); continue; } builder.set(sourceFieldName, read(row, sourceField.getSchema(), targetField.getSchema(), type)); } return builder.build(); } catch (Exception e) { throw propagate(e); } }
@Override public StructuredRecord make(Schema schema, Iterator<String> bodyFields) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); Iterator<Schema.Field> fieldsIterator = schema.getFields().iterator(); while (fieldsIterator.hasNext()) { Schema.Field field = fieldsIterator.next(); Schema fieldSchema = field.getSchema(); String fieldName = field.getName(); if (isStringArray(fieldSchema)) { if (!fieldsIterator.hasNext()) { // only do varargs-style string array parsing on bodyField if it's the last field List<String> fields = Lists.newArrayList(bodyFields); builder.set(fieldName, fields.toArray(new String[fields.size()])); } else { throw new UnexpectedFormatException( String.format("string array type field '%s' must be the last schema field", fieldName)); } } else { // simple type (not string array) String bodyField = bodyFields.hasNext() ? bodyFields.next() : null; String val = parseBodyValue(bodyField, fieldSchema); builder.convertAndSet(fieldName, val); } } return builder.build(); } }
@Override public StructuredRecord make(Schema schema, Iterator<String> bodyFields) { StructuredRecord.Builder builder = StructuredRecord.builder(schema); Iterator<Schema.Field> fieldsIterator = schema.getFields().iterator(); while (fieldsIterator.hasNext()) { Schema.Field field = fieldsIterator.next(); Schema fieldSchema = field.getSchema(); String fieldName = field.getName(); if (isStringArray(fieldSchema)) { if (!fieldsIterator.hasNext()) { // only do varargs-style string array parsing on bodyField if it's the last field List<String> fields = Lists.newArrayList(bodyFields); builder.set(fieldName, fields.toArray(new String[fields.size()])); } else { throw new UnexpectedFormatException( String.format("string array type field '%s' must be the last schema field", fieldName)); } } else { // simple type (not string array) String bodyField = bodyFields.hasNext() ? bodyFields.next() : null; String val = parseBodyValue(bodyField, fieldSchema); builder.convertAndSet(fieldName, val); } } return builder.build(); } }
@Override public StructuredRecord merge(StructuredRecord joinKey, Iterable<JoinElement<StructuredRecord>> joinRow) { StructuredRecord record = null; boolean containsDupe = false; for (JoinElement<StructuredRecord> element : joinRow) { if (element.getStageName().equals(config.keep)) { record = element.getInputRecord(); } else { containsDupe = true; } } if (record == null) { // can only happen if 'keep' was a macro and did not evaluate to one of the inputs throw new IllegalArgumentException("No record for " + config.keep + " was found."); } Schema outputSchema = getOutputSchema(record.getSchema()); StructuredRecord.Builder outputBuilder = StructuredRecord.builder(outputSchema) .set(config.flagField, containsDupe); for (Schema.Field field : record.getSchema().getFields()) { outputBuilder.set(field.getName(), record.get(field.getName())); } return outputBuilder.build(); }
/**
 * Aggregates all records of one group into a single output record.
 *
 * <p>Aggregates are initialised from the first record's schema. The output record receives
 * the group-by fields from {@code groupKey} and one field per entry of
 * {@code aggregateFunctions}, holding that function's aggregate over the whole group.
 * Nothing is emitted for an empty group.
 *
 * @param groupKey record holding the group-by field values
 * @param iterator records belonging to the group
 * @param emitter receives the single aggregated record
 * @throws Exception declared by the overridden method
 */
@Override
public void aggregate(StructuredRecord groupKey, Iterator<StructuredRecord> iterator,
                      Emitter<StructuredRecord> emitter) throws Exception {
  if (!iterator.hasNext()) {
    return;
  }

  StructuredRecord current = iterator.next();
  initAggregates(current.getSchema());

  StructuredRecord.Builder result = StructuredRecord.builder(outputSchema);
  for (String keyField : groupByFields) {
    result.set(keyField, groupKey.get(keyField));
  }

  // Feed the first record, then every remaining one, into the aggregates.
  while (true) {
    updateAggregates(current);
    if (!iterator.hasNext()) {
      break;
    }
    current = iterator.next();
  }

  for (Map.Entry<String, AggregateFunction> entry : aggregateFunctions.entrySet()) {
    String aggregateName = entry.getKey();
    AggregateFunction function = entry.getValue();
    result.set(aggregateName, function.getAggregate());
  }
  emitter.emit(result.build());
}
private void convertAndSet(StructuredRecord.Builder builder, String fieldName, Object val, Schema inputSchema, Schema outputSchema) { // guaranteed that if the input type is nullable, the output type is also nullable. Schema.Type inputType = inputSchema.getType(); Schema.Type outputType = outputSchema.getType(); if (inputSchema.isNullable()) { if (val == null) { builder.set(fieldName, null); return; } inputType = inputSchema.getNonNullable().getType(); outputType = outputSchema.getNonNullable().getType(); } // if the input is a string, try and do some sensible conversion if (inputType == Schema.Type.STRING) { builder.convertAndSet(fieldName, (String) val); } else { // otherwise, just try to cast it. builder.set(fieldName, convertPrimitive(val, inputType, outputType)); } }
@Override public StructuredRecord joinOn(String stageName, StructuredRecord record) throws Exception { List<Schema.Field> fields = new ArrayList<>(); Schema schema = record.getSchema(); // TODO create output record based on fields properties Map<String, List<String>> stageToJoinKey = config.getJoinKeys(); List<String> joinKeys = stageToJoinKey.get(stageName); int i = 1; for (String joinKey : joinKeys) { Schema.Field joinField = Schema.Field.of(String.valueOf(i++), schema.getField(joinKey).getSchema()); fields.add(joinField); } Schema keySchema = Schema.recordOf("join.key", fields); StructuredRecord.Builder keyRecordBuilder = StructuredRecord.builder(keySchema); i = 1; for (String joinKey : joinKeys) { keyRecordBuilder.set(String.valueOf(i++), record.get(joinKey)); } return keyRecordBuilder.build(); }