/**
 * Builds a new {@link Row} that carries one column per entry of {@code map}.
 *
 * @return a freshly created row; each map key becomes a column name and each map value the
 *         corresponding column value.
 */
@Override
public Row get() {
  final Row result = new Row();
  map.forEach((name, value) -> result.add(name, value));
  return result;
}
/**
 * Flattens a JSON object into {@code row}, descending into nested objects until {@code depth}
 * exceeds {@code maxDepth}.
 *
 * <p>Every member of {@code root} is stored under the name {@code <field>_<key>}. Members that are
 * themselves JSON objects are flattened recursively with that name as the new prefix. Once the
 * depth limit is exceeded the remaining object is stored as-is under {@code field}.
 *
 * @param root JSON object to flatten.
 * @param field name prefix used for the columns written to the row.
 * @param depth current depth into the JSON structure.
 * @param maxDepth maximum depth to descend to.
 * @param row row that receives the flattened fields.
 */
public static void jsonFlatten(JsonObject root, String field, int depth, int maxDepth, Row row) {
  // Past the depth limit: keep the remaining sub-tree unflattened.
  if (depth > maxDepth) {
    row.addOrSet(String.format("%s", field), root);
    return;
  }

  for (Map.Entry<String, JsonElement> member : root.entrySet()) {
    String qualifiedName = String.format("%s_%s", field, member.getKey());
    JsonElement child = member.getValue();
    if (child instanceof JsonObject) {
      jsonFlatten(child.getAsJsonObject(), qualifiedName, depth + 1, maxDepth, row);
    } else {
      row.add(qualifiedName, getValue(child));
    }
  }
}
/**
 * Stores a newly generated UUID string in {@code column} of every row, overwriting the column
 * when it already exists and appending it otherwise.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row current : rows) {
    int position = current.find(column);
    // Two random longs supply the most and least significant 64 bits of the UUID.
    String generated = new UUID(random.nextLong(), random.nextLong()).toString();
    if (position == -1) {
      current.add(column, generated);
    } else {
      current.setValue(position, generated);
    }
  }
  return rows;
}
}
/**
 * Flattens a {@link GenericRecord} into the given row.
 *
 * <p>Nested records are flattened recursively, with column names built as
 * {@code <name>_<field>}. {@link Map} and {@link List} values are stored as the JSON produced by
 * {@code gson}, {@link Utf8} values are stored as their string form, and every other value is
 * stored unchanged.
 *
 * @param genericRecord record to be flattened.
 * @param row row to be flattened into.
 * @param name prefix for the column names, or {@code null} to use the bare field names.
 */
private void add(GenericRecord genericRecord, Row row, String name) {
  for (Schema.Field field : genericRecord.getSchema().getFields()) {
    String fieldName = field.name();
    Object v = genericRecord.get(fieldName);
    String colname = (name == null) ? fieldName : String.format("%s_%s", name, fieldName);

    if (v instanceof GenericRecord) {
      add((GenericRecord) v, row, colname);
      continue;
    }

    if (v instanceof Map || v instanceof List) {
      row.add(colname, gson.toJson(v));
    } else if (v instanceof Utf8) {
      row.add(colname, v.toString());
    } else {
      row.add(colname, v);
    }
  }
}
}
/**
 * Adds a {@code <column>_<name>_description} column to every row.
 *
 * <p>The new column holds the description of the catalog entry found for the String value of
 * {@code column}. It is {@code null} when the column is missing, its value is not a String, or
 * the catalog has no entry for the code.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row row : rows) {
    String description = null;
    int idx = row.find(column);
    if (idx != -1) {
      Object object = row.getValue(idx);
      if (object instanceof String) {
        StaticCatalog.Entry entry = catalog.lookup((String) object);
        if (entry != null) {
          description = entry.getDescription();
        }
      }
    }
    // Exactly one description column is added per row, whatever the lookup outcome.
    row.add(String.format("%s_%s_description", column, name), description);
  }
  return rows;
}
}
/**
 * Extracts the capturing groups of every match of {@code pattern} in the String value of
 * {@code column} and adds each one as a new column named {@code <column>_<match>_<group>}, with
 * both numbers starting at 1.
 *
 * <p>Rows where the column is missing or does not hold a String are left untouched.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx == -1) {
      continue;
    }
    Object value = row.getValue(idx);
    if (!(value instanceof String)) {
      continue;
    }
    Matcher matcher = pattern.matcher((String) value);
    for (int match = 1; matcher.find(); match++) {
      for (int group = 1; group <= matcher.groupCount(); group++) {
        row.add(String.format("%s_%d_%d", column, match, group), matcher.group(group));
      }
    }
  }
  return rows;
}
}
/**
 * Copies every field of a {@link CSVRecord} into the given {@link Row}.
 *
 * <p>The field at position {@code i} is stored under {@code headers.get(i)} when a header exists
 * for that position. Otherwise it is stored under the generated name
 * {@code <columnArg>_<i + 1>}; this covers both the "no headers" case and records that carry more
 * fields than there are headers.
 *
 * @param record parsed CSV record whose fields are copied.
 * @param row row that receives one column per field of the record.
 */
private void toRow(CSVRecord record, Row row) {
  int size = headers.size();
  for (int i = 0; i < record.size(); i++) {
    // Guard on the position rather than on "any headers at all": a record longer than the header
    // list would otherwise fail with IndexOutOfBoundsException.
    String name = (i < size) ? headers.get(i) : columnArg.value() + "_" + (i + 1);
    row.add(name, record.get(i));
  }
}
/**
 * Joins the values of {@code col1} and {@code col2}, separated by {@code delimiter}, and adds the
 * result as column {@code dest}.
 *
 * <p>Rows that lack either source column are passed through without the new column.
 *
 * @param rows rows to be processed.
 * @param context executor context (not used by this method).
 * @return a new list containing every input row, in order.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  List<Row> results = new ArrayList<>();
  for (Row row : rows) {
    int first = row.find(col1);
    int second = row.find(col2);
    if (first != -1 && second != -1) {
      String merged = new StringBuilder()
        .append(row.getValue(first))
        .append(delimiter)
        .append(row.getValue(second))
        .toString();
      row.add(dest, merged);
    }
    results.add(row);
  }
  return results;
}
}
/**
 * Splits the String value of {@code column} on {@code regex} and adds every piece as a new column
 * named {@code <column>_<n>}, with {@code n} starting at 1.
 *
 * <p>Rows that do not contain {@code column} are not included in the returned list.
 *
 * @param rows rows to be processed.
 * @param context executor context (not used by this method).
 * @return a new list holding the rows that contained {@code column}, in order.
 * @throws DirectiveExecutionException if the column exists but its value is not a String.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  List<Row> results = new ArrayList<>();
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx == -1) {
      // Row is left out of the result.
      continue;
    }
    Object object = row.getValue(idx);
    if (!(object instanceof String)) {
      throw new DirectiveExecutionException(
        String.format("%s : Invalid type '%s' of column '%s'. Should be of type String.", toString(),
                      object != null ? object.getClass().getName() : "null", column)
      );
    }
    String[] parts = ((String) object).split(regex);
    for (int n = 0; n < parts.length; n++) {
      row.add(String.format("%s_%d", column, n + 1), parts[n]);
    }
    results.add(row);
  }
  return results;
}
}
/**
 * Sets the value of the field called {@code name} when {@code find} locates it, and adds a new
 * field with that name otherwise.
 *
 * @param name of the field to be either set or added to the record.
 * @param value to be stored.
 */
public void addOrSet(String name, Object value) {
  int position = find(name);
  if (position == -1) {
    add(name, value);
    return;
  }
  setValue(position, value);
}
/**
 * Extracts the substring {@code [start, end)} of the String value in {@code col} and adds it as
 * column {@code dest}.
 *
 * <p>The requested range is clamped per row: the lower bound is never below 0, the upper bound is
 * never above {@code length - 1}, and the lower bound never exceeds the upper bound. The
 * configured {@code start} and {@code end} are not modified, so clamping for one row does not
 * affect later rows.
 *
 * @param rows rows to be processed.
 * @param context executor context (not used by this method).
 * @return a new list containing every input row, in order.
 * @throws DirectiveExecutionException if {@code col} is missing from a row or its value is not a
 *         String.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  List<Row> results = new ArrayList<>();
  for (Row row : rows) {
    int idx = row.find(col);
    if (idx == -1) {
      throw new DirectiveExecutionException(
        col + " does not exist in the row. Please check the wrangle configuration."
      );
    }

    Object object = row.getValue(idx);
    if (!(object instanceof String)) {
      // Covers null as well; previously surfaced as ClassCastException / NullPointerException.
      throw new DirectiveExecutionException(
        col + " is not of type string in the row. Please check the wrangle configuration."
      );
    }
    String val = (String) object;

    // Clamp into locals: writing the clamped values back to the fields would leak the range
    // computed for one (short) value into every following row.
    // NOTE(review): the upper bound is clamped to length - 1 as before, which makes the last
    // character unreachable because substring's end index is exclusive -- confirm whether
    // clamping to length is intended.
    int to = Math.max(0, Math.min(end, val.length() - 1));
    int from = Math.min(Math.max(start, 0), to);

    row.add(dest, val.substring(from, to));
    results.add(row);
  }
  return results;
}
}
/**
 * Flattens a protobuf {@link Message} into the given row.
 *
 * <p>Each set field is stored under its field name, prefixed with {@code <root>_} when
 * {@code root} is not {@code null}. Message-typed fields are decoded recursively with the field's
 * full name as the new root. Enum fields are stored by name, and all other fields are stored with
 * the value reported by {@code getAllFields()}.
 *
 * <p>The value of a field is a {@link List} only when the field is repeated, so both shapes are
 * handled for message and enum fields. A repeated enum is stored as a list of names.
 *
 * @param message message to decode.
 * @param row row that receives the decoded fields.
 * @param root name prefix for the fields of {@code message}, or {@code null} for none.
 */
private void decodeMessage(Message message, Row row, String root) {
  for (Map.Entry<Descriptors.FieldDescriptor, Object> field : message.getAllFields().entrySet()) {
    Descriptors.FieldDescriptor descriptor = field.getKey();
    String name = descriptor.getName();
    String fullName = (root == null) ? name : String.format("%s_%s", root, name);
    Object value = field.getValue();

    switch (descriptor.getType()) {
      case MESSAGE:
        if (value instanceof List) {
          for (Object msg : (List<?>) value) {
            decodeMessage((Message) msg, row, fullName);
          }
        } else {
          // Singular message field: the value is the message itself, not a list.
          decodeMessage((Message) value, row, fullName);
        }
        break;

      case ENUM:
        if (value instanceof List) {
          // Repeated enum field: translate every element to its name.
          List<String> names = new java.util.ArrayList<String>();
          for (Object item : (List<?>) value) {
            names.add(((Descriptors.EnumValueDescriptor) item).getName());
          }
          row.add(fullName, names);
        } else {
          row.add(fullName, ((Descriptors.EnumValueDescriptor) value).getName());
        }
        break;

      default:
        row.add(fullName, value);
        break;
    }
  }
}
}
/**
 * Looks up the String value of {@code column} through {@code tableLookup} and adds every column
 * of the returned table row to the row as {@code <column>_<table column>}, with both the key and
 * the value converted via {@code Bytes.toString}.
 *
 * <p>Rows that do not contain {@code column} are left untouched.
 *
 * @param rows rows to be updated in place.
 * @param context executor context, passed to {@code ensureInitialized}.
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException if the column exists but its value is not a String.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  ensureInitialized(context);
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx != -1) {
      Object object = row.getValue(idx);
      if (object instanceof String) {
        co.cask.cdap.api.dataset.table.Row lookedUpRow = tableLookup.lookup((String) object);
        for (Map.Entry<byte[], byte[]> cell : lookedUpRow.getColumns().entrySet()) {
          String target = column + "_" + Bytes.toString(cell.getKey());
          row.add(target, Bytes.toString(cell.getValue()));
        }
      } else {
        throw new DirectiveExecutionException(
          String.format("%s : Invalid type '%s' of column '%s'. Should be of type String.", toString(),
                        object != null ? object.getClass().getName() : "null", column)
        );
      }
    }
  }
  return rows;
}
}
/**
 * Serializes all values of each row as CSV fields (default CSV format, no record separator) and
 * adds the resulting text as column {@code column}.
 *
 * <p>The text is encoded and decoded as UTF-8, so the result does not depend on the platform
 * default charset. A failure while writing is reported instead of storing partial output.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException if writing the CSV text fails.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row row : rows) {
    final ByteArrayOutputStream bOut = new ByteArrayOutputStream();
    try (Writer out = new BufferedWriter(
           new OutputStreamWriter(bOut, java.nio.charset.StandardCharsets.UTF_8));
         CSVPrinter csvPrinter = new CSVPrinter(out, CSVFormat.DEFAULT)) {
      for (int i = 0; i < row.length(); ++i) {
        csvPrinter.print(row.getValue(i));
      }
      // Push buffered characters into bOut before it is read below.
      csvPrinter.flush();
    } catch (IOException e) {
      throw new DirectiveExecutionException(toString() + " : Failed to write CSV row. " + e.getMessage());
    }
    row.add(column, new String(bOut.toByteArray(), java.nio.charset.StandardCharsets.UTF_8));
  }
  return rows;
}
}
/**
 * Parses the String value of {@code column} into date groups and adds every parsed date to the
 * row as a zoned date-time in {@code timezone}.
 *
 * <p>Dates of the n-th group (counting from 1) are added under the name {@code <column>_<n>};
 * all dates of one group share that name. Rows that do not contain {@code column} are left
 * untouched.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 * @throws ErrorRowException if the column exists but its value is not a String.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context)
  throws DirectiveExecutionException, ErrorRowException {
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx == -1) {
      continue;
    }
    Object object = row.getValue(idx);
    if (!(object instanceof String)) {
      throw new ErrorRowException(
        String.format("%s : Invalid type '%s' of column '%s'. Should be of type String.", toString(),
                      object != null ? object.getClass().getName() : "null", column), 1
      );
    }

    List<DateGroup> groups = new Parser(timezone).parse((String) object);
    int ordinal = 0;
    for (DateGroup group : groups) {
      ordinal++;
      for (Date date : group.getDates()) {
        row.add(String.format("%s_%d", column, ordinal), date.toInstant().atZone(timezone.toZoneId()));
      }
    }
  }
  return rows;
}
}
/**
 * Fills {@code column} with {@code value} wherever the column is absent or blank.
 *
 * <p>A column is treated as blank when its value is {@code null}, an empty String, or the
 * {@code JSONObject.NULL} sentinel. Any other value is left unchanged.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx == -1) {
      row.add(column, value);
      continue;
    }

    Object object = row.getValue(idx);
    // The JSON null sentinel is compared directly: guarding the comparison with
    // 'instanceof JSONObject' made it unreachable, since the sentinel is a plain Object
    // (assuming org.json -- TODO confirm the JSONObject import).
    boolean blank = object == null
      || JSONObject.NULL.equals(object)
      || (object instanceof String && ((String) object).isEmpty());
    if (blank) {
      row.setValue(idx, value);
    }
  }
  return rows;
}
}
/**
 * Replaces the value of {@code column} with the result of {@code maskNumber(value, mask)}, where
 * {@code value} is the column's content converted by {@code TypeConvertor.toString}.
 *
 * <p>Rows whose converted value is {@code null} are left unchanged. Rows that do not contain the
 * column get it added with an empty string.
 *
 * @param rows rows to be updated in place.
 * @param context executor context (not used by this method).
 * @return the same list of rows that was passed in.
 * @throws DirectiveExecutionException declared by the directive contract.
 */
@Override
public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
  for (Row row : rows) {
    int idx = row.find(column);
    if (idx == -1) {
      // A literal is enough here; no need to allocate a fresh String per row.
      row.add(column, "");
      continue;
    }
    String value = TypeConvertor.toString(row.getValue(idx));
    if (value != null) {
      row.setValue(idx, maskNumber(value, mask));
    }
  }
  return rows;
}
@Override public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException { for (Row row : rows) { int sidx = row.find(source.value()); if (sidx == -1) { throw new DirectiveExecutionException(MSG.get("column.not.found", toString(), source.value())); } int didx = row.find(destination.value()); // If source and destination are same, then it's a nop. if (didx == sidx) { continue; } if (didx == -1) { // if destination column doesn't exist then add it. row.add(destination.value(), row.getValue(sidx)); } else { // if destination column exists, and force is set to false, then throw exception, else // overwrite it. if (!force) { throw new DirectiveExecutionException(toString() + " : Destination column '" + destination.value() + "' does not exist in the row. Use 'force' option to add new column."); } row.setValue(didx, row.getValue(sidx)); } } return rows; } }
@Override public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException { for (Row row : rows) { // Move the fields from the row into the context. ELContext ctx = new ELContext(context); ctx.set("this", row); for(String var : el.variables()) { ctx.set(var, row.getValue(var)); } // Execution of the script / expression based on the row data // mapped into context. try { ELResult result = el.execute(ctx); int idx = row.find(this.column); if (idx == -1) { row.add(this.column, result.getObject()); } else { row.setValue(idx, result.getObject()); } } catch (ELException e) { throw new DirectiveExecutionException(e.getMessage()); } } return rows; } }