public static void fillRow(int rowIndex, JsonConverter[] converters,
                           TypeDescription schema, VectorizedRowBatch batch,
                           JsonObject data) {
  List<String> fieldNames = schema.getFieldNames();
  for (int c = 0; c < converters.length; ++c) {
    JsonElement field = data.get(fieldNames.get(c));
    if (field == null) {
      batch.cols[c].noNulls = false;
      batch.cols[c].isNull[rowIndex] = true;
    } else {
      converters[c].convert(field, batch.cols[c], rowIndex);
    }
  }
}
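// Hypothetical driver for fillRow (not from the original source): it assumes the
// JsonConverter array is built with the createConverter(TypeDescription) factory
// used by the StructColumnConverter below, and that Gson is on the classpath.
TypeDescription schema = TypeDescription.fromString("struct<name:string,age:int>");
VectorizedRowBatch batch = schema.createRowBatch();

List<TypeDescription> children = schema.getChildren();
JsonConverter[] converters = new JsonConverter[children.size()];
for (int c = 0; c < converters.length; ++c) {
  converters[c] = createConverter(children.get(c));   // assumed per-field factory
}

JsonObject row = JsonParser.parseString("{\"name\": \"alice\"}").getAsJsonObject();
fillRow(batch.size++, converters, schema, batch, row);   // "age" is absent, so column 1 is marked null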
public StructColumnConverter(TypeDescription schema) {
  List<TypeDescription> kids = schema.getChildren();
  childrenConverters = new JsonConverter[kids.size()];
  for (int c = 0; c < childrenConverters.length; ++c) {
    childrenConverters[c] = createConverter(kids.get(c));
  }
  fieldNames = schema.getFieldNames();
}
private void validatePredicate(Predicate pred) {
  if (pred instanceof ColumnPredicate) {
    // check column name
    String colName = ((ColumnPredicate) pred).columnName;
    if (!this.schema.getFieldNames().contains(colName)) {
      throw new IllegalArgumentException("Predicate cannot be applied. " +
          "Column '" + colName + "' does not exist in ORC schema.");
    }
  } else if (pred instanceof Not) {
    validatePredicate(((Not) pred).child());
  } else if (pred instanceof Or) {
    for (Predicate p : ((Or) pred).children()) {
      validatePredicate(p);
    }
  }
}
private static void setStruct(JSONWriter writer, StructColumnVector batch,
                              TypeDescription schema, int row) throws JSONException {
  writer.object();
  List<String> fieldNames = schema.getFieldNames();
  List<TypeDescription> fieldTypes = schema.getChildren();
  for (int i = 0; i < fieldTypes.size(); ++i) {
    writer.key(fieldNames.get(i));
    setValue(writer, batch.fields[i], fieldTypes.get(i), row);
  }
  writer.endObject();
}
fieldTypes[i] = schemaToTypeInfo(fieldSchemas.get(i));
String[] fieldNames = schema.getFieldNames().toArray(new String[]{});
return new RowTypeInfo(fieldTypes, fieldNames);
case LIST:
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
                              TypeDescription schema, int row) throws JSONException {
  if (schema.getCategory() == TypeDescription.Category.STRUCT) {
    List<TypeDescription> fieldTypes = schema.getChildren();
    List<String> fieldNames = schema.getFieldNames();
    writer.object();
    for (int c = 0; c < batch.cols.length; ++c) {
      writer.key(fieldNames.get(c));
      setValue(writer, batch.cols[c], fieldTypes.get(c), row);
    }
    writer.endObject();
  } else {
    setValue(writer, batch.cols[0], schema, row);
  }
}
type.addSubtypes(t.getId());
for (String field : schema.getFieldNames()) {
  type.addFieldNames(field);
public WritableComparable getFieldValue(String fieldName) {
  int fieldIdx = schema.getFieldNames().indexOf(fieldName);
  if (fieldIdx == -1) {
    throw new IllegalArgumentException("Field " + fieldName +
        " not found in " + schema);
  }
  return fields[fieldIdx];
}
public void setFieldValue(String fieldName, WritableComparable value) {
  int fieldIdx = schema.getFieldNames().indexOf(fieldName);
  if (fieldIdx == -1) {
    throw new IllegalArgumentException("Field " + fieldName +
        " not found in " + schema);
  }
  fields[fieldIdx] = value;
}
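// Hedged usage sketch for the two accessors above, assuming the surrounding class is
// org.apache.orc.mapred.OrcStruct (or an equivalent schema-backed struct wrapper).
TypeDescription schema = TypeDescription.fromString("struct<name:string,age:int>");
OrcStruct struct = new OrcStruct(schema);
struct.setFieldValue("name", new Text("alice"));       // org.apache.hadoop.io.Text
struct.setFieldValue("age", new IntWritable(7));       // org.apache.hadoop.io.IntWritable
Text name = (Text) struct.getFieldValue("name");       // index resolved via schema.getFieldNames()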
private boolean[] project(List<String> columns) {
  assert this.schema != null;
  List<String> fields = this.schema.getFieldNames();
  boolean[] toInclude = new boolean[fields.size() + 1];
  // 1 for the Category.STRUCT TypeDescription, which has index 0.
  // This does not seem to be documented in the Orc file format...
  toInclude[0] = false; // the struct type itself
  for (int i = 0; i < fields.size(); i++) {
    String field = fields.get(i);
    toInclude[i + 1] = columns.contains(field);
  }
  return toInclude;
}
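// Illustration (hypothetical schema): for struct<a:int,b:string,c:double> and
// columns = ["a", "c"], project(...) returns {false, true, false, true} --
// slot 0 is the root struct itself, slots 1..3 follow getFieldNames() order.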
private boolean hasColumnNames(TypeDescription fileSchema) {
  if (fileSchema.getCategory() != TypeDescription.Category.STRUCT) {
    return true;
  }
  for (String fieldName : fileSchema.getFieldNames()) {
    if (!missingMetadataPattern.matcher(fieldName).matches()) {
      return true;
    }
  }
  return false;
}
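// Sketch of the idea, assuming missingMetadataPattern matches Hive's placeholder
// names (_col0, _col1, ...); the actual pattern in the source may differ.
Pattern missingMetadataPattern = Pattern.compile("_col\\d+");
TypeDescription hiveStyle = TypeDescription.createStruct()
    .addField("_col0", TypeDescription.createInt())
    .addField("_col1", TypeDescription.createString());
boolean hasNames = hiveStyle.getFieldNames().stream()
    .anyMatch(f -> !missingMetadataPattern.matcher(f).matches());   // false: only placeholder names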
private static void printRow(SpecializedGetters row, TypeDescription schema) {
  List<TypeDescription> children = schema.getChildren();
  System.out.print("{");
  for (int c = 0; c < children.size(); ++c) {
    System.out.print("\"" + schema.getFieldNames().get(c) + "\": ");
    printRow(row, c, children.get(c));
  }
  System.out.print("}");
}
private static boolean checkAcidSchema(TypeDescription type) {
  if (type.getCategory().equals(TypeDescription.Category.STRUCT)) {
    List<String> rootFields = type.getFieldNames();
    if (rootFields.size() != acidEventFieldNames.size()) {
      return false;
    }
    for (int i = 0; i < rootFields.size(); i++) {
      if (!acidEventFieldNames.get(i).equalsIgnoreCase(rootFields.get(i))) {
        return false;
      }
    }
    return true;
  }
  return false;
}
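// Shape the check accepts, assuming acidEventFieldNames holds the standard ACID event
// columns (operation, originalTransaction, bucket, rowId, currentTransaction, row);
// only the field names are compared (case-insensitively), not the child types.
TypeDescription acid = TypeDescription.fromString(
    "struct<operation:int,originalTransaction:bigint,bucket:int,"
        + "rowId:bigint,currentTransaction:bigint,row:struct<id:int>>");
// checkAcidSchema(acid) -> true under that assumption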
private static List<ColumnDescription> getDescriptions(TypeDescription complex) {
  if (complex.getCategory() != TypeDescription.Category.STRUCT)
    throw new RuntimeException("Expected a Struct TypeDescription, got " + complex);
  List<String> fields = complex.getFieldNames();
  List<TypeDescription> types = complex.getChildren();
  assert fields.size() == types.size();
  List<ColumnDescription> result = new ArrayList<ColumnDescription>(fields.size());
  for (int i = 0; i < fields.size(); i++) {
    ContentsKind kind = getKind(types.get(i));
    ColumnDescription desc = new ColumnDescription(fields.get(i), kind);
    result.add(desc);
  }
  return result;
}
static void printStruct(PrintStream output, int offset, TypeDescription type) {
  output.print("<");
  List<TypeDescription> children = type.getChildren();
  List<String> fields = type.getFieldNames();
  for (int c = 0; c < children.size(); ++c) {
    if (c == 0) {
      output.println();
    } else {
      output.println(",");
    }
    pad(output, offset + 2);
    printFieldName(output, fields.get(c));
    output.print(':');
    printType(output, offset + 2, children.get(c));
  }
  output.print('>');
}
public static int[] requestedColumnIds(
    boolean isCaseSensitive,
    String[] requiredFieldNames,
    String[] schemaFieldNames,
    Reader reader) {
  String[] orcFieldNames = reader.getSchema().getFieldNames().toArray(new String[0]);
  if (orcFieldNames.length == 0) {
    // Some old empty ORC files always have an empty schema stored in their footer.
    return null;
  } else {
    boolean isHiveOrcFile =
        Arrays.stream(orcFieldNames).allMatch(field -> field.startsWith("_col"));
    List<Integer> ret = new ArrayList<>();
    if (isHiveOrcFile) {
      // This is an ORC file written by Hive: the physical field names are
      // placeholders, so resolve the required fields against the table schema.
      for (String field : requiredFieldNames) {
        int index = fieldIndex(field, schemaFieldNames, isCaseSensitive);
        ret.add(index);
      }
    } else {
      for (String field : requiredFieldNames) {
        int index = fieldIndex(field, orcFieldNames, isCaseSensitive);
        ret.add(index);
      }
    }
    return ret.stream().mapToInt(i -> i).toArray();
  }
}
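// Hypothetical walk-through of the Hive branch, assuming fieldIndex(...) is a plain
// (optionally case-insensitive) linear search over the given names:
//   orcFieldNames      = {"_col0", "_col1"}   // placeholder names stored in the file
//   schemaFieldNames   = {"id", "name"}       // logical names from the table schema
//   requiredFieldNames = {"name"}
// isHiveOrcFile is true, so "name" is resolved against schemaFieldNames -> [1].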
this.fieldNames = this.fileReaders.get(0).getSchema().getFieldNames();
public List<IOrcInputField> buildInputFields( TypeDescription typeDescription ) {
  List<IOrcInputField> inputFields = new ArrayList<IOrcInputField>();
  Iterator<String> fieldNameIterator = typeDescription.getFieldNames().iterator();
  for ( TypeDescription subDescription : typeDescription.getChildren() ) {
    // Assume getFieldNames is 1:1 with getChildren
    String fieldName = fieldNameIterator.next();
    int formatType = determineFormatType( subDescription );
    if ( formatType != -1 ) { // Skip orc types we do not support
      int metaType = determineMetaType( subDescription );
      if ( metaType == -1 ) {
        throw new IllegalStateException( "Orc Field Name: " + fieldName
          + " - Could not find pdi field type for " + subDescription.getCategory().getName() );
      }
      OrcInputField inputField = new OrcInputField();
      inputField.setFormatFieldName( fieldName );
      inputField.setFormatType( formatType );
      inputField.setPentahoType( metaType );
      inputField.setPentahoFieldName( fieldName );
      inputFields.add( inputField );
    }
  }
  return inputFields;
}
List<String> fieldNames = schema.getFieldNames();
List<TypeDescription> fields = schema.getChildren();
for (String column : selectedColumns.split(",")) {
for ( String orcFieldName : typeDescription.getFieldNames() ) {
  orcColumnNumberMap.put( orcFieldName, orcFieldNumber++ );