/**
 * Builds a column writer for the given schema.
 *
 * <p>The schema is shredded into columns by an {@code AvroColumnator}; its
 * string form is stored in {@code meta} under {@code SCHEMA_KEY} before the
 * underlying {@code ColumnFileWriter} is created from that metadata.
 *
 * @param s     schema to be written
 * @param meta  file metadata; receives the schema text under {@code SCHEMA_KEY}
 * @param model data model retained by this writer
 * @throws IOException declared for failures of the underlying calls
 */
public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
    throws IOException {
  this.schema = s;

  final AvroColumnator shredder = new AvroColumnator(s);

  // Save the schema in the file's metadata before the writer is built from it.
  final String schemaText = s.toString();
  meta.set(SCHEMA_KEY, schemaText);

  this.writer = new ColumnFileWriter(meta, shredder.getColumns());
  this.arrayWidths = shredder.getArrayWidths();
  this.model = model;
}
private void columnize(String path, Schema s, ColumnMetaData parent, boolean isArray) { if (isSimple(s)) { if (path == null) path = s.getFullName(); addColumn(path, simpleValueType(s), parent, isArray); return; path = path == null ? ">" : path+">"; int start = columns.size(); ColumnMetaData p = addColumn(path, ValueType.NULL, parent, true); addColumn(p(path,"key", ""), ValueType.STRING, p, false); columnize(p(path,"value", ""), s.getValueType(), p, false); columnize(p(path, field.name(), "#"), field.schema(), parent, isArray); break; case ARRAY: path = path == null ? "[]" : path+"[]"; addArrayColumn(path, s.getElementType(), parent); break; case UNION: for (Schema branch : s.getTypes()) // array per non-null branch if (branch.getType() != Schema.Type.NULL) addArrayColumn(p(path, branch, "/"), branch, parent); break; default:
private void addArrayColumn(String path, Schema element, ColumnMetaData parent) { if (path == null) path = element.getFullName(); if (isSimple(element)) { // optimize simple arrays addColumn(path, simpleValueType(element), parent, true); return; } // complex array: insert a parent column with lengths int start = columns.size(); ColumnMetaData array = addColumn(path, ValueType.NULL, parent, true); columnize(path, element, array, false); arrayWidths.set(start, columns.size()-start); // fixup with actual width }
/**
 * Verifies that shredding {@code s} produces the expected columns, then runs
 * the write and read checks for the same schema.
 *
 * <p>Columns are compared by count first and then pairwise through their
 * {@code toString()} forms.
 *
 * @param s        schema to shred
 * @param expected columns the shredder is expected to produce, in order
 * @throws Exception if any check fails with an exception
 */
private void check(Schema s, ColumnMetaData... expected) throws Exception {
  final ColumnMetaData[] actual = new AvroColumnator(s).getColumns();
  assertEquals(expected.length, actual.length);

  int index = 0;
  for (ColumnMetaData want : expected) {
    assertEquals(want.toString(), actual[index].toString());
    index++;
  }

  checkWrite(s);
  checkRead(s);
}
private Object read(Schema s) throws IOException { if (isSimple(s)) return nextValue(s, column++); this.column = startColumn; Object value = nextValue(s, column++); if (!isSimple(s.getElementType())) value = read(s.getElementType()); elements.add(value); value = nextValue(branch, column); column++; if (!isSimple(branch)) value = read(branch); } else {
/**
 * Builds the path for a child schema beneath {@code parent}.
 *
 * <p>A union child contributes nothing to the path, so the parent path is
 * returned unchanged; otherwise the child's full name is combined with the
 * parent by the string overload of {@code p}.
 *
 * @param parent path of the enclosing column
 * @param child  schema whose path is wanted
 * @param sep    separator passed through to the string overload
 * @return the parent path for unions, otherwise the combined path
 */
private String p(String parent, Schema child, String sep) {
  final boolean isUnion = child.getType() == Schema.Type.UNION;
  return isUnion ? parent : p(parent, child.getFullName(), sep);
}
public AvroColumnator(Schema schema) { this.schema = schema; columnize(null, schema, null, false); }
/**
 * Asserts that the columns produced for {@code s} match {@code expected},
 * then exercises writing and reading with that schema.
 *
 * <p>The column count is asserted first; each column is then compared to its
 * expected counterpart via {@code toString()}.
 *
 * @param s        schema under test
 * @param expected expected columns, in the order the shredder emits them
 * @throws Exception if any check fails with an exception
 */
private void check(Schema s, ColumnMetaData... expected) throws Exception {
  final AvroColumnator shredder = new AvroColumnator(s);
  final ColumnMetaData[] produced = shredder.getColumns();

  assertEquals(expected.length, produced.length);

  int position = 0;
  while (position < expected.length) {
    final String wanted = expected[position].toString();
    assertEquals(wanted, produced[position].toString());
    position++;
  }

  checkWrite(s);
  checkRead(s);
}
private void addArrayColumn(String path, Schema element, ColumnMetaData parent) { if (path == null) path = element.getFullName(); if (isSimple(element)) { // optimize simple arrays addColumn(path, simpleValueType(element), parent, true); return; } // complex array: insert a parent column with lengths int start = columns.size(); ColumnMetaData array = addColumn(path, ValueType.NULL, parent, true); columnize(path, element, array, false); arrayWidths.set(start, columns.size()-start); // fixup with actual width }
private int write(Object o, Schema s, int column) throws IOException { if (isSimple(s)) { writeValue(o, s, column); return column+1; Collection elements = (Collection)o; writer.writeLength(elements.size(), column); if (isSimple(s.getElementType())) { // optimize simple arrays for (Object element : elements) writeValue(element, s.getElementType(), column); } else { writer.writeLength(1, column); if (isSimple(branch)) { writeValue(o, branch, column++); } else {
/**
 * Computes the column path of {@code child} under {@code parent}.
 *
 * <p>Union schemas do not extend the path. Every other schema appends its
 * full name through the string overload of {@code p}.
 *
 * @param parent path of the enclosing column
 * @param child  schema being placed under {@code parent}
 * @param sep    separator forwarded to the string overload
 * @return {@code parent} itself for a union, otherwise the extended path
 */
private String p(String parent, Schema child, String sep) {
  if (child.getType() != Schema.Type.UNION) {
    final String childName = child.getFullName();
    return p(parent, childName, sep);
  }
  return parent;
}
public AvroColumnator(Schema schema) { this.schema = schema; columnize(null, schema, null, false); }
void initialize() throws IOException { // compute a mapping from column name to number for file Map<String,Integer> fileColumnNumbers = new HashMap<>(); int i = 0; for (ColumnMetaData c : new AvroColumnator(fileSchema).getColumns()) fileColumnNumbers.put(c.getName(), i++); // create iterator for each column in readSchema AvroColumnator readColumnator = new AvroColumnator(readSchema); this.arrayWidths = readColumnator.getArrayWidths(); ColumnMetaData[] readColumns = readColumnator.getColumns(); this.values = new ColumnValues[readColumns.length]; int j = 0; for (ColumnMetaData c : readColumns) { Integer n = fileColumnNumbers.get(c.getName()); if (n != null) values[j++] = reader.getValues(n); } findDefaults(readSchema, fileSchema); }
private void columnize(String path, Schema s, ColumnMetaData parent, boolean isArray) { if (isSimple(s)) { if (path == null) path = s.getFullName(); addColumn(path, simpleValueType(s), parent, isArray); return; path = path == null ? ">" : path+">"; int start = columns.size(); ColumnMetaData p = addColumn(path, ValueType.NULL, parent, true); addColumn(p(path,"key", ""), ValueType.STRING, p, false); columnize(p(path,"value", ""), s.getValueType(), p, false); columnize(p(path, field.name(), "#"), field.schema(), parent, isArray); break; case ARRAY: path = path == null ? "[]" : path+"[]"; addArrayColumn(path, s.getElementType(), parent); break; case UNION: for (Schema branch : s.getTypes()) // array per non-null branch if (branch.getType() != Schema.Type.NULL) addArrayColumn(p(path, branch, "/"), branch, parent); break; default:
private Object read(Schema s) throws IOException { if (isSimple(s)) return nextValue(s, column++); this.column = startColumn; Object value = nextValue(s, column++); if (!isSimple(s.getElementType())) value = read(s.getElementType()); elements.add(value); value = nextValue(s, column); column++; if (!isSimple(branch)) value = read(branch); } else {
/**
 * Creates a column writer that writes data described by {@code s}.
 *
 * <p>An {@code AvroColumnator} derives the columns and array widths from the
 * schema. The schema's string form is recorded in {@code meta} under
 * {@code SCHEMA_KEY}, and the {@code ColumnFileWriter} is then constructed
 * from that metadata and the derived columns.
 *
 * @param s     schema of the data to write
 * @param meta  file metadata, updated with the schema text
 * @param model data model stored on this writer
 * @throws IOException declared for failures of the underlying calls
 */
public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
    throws IOException {
  this.schema = s;
  AvroColumnator columnator = new AvroColumnator(s);

  // Persist the schema in the file metadata.
  meta.set(SCHEMA_KEY, s.toString());

  ColumnFileWriter fileWriter = new ColumnFileWriter(meta, columnator.getColumns());
  this.writer = fileWriter;
  this.arrayWidths = columnator.getArrayWidths();
  this.model = model;
}
private int write(Object o, Schema s, int column) throws IOException { if (isSimple(s)) { writeValue(o, s, column); return column+1; Collection elements = (Collection)o; writer.writeLength(elements.size(), column); if (isSimple(s.getElementType())) { // optimize simple arrays for (Object element : elements) writeValue(element, s.getElementType(), column); } else { writer.writeLength(1, column); if (isSimple(branch)) { writeValue(o, branch, column++); } else {
void initialize() throws IOException { // compute a mapping from column name to number for file Map<String,Integer> fileColumnNumbers = new HashMap<String,Integer>(); int i = 0; for (ColumnMetaData c : new AvroColumnator(fileSchema).getColumns()) fileColumnNumbers.put(c.getName(), i++); // create iterator for each column in readSchema AvroColumnator readColumnator = new AvroColumnator(readSchema); this.arrayWidths = readColumnator.getArrayWidths(); ColumnMetaData[] readColumns = readColumnator.getColumns(); this.values = new ColumnValues[readColumns.length]; int j = 0; for (ColumnMetaData c : readColumns) { Integer n = fileColumnNumbers.get(c.getName()); if (n == null) throw new TrevniRuntimeException("No column named: "+c.getName()); values[j++] = reader.getValues(n); } }