switch (schema.getCategory()) { case BOOLEAN: setLongValue(vector, row, ((BooleanWritable) value).get() ? 1 : 0); break; case BYTE: setLongValue(vector, row, ((ByteWritable) value).get()); break; case SHORT: setLongValue(vector, row, ((ShortWritable) value).get()); break; case INT: setLongValue(vector, row, ((IntWritable) value).get()); break; case LONG: setLongValue(vector, row, ((LongWritable) value).get()); break; case FLOAT: setDoubleValue(vector, row, ((FloatWritable) value).get()); break; case DOUBLE: setDoubleValue(vector, row, ((DoubleWritable) value).get()); break; case STRING: setBinaryValue(vector, row, (Text) value); break; case CHAR: setCharValue((BytesColumnVector) vector, row, (Text) value, schema.getMaxLength()); break;
/**
 * Writes every field of a struct value into the child vectors of a struct
 * column.
 *
 * <p>Field {@code i} of {@code value} is written to {@code vector.fields[i]}
 * using the {@code i}-th child of {@code schema}; the number of fields
 * visited is taken from {@code value.getNumFields()}.
 *
 * @param schema the type description whose children describe the fields
 * @param vector the struct column vector to write into
 * @param row the row position to set in each child vector
 * @param value the struct whose field values are copied
 */
static void setStructValue(TypeDescription schema,
                           StructColumnVector vector,
                           int row,
                           OrcStruct value) {
  final List<TypeDescription> fieldTypes = schema.getChildren();
  final int fieldCount = value.getNumFields();
  int index = 0;
  while (index < fieldCount) {
    setColumn(fieldTypes.get(index), vector.fields[index], row,
        value.getFieldValue(index));
    index++;
  }
}
static void setCharValue(BytesColumnVector vector, int row, Text value, int length) { // we need to trim or pad the string with spaces to required length int actualLength = value.getLength(); if (actualLength >= length) { setBinaryValue(vector, row, value, length); } else { byte[] spaces = SPACE_BUFFER.get(); if (length - actualLength > spaces.length) { spaces = new byte[length - actualLength]; Arrays.fill(spaces, (byte)' '); SPACE_BUFFER.set(spaces); } vector.setConcat(row, value.getBytes(), 0, actualLength, spaces, 0, length - actualLength); } }
/**
 * Creates a record writer that sends rows to a new ORC file.
 *
 * <p>The file location comes from {@code getTaskOutputPath(conf, name)} and
 * the writer options from {@code buildOptions(conf)}, with the supplied
 * file system applied to those options.
 *
 * @param fileSystem the file system passed to the writer options
 * @param conf the job configuration
 * @param name the output name used to resolve the task output path
 * @param progressable not used by this implementation
 * @return a record writer wrapping the newly created ORC writer
 * @throws IOException declared by the signature; presumably raised when the
 *     ORC writer cannot be created
 */
@Override
public RecordWriter<NullWritable, V> getRecordWriter(FileSystem fileSystem,
                                                     JobConf conf,
                                                     String name,
                                                     Progressable progressable
                                                     ) throws IOException {
  final Writer orcWriter = OrcFile.createWriter(
      getTaskOutputPath(conf, name),
      buildOptions(conf).fileSystem(fileSystem));
  return new OrcMapredRecordWriter<>(orcWriter);
}
}
/**
 * Writes a union value into a union column.
 *
 * <p>The tag is masked to its low eight bits, recorded in
 * {@code vector.tags[row]}, and then used to pick both the child type and
 * the child vector that receive the wrapped object.
 *
 * @param schema the type description whose children are the union variants
 * @param vector the union column vector to write into
 * @param row the row position to set
 * @param value the union holding the tag and the object to store
 */
static void setUnionValue(TypeDescription schema,
                          UnionColumnVector vector,
                          int row,
                          OrcUnion value) {
  final List<TypeDescription> variants = schema.getChildren();
  final int variant = value.getTag() & 0xff;
  vector.tags[row] = variant;
  final TypeDescription variantType = variants.get(variant);
  setColumn(variantType, vector.fields[variant], row, value.getObject());
}
/**
 * Appends the entries of a map value to the key and value child vectors of
 * a map column and records where they were placed.
 *
 * <p>The entries are stored starting at the current {@code childCount};
 * that start and the entry count are saved in {@code offsets[row]} and
 * {@code lengths[row]}, and {@code childCount} is advanced by the count.
 *
 * @param schema the map type; child 0 is the key type, child 1 the value type
 * @param vector the map column vector to write into
 * @param row the row position to set
 * @param value the map whose entries are copied, in {@code entrySet()} order
 */
static void setMapValue(TypeDescription schema,
                        MapColumnVector vector,
                        int row,
                        OrcMap<?,?> value) {
  final TypeDescription keyType = schema.getChildren().get(0);
  final TypeDescription valueType = schema.getChildren().get(1);

  final int start = vector.childCount;
  final int entryCount = value.size();
  vector.offsets[row] = start;
  vector.lengths[row] = entryCount;
  vector.childCount += entryCount;

  // The flag is true once earlier rows already occupy child slots
  // (presumably the "preserve existing data" switch -- confirm against
  // ColumnVector.ensureSize).
  final boolean hasEarlierEntries = start != 0;
  vector.keys.ensureSize(vector.childCount, hasEarlierEntries);
  vector.values.ensureSize(vector.childCount, hasEarlierEntries);

  int slot = start;
  for (Map.Entry<?,?> entry : value.entrySet()) {
    setColumn(keyType, vector.keys, slot, (Writable) entry.getKey());
    setColumn(valueType, vector.values, slot, (Writable) entry.getValue());
    slot++;
  }
}
@Override public void write(NullWritable nullWritable, V v) throws IOException { // if the batch is full, write it out. if (batch.size == batch.getMaxSize()) { writer.addRowBatch(batch); batch.reset(); } // add the new row int row = batch.size++; // skip over the OrcKey or OrcValue if (v instanceof OrcKey) { v = (V)((OrcKey) v).key; } else if (v instanceof OrcValue) { v = (V)((OrcValue) v).value; } if (isTopStruct) { for(int f=0; f < schema.getChildren().size(); ++f) { setColumn(schema.getChildren().get(f), batch.cols[f], row, ((OrcStruct) v).getFieldValue(f)); } } else { setColumn(schema, batch.cols[0], row, v); } }
/**
 * Appends the elements of a list value to the child vector of a list column
 * and records where they were placed.
 *
 * <p>The elements are stored starting at the current {@code childCount};
 * that start and the element count are saved in {@code offsets[row]} and
 * {@code lengths[row]}, and {@code childCount} is advanced by the count.
 *
 * @param schema the list type; child 0 is the element type
 * @param vector the list column vector to write into
 * @param row the row position to set
 * @param value the list whose elements are copied in index order
 */
static void setListValue(TypeDescription schema,
                         ListColumnVector vector,
                         int row,
                         OrcList value) {
  final TypeDescription elementType = schema.getChildren().get(0);

  final int start = vector.childCount;
  final int elementCount = value.size();
  vector.offsets[row] = start;
  vector.lengths[row] = elementCount;
  vector.childCount += elementCount;

  // The flag is true once earlier rows already occupy child slots
  // (presumably the "preserve existing data" switch -- confirm against
  // ColumnVector.ensureSize).
  vector.child.ensureSize(vector.childCount, start != 0);

  final int end = start + elementCount;
  for (int slot = start; slot < end; slot++) {
    setColumn(elementType, vector.child, slot,
        (Writable) value.get(slot - start));
  }
}
@Override public void write(NullWritable nullWritable, V v) throws IOException { // if the batch is full, write it out. if (batch.size == batch.getMaxSize()) { writer.addRowBatch(batch); batch.reset(); } // add the new row int row = batch.size++; // skip over the OrcKey or OrcValue if (v instanceof OrcKey) { v = (V)((OrcKey) v).key; } else if (v instanceof OrcValue) { v = (V)((OrcValue) v).value; } if (isTopStruct) { for(int f=0; f < schema.getChildren().size(); ++f) { OrcMapredRecordWriter.setColumn(schema.getChildren().get(f), batch.cols[f], row, ((OrcStruct) v).getFieldValue(f)); } } else { OrcMapredRecordWriter.setColumn(schema, batch.cols[0], row, v); } }