private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException {
    org.apache.hadoop.fs.FileSystem fs =
        org.apache.hadoop.fs.FileSystem.getLocal(new Configuration());
    Reader reader = OrcFile.createReader(orcFile,
        OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    StructObjectInspector inspector =
        (StructObjectInspector) reader.getObjectInspector();

    System.out.format("Found Bucket File : %s \n", orcFile.getName());
    ArrayList<SampleRec> result = new ArrayList<SampleRec>();
    while (rows.hasNext()) {
        Object row = rows.next(null);
        // In an ACID delta file the payload is the sixth struct field, after
        // operation, original/current transaction ids, bucket and rowId.
        SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5];
        result.add(rec);
    }
    return result;
}
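A hypothetical call site for dumpBucket; the delta-directory path is illustrative, and SampleRec plus the conf field come from the surrounding test class:

Path bucketFile = new Path("/tmp/warehouse/t/delta_0000001_0000001/bucket_00000");
for (SampleRec rec : dumpBucket(bucketFile)) {
    System.out.println(rec);  // one deserialized payload row per ORC row
}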
private static void assertFileContentsOrcHive(
        Type type,
        TempFile tempFile,
        Iterable<?> expectedValues)
        throws Exception
{
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Reader reader = OrcFile.createReader(
            new Path(tempFile.getFile().getAbsolutePath()),
            new ReaderOptions(configuration));
    org.apache.hadoop.hive.ql.io.orc.RecordReader recordReader = reader.rows();

    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    Object rowData = null;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        Object expectedValue = iterator.next();

        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
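A hypothetical invocation, assuming a Presto-style BIGINT type constant and Guava's ImmutableList on the classpath, with tempFile already holding a single "test" column containing 0, 1, 2:

assertFileContentsOrcHive(BIGINT, tempFile, ImmutableList.of(0L, 1L, 2L));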
ObjectInspector orcOi = reader.getObjectInspector();
ObjectInspector stoi = TypeInfoUtils
    .getStandardJavaObjectInspectorFromTypeInfo(OrcTestReducer3.typeInfo);
    .version(reader.getFileVersion())
    .rowIndexStride(reader.getRowIndexStride())
    .inspector(reader.getObjectInspector());
rr.seekToRow(lastRow);
OrcStruct row = (OrcStruct) rr.next(null);
StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
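The fragment above seeks with a precomputed lastRow; a minimal sketch of where such a value might come from, assuming reader is an open org.apache.hadoop.hive.ql.io.orc.Reader (variable names are illustrative):

RecordReader rr = reader.rows();
long lastRow = reader.getNumberOfRows() - 1;  // index of the final row
rr.seekToRow(lastRow);
OrcStruct row = (OrcStruct) rr.next(null);
rr.close();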
assertEquals(reader.getCompression(), CompressionKind.ZLIB);
StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
StructTypeInfo ti = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(soi);
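A self-contained sketch of the same inspection on an arbitrary file; the path is illustrative, not from the source:

Configuration conf = new Configuration();
Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
    OrcFile.readerOptions(conf));
System.out.println(reader.getCompression());                    // e.g. ZLIB
System.out.println(reader.getObjectInspector().getTypeName());  // struct<...>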
    .version(fileVersion)
    .rowIndexStride(rowIndexStride)
    .inspector(reader.getObjectInspector());
StructObjectInspector readerInspector =
    (StructObjectInspector) reader.getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
StructObjectInspector readerInspector = (StructObjectInspector) reader
    .getObjectInspector();
assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
    .version(version)
    .rowIndexStride(rowIndexStride)
    .inspector(reader.getObjectInspector());
    OrcFile.readerOptions(conf).filesystem(fs));
StructObjectInspector readerInspector =
    (StructObjectInspector) reader.getObjectInspector();
List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
HiveDecimalObjectInspector doi = (HiveDecimalObjectInspector) readerInspector.
row = (OrcStruct) rows.next(null);
assertEquals(1, rows.getRowNumber());
inspector = reader.getObjectInspector();
assertEquals("struct<time:timestamp,union:uniontype<int,string>,decimal:decimal(38,18)>",
    inspector.getTypeName());
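Building on the schema above, a minimal sketch that pulls the decimal column out through the inspectors; it assumes reader is open, and the field name "decimal" comes from the type string:

RecordReader rr = reader.rows();
StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
StructField decField = soi.getStructFieldRef("decimal");
HiveDecimalObjectInspector doi =
    (HiveDecimalObjectInspector) decField.getFieldObjectInspector();
Object row = null;
while (rr.hasNext()) {
    row = rr.next(row);
    HiveDecimal d = doi.getPrimitiveJavaObject(soi.getStructFieldData(row, decField));
    System.out.println(d);
}
rr.close();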
private TypeInfo getTypeInfoFromLocation(String location, Job job) throws IOException {
    FileSystem fs = FileSystem.get(new Path(location).toUri(), job.getConfiguration());
    Path path = getFirstFile(location, fs, new NonEmptyOrcFileFilter(fs));
    if (path == null) {
        log.info("Cannot find any ORC files under " + location
            + "; the script probably contains multiple load/store statements.");
        return null;
    }
    Reader reader = OrcFile.createReader(fs, path);
    ObjectInspector oip = reader.getObjectInspector();
    return TypeInfoUtils.getTypeInfoFromObjectInspector(oip);
}
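A hypothetical call site; the location string and job variable are illustrative:

TypeInfo typeInfo = getTypeInfoFromLocation("/user/pig/output/orc", job);
if (typeInfo != null) {
    System.out.println(typeInfo.getTypeName());  // e.g. struct<a:int,b:string>
}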
private int getAllColumnsCount(String filePath) {
    final String colPrefix = "_col";
    Path path = new Path(filePath);
    try {
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(hadoopConf));
        // Derive the column count from the schema string, e.g.
        // "struct<_col0:string,_col1:int>", by counting "_col" occurrences.
        String typeStruct = reader.getObjectInspector().getTypeName();
        return (typeStruct.length() - typeStruct.replace(colPrefix, "").length())
            / colPrefix.length();
    } catch (IOException e) {
        String message = "Failed to read the column count from the ORC file; "
            + "please contact the system administrator.";
        throw DataXException.asDataXException(HdfsReaderErrorCode.READ_FILE_ERROR, message);
    }
}
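Counting "_col" substrings breaks if a real column name happens to contain that text; a sketch of a more robust count through the struct inspector, using the same reader setup as above:

StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
int columnsCount = soi.getAllStructFieldRefs().size();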
public SparkOrcNewRecordReader(Reader file, Configuration conf, long offset,
        long length) throws IOException {
    List<OrcProto.Type> types = file.getTypes();
    // types.get(0) is the root struct; its subtype count is the number of
    // top-level columns.
    numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
    value = new OrcStruct(numColumns);
    this.reader = OrcInputFormat.createReaderFromFile(file, conf, offset, length);
    this.objectInspector = file.getObjectInspector();
}
@Override
public boolean accept(Path path) {
    try {
        Reader reader = OrcFile.createReader(fs, path);
        ObjectInspector oip = reader.getObjectInspector();
        ResourceFieldSchema rs = HiveUtils.getResourceFieldSchema(
            TypeInfoUtils.getTypeInfoFromObjectInspector(oip));
        // Accept only files whose schema has at least one field.
        if (rs.getSchema().getFields().length != 0) {
            return true;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return false;
} }
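This accept method appears to be the body of the NonEmptyOrcFileFilter referenced earlier; a hypothetical usage that keeps only non-empty ORC files (the directory path is illustrative):

FileStatus[] nonEmpty = fs.listStatus(new Path("/data/orc"),
    new NonEmptyOrcFileFilter(fs));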