/** Returns a newly constructed {@code com.facebook.hive.orc.OrcSerde} as the serializer; deprecation warnings are suppressed for this method. */
// NOTE(review): the trailing "};" closes an enclosing declaration that is not visible in this excerpt.
@Override @SuppressWarnings("deprecation") public Serializer createSerializer() { return new com.facebook.hive.orc.OrcSerde(); } };
/**
 * Builds a Hive record writer, backed by {@code com.facebook.hive.orc.OrcOutputFormat},
 * that writes to {@code outputFile}.
 *
 * <p>The job configuration carries the compression codec name under both
 * {@code hive.exec.orc.default.compress} and {@code hive.exec.orc.compress}, and sets the
 * entropy string threshold to 1, the dictionary encoding interval to 2, and enables the
 * stride dictionary. The table properties describe a single column named {@code "test"}
 * whose type name is derived from {@code type}.
 *
 * @param outputFile file the writer targets
 * @param compressionCodec compression kind whose name is placed in the configuration
 * @param type column type used to derive the Hive type name
 * @return the record writer returned by {@code getHiveRecordWriter}
 * @throws IOException declared by this method's signature
 */
private static RecordWriter createDwrfRecordWriter(File outputFile, CompressionKind compressionCodec, Type type)
        throws IOException {
    JobConf writerConf = new JobConf();

    // Both compression keys receive the same codec name.
    String codecName = compressionCodec.name();
    writerConf.set("hive.exec.orc.default.compress", codecName);
    writerConf.set("hive.exec.orc.compress", codecName);

    OrcConf.setIntVar(writerConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(writerConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(writerConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);

    com.facebook.hive.orc.OrcOutputFormat outputFormat = new com.facebook.hive.orc.OrcOutputFormat();
    Path targetPath = new Path(outputFile.toURI());
    // Fourth argument is true for every codec except NONE (presumably the "compressed" flag; confirm against the API).
    boolean notUncompressed = compressionCodec != NONE;

    return outputFormat.getHiveRecordWriter(
            writerConf,
            targetPath,
            Text.class,
            notUncompressed,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
/**
 * Reads {@code tempFile} back through the {@code com.facebook.hive.orc} reader and asserts
 * that the values of the struct field named {@code "test"} match {@code expectedValues},
 * in order.
 *
 * <p>Each row value is passed through {@code decodeRecordReaderValue} before being compared
 * with {@code assertColumnValueEquals}. After the last row the method asserts that no
 * expected values are left over. The record reader is always closed, even when an
 * assertion fails part-way through the file.
 *
 * @param type column type used for decoding and comparison
 * @param tempFile file to read back
 * @param expectedValues expected column values, one per row
 * @throws Exception if reading fails or the reader cannot be closed
 */
private static void assertFileContentsDwrfHive(
        Type type,
        TempFile tempFile,
        Iterable<?> expectedValues)
        throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Path path = new Path(tempFile.getFile().getAbsolutePath());
    com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile.createReader(
            path.getFileSystem(configuration),
            path,
            configuration);

    // NOTE(review): the include array is sized far beyond the number of types in the file;
    // this looks deliberate (exercising an over-long include mask) -- confirm before changing.
    boolean[] include = new boolean[reader.getTypes().size() + 100000];
    Arrays.fill(include, true);

    com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);
    try {
        StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
        StructField field = rowInspector.getStructFieldRef("test");

        Iterator<?> iterator = expectedValues.iterator();
        Object rowData = null;
        while (recordReader.hasNext()) {
            // The previous row object is handed back to next() on every iteration.
            rowData = recordReader.next(rowData);
            Object expectedValue = iterator.next();

            Object actualValue = rowInspector.getStructFieldData(rowData, field);
            actualValue = decodeRecordReaderValue(type, actualValue);
            assertColumnValueEquals(type, actualValue, expectedValue);
        }
        // Every expected value must have been consumed by a row in the file.
        assertFalse(iterator.hasNext());
    }
    finally {
        // Release the underlying file resources regardless of assertion outcome.
        recordReader.close();
    }
}
actualValue = ((OrcLazyObject) actualValue).materialize(); List<Object> fields = new ArrayList<>(); com.facebook.hive.orc.OrcStruct structObject = (com.facebook.hive.orc.OrcStruct) actualValue; for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) { fields.add(structObject.getFieldValue(fieldId));
private static DoubleStatistics toDoubleStatistics(OrcProto.DoubleStatistics doubleStatistics) { if (!doubleStatistics.hasMinimum() && !doubleStatistics.hasMaximum()) { return null; } // if either min, max, or sum is NaN, ignore the stat if ((doubleStatistics.hasMinimum() && Double.isNaN(doubleStatistics.getMinimum())) || (doubleStatistics.hasMaximum() && Double.isNaN(doubleStatistics.getMaximum())) || (doubleStatistics.hasSum() && Double.isNaN(doubleStatistics.getSum()))) { return null; } return new DoubleStatistics( doubleStatistics.hasMinimum() ? doubleStatistics.getMinimum() : null, doubleStatistics.hasMaximum() ? doubleStatistics.getMaximum() : null); }
/**
 * Parses a protobuf {@code OrcProto.Footer} from {@code inputStream} and converts it into
 * a {@code Footer}.
 *
 * <p>File-level column statistics are converted with the row-group flag set to
 * {@code false}.
 *
 * @param inputStream stream positioned at the serialized footer
 * @return the converted footer
 * @throws IOException if the protobuf message cannot be read
 */
@Override
public Footer readFooter(InputStream inputStream)
        throws IOException {
    CodedInputStream codedStream = CodedInputStream.newInstance(inputStream);
    OrcProto.Footer protoFooter = OrcProto.Footer.parseFrom(codedStream);

    return new Footer(
            protoFooter.getNumberOfRows(),
            protoFooter.getRowIndexStride(),
            toStripeInformation(protoFooter.getStripesList()),
            toType(protoFooter.getTypesList()),
            toColumnStatistics(protoFooter.getStatisticsList(), false));
}
/**
 * Converts a protobuf stripe description into {@code StripeInformation}.
 *
 * @param stripeInformation protobuf stripe message
 * @return stripe information carrying the row count, offset, index length, data length,
 *         and footer length of the message
 * @throws IllegalArgumentException if the row count does not fit in an {@code int}
 *         (raised by {@code Ints.checkedCast})
 */
private static StripeInformation toStripeInformation(OrcProto.StripeInformation stripeInformation) {
    // The row count is narrowed to int with an overflow check.
    int rowCount = Ints.checkedCast(stripeInformation.getNumberOfRows());
    long stripeOffset = stripeInformation.getOffset();
    long indexBytes = stripeInformation.getIndexLength();
    long dataBytes = stripeInformation.getDataLength();
    long footerBytes = stripeInformation.getFooterLength();

    return new StripeInformation(rowCount, stripeOffset, indexBytes, dataBytes, footerBytes);
}
/**
 * Converts protobuf column statistics into {@code ColumnStatistics}.
 *
 * <p>The value count is copied as-is; the bucket, integer, double, and string statistics
 * are each converted by their dedicated helper. The last constructor argument is always
 * {@code null}.
 *
 * @param statistics protobuf column statistics message
 * @param isRowGroup forwarded to the string statistics conversion
 * @return the converted column statistics
 */
private static ColumnStatistics toColumnStatistics(OrcProto.ColumnStatistics statistics, boolean isRowGroup) {
    return new ColumnStatistics(
            statistics.getNumberOfValues(),
            toBooleanStatistics(statistics.getBucketStatistics()),
            toIntegerStatistics(statistics.getIntStatistics()),
            toDoubleStatistics(statistics.getDoubleStatistics()),
            toStringStatistics(statistics.getStringStatistics(), isRowGroup),
            // NOTE(review): final statistics slot is never populated here -- confirm which kind it represents.
            null);
}
/**
 * Converts protobuf integer statistics into {@code IntegerStatistics}.
 *
 * @param integerStatistics protobuf statistics message
 * @return {@code null} when neither minimum nor maximum is present; otherwise statistics
 *         holding the present bounds (an absent bound is passed as {@code null})
 */
private static IntegerStatistics toIntegerStatistics(OrcProto.IntegerStatistics integerStatistics) {
    boolean minimumPresent = integerStatistics.hasMinimum();
    boolean maximumPresent = integerStatistics.hasMaximum();
    if (!(minimumPresent || maximumPresent)) {
        return null;
    }

    Long lowerBound = null;
    if (minimumPresent) {
        lowerBound = integerStatistics.getMinimum();
    }
    Long upperBound = null;
    if (maximumPresent) {
        upperBound = integerStatistics.getMaximum();
    }
    return new IntegerStatistics(lowerBound, upperBound);
}
private static StringStatistics toStringStatistics(OrcProto.StringStatistics stringStatistics, boolean isRowGroup) { // TODO remove this when string statistics in ORC are fixed https://issues.apache.org/jira/browse/HIVE-8732 if (!isRowGroup) { return null; } if (!stringStatistics.hasMinimum() && !stringStatistics.hasMaximum()) { return null; } Slice minimum = stringStatistics.hasMinimum() ? getMinSlice(stringStatistics.getMinimum()) : null; Slice maximum = stringStatistics.hasMaximum() ? getMaxSlice(stringStatistics.getMaximum()) : null; return new StringStatistics(minimum, maximum); }
/**
 * Converts a protobuf stream descriptor into a {@code Stream}.
 *
 * @param stream protobuf stream message
 * @return a stream carrying the column, the converted stream kind, the length narrowed to
 *         {@code int}, and the use-vints flag of the message
 * @throws IllegalArgumentException if the stream length does not fit in an {@code int}
 *         (raised by {@code Ints.checkedCast})
 */
private static Stream toStream(OrcProto.Stream stream) {
    int column = stream.getColumn();
    return new Stream(
            column,
            toStreamKind(stream.getKind()),
            // Length is narrowed to int with an overflow check.
            Ints.checkedCast(stream.getLength()),
            stream.getUseVInts());
}
/**
 * Converts a protobuf type descriptor into an {@code OrcType}.
 *
 * @param type protobuf type message
 * @return an ORC type built from the converted kind, the subtype list, and the field-name
 *         list of the message
 */
private static OrcType toType(OrcProto.Type type) {
    return new OrcType(
            toTypeKind(type.getKind()),
            type.getSubtypesList(),
            type.getFieldNamesList());
}
/**
 * Parses a protobuf {@code OrcProto.StripeFooter} from {@code inputStream} and converts its
 * stream list and column encodings into a {@code StripeFooter}.
 *
 * @param types ORC types passed to the column-encoding conversion
 * @param inputStream stream positioned at the serialized stripe footer
 * @return the converted stripe footer
 * @throws IOException if the protobuf message cannot be read
 */
@Override
public StripeFooter readStripeFooter(List<OrcType> types, InputStream inputStream)
        throws IOException {
    CodedInputStream codedStream = CodedInputStream.newInstance(inputStream);
    OrcProto.StripeFooter protoFooter = OrcProto.StripeFooter.parseFrom(codedStream);

    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(types, protoFooter.getColumnsList()));
}
/**
 * Converts protobuf bucket statistics into {@code BooleanStatistics}.
 *
 * @param bucketStatistics protobuf bucket statistics message
 * @return {@code null} when the message holds no counts; otherwise statistics built from
 *         the first count
 */
private static BooleanStatistics toBooleanStatistics(OrcProto.BucketStatistics bucketStatistics) {
    boolean noCounts = bucketStatistics.getCountCount() == 0;
    // Only the count at index 0 is used.
    return noCounts ? null : new BooleanStatistics(bucketStatistics.getCount(0));
}
/**
 * Parses a protobuf {@code OrcProto.RowIndex} from {@code inputStream} and converts every
 * entry into a {@code RowGroupIndex}.
 *
 * @param inputStream stream positioned at the serialized row index
 * @return an immutable list with one converted row-group index per entry, in entry order
 * @throws IOException if the protobuf message cannot be read
 */
@Override
public List<RowGroupIndex> readRowIndexes(InputStream inputStream)
        throws IOException {
    CodedInputStream codedStream = CodedInputStream.newInstance(inputStream);
    OrcProto.RowIndex protoIndex = OrcProto.RowIndex.parseFrom(codedStream);

    // The transformed view is copied so the result is an immutable snapshot.
    return ImmutableList.copyOf(
            Iterables.transform(protoIndex.getEntryList(), DwrfMetadataReader::toRowGroupIndex));
}
/**
 * Converts a protobuf column encoding into a {@code ColumnEncoding}.
 *
 * @param type ORC type kind of the column, passed to the encoding-kind conversion
 * @param columnEncoding protobuf column encoding message
 * @return a column encoding carrying the converted kind and the dictionary size of the
 *         message
 */
private static ColumnEncoding toColumnEncoding(OrcTypeKind type, OrcProto.ColumnEncoding columnEncoding) {
    return new ColumnEncoding(
            toColumnEncodingKind(type, columnEncoding.getKind()),
            columnEncoding.getDictionarySize());
}
/**
 * Builds a Hive record writer, backed by {@code com.facebook.hive.orc.OrcOutputFormat},
 * that writes to {@code outputFile}.
 *
 * <p>The job configuration is populated through {@code com.facebook.hive.orc.OrcConf}: the
 * compression variable is set to the codec name, the entropy string threshold to 1, the
 * dictionary encoding interval to 2, and the stride dictionary is enabled. The table
 * properties describe a single column named {@code "test"} whose type name is derived from
 * {@code type}.
 *
 * @param outputFile file the writer targets
 * @param compressionCodec compression kind whose name is placed in the configuration
 * @param type column type used to derive the Hive type name
 * @return the record writer returned by {@code getHiveRecordWriter}
 * @throws IOException declared by this method's signature
 */
private static RecordWriter createDwrfRecordWriter(File outputFile, CompressionKind compressionCodec, Type type)
        throws IOException {
    JobConf writerConf = new JobConf();
    com.facebook.hive.orc.OrcConf.setVar(writerConf, HIVE_ORC_COMPRESSION, compressionCodec.name());
    com.facebook.hive.orc.OrcConf.setIntVar(writerConf, HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    com.facebook.hive.orc.OrcConf.setIntVar(writerConf, HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    com.facebook.hive.orc.OrcConf.setBoolVar(writerConf, HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);

    com.facebook.hive.orc.OrcOutputFormat outputFormat = new com.facebook.hive.orc.OrcOutputFormat();
    Path targetPath = new Path(outputFile.toURI());
    // Fourth argument is true for every codec except NONE (presumably the "compressed" flag; confirm against the API).
    boolean notUncompressed = compressionCodec != NONE;

    return outputFormat.getHiveRecordWriter(
            writerConf,
            targetPath,
            Text.class,
            notUncompressed,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
/** Returns a newly constructed {@code com.facebook.hive.orc.OrcSerde} as the serializer. */
// NOTE(review): the trailing "};" closes an enclosing declaration that is not visible in this excerpt.
@Override public Serializer createSerializer() { return new com.facebook.hive.orc.OrcSerde(); } };
/** Returns a newly constructed {@code com.facebook.hive.orc.OrcSerde} as the serializer; deprecation warnings are suppressed for this method. */
// NOTE(review): the trailing "};" closes an enclosing declaration that is not visible in this excerpt.
@Override @SuppressWarnings("deprecation") public Serializer createSerializer() { return new com.facebook.hive.orc.OrcSerde(); } };
/** Returns a newly constructed {@code com.facebook.hive.orc.OrcSerde} as the serializer. */
// NOTE(review): the trailing "};" closes an enclosing declaration that is not visible in this excerpt.
@Override public Serializer createSerializer() { return new com.facebook.hive.orc.OrcSerde(); } };