// Raw data size reported by the reader for each column queried on its own.
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("byte1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("short1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("int1")));
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("long1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("float1")));
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("double1")));
assertEquals(5, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(172, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(455, reader.getRawDataSizeOfColumns(Lists.newArrayList("list")));
assertEquals(368, reader.getRawDataSizeOfColumns(Lists.newArrayList("map")));
assertEquals(364, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts")));
assertEquals(224, reader.getRawDataSizeOfColumns(Lists.newArrayList("decimal1")));

// Several columns at once: each expected value is the sum of the
// single-column sizes asserted above (8 + 8, 364 + 455 + 368 + 8, 5 + 8 + 172).
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts", "int1")));
assertEquals(1195,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("middle", "list", "map", "float1")));
assertEquals(185,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "byte1", "string1")));

// Asking for every column listed above must reproduce rawDataSize.
assertEquals(rawDataSize,
    reader.getRawDataSizeOfColumns(Lists.newArrayList(
        "boolean1", "byte1", "short1", "int1", "long1", "float1", "double1",
        "bytes1", "string1", "list", "map", "middle", "ts", "decimal1")));
// Per-column checks: one getRawDataSizeOfColumns call per single column name.
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("boolean1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("byte1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("short1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("int1")));
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("long1")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("float1")));
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("double1")));
assertEquals(5, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(172, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(455, reader.getRawDataSizeOfColumns(Lists.newArrayList("list")));
assertEquals(368, reader.getRawDataSizeOfColumns(Lists.newArrayList("map")));
assertEquals(364, reader.getRawDataSizeOfColumns(Lists.newArrayList("middle")));
assertEquals(8, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts")));
assertEquals(224, reader.getRawDataSizeOfColumns(Lists.newArrayList("decimal1")));

// Multi-column checks: the expected totals equal the sums of the matching
// single-column values (16 = 8 + 8, 1195 = 364 + 455 + 368 + 8, 185 = 5 + 8 + 172).
assertEquals(16, reader.getRawDataSizeOfColumns(Lists.newArrayList("ts", "int1")));
assertEquals(1195,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("middle", "list", "map", "float1")));
assertEquals(185,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "byte1", "string1")));

// Full projection: all fourteen column names together must match rawDataSize.
assertEquals(rawDataSize,
    reader.getRawDataSizeOfColumns(Lists.newArrayList(
        "boolean1", "byte1", "short1", "int1", "long1", "float1", "double1",
        "bytes1", "string1", "list", "map", "middle", "ts", "decimal1")));
// Each column on its own ...
assertEquals(1500, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(43000, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
// ... and both together, which is expected to be their sum (1500 + 43000).
assertEquals(44500,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
@Test public void testOrcSerDeStatsMap() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 1000; row++) { Map<String, Double> test = new HashMap<String, Double>(); for (int i = 0; i < 10; i++) { test.put("hi" + i, 2.0); } writer.addRow(new MapStruct(test)); } writer.close(); // stats from writer assertEquals(1000, writer.getNumberOfRows()); assertEquals(950000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(1000, reader.getNumberOfRows()); assertEquals(950000, reader.getRawDataSize()); assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1"))); }
@Test public void testOrcSerDeStatsList() throws Exception { ObjectInspector inspector; synchronized (TestOrcSerDeStats.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector (ListStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA); } Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) .inspector(inspector) .stripeSize(10000) .bufferSize(10000)); for (int row = 0; row < 5000; row++) { List<String> test = new ArrayList<String>(); for (int i = 0; i < 1000; i++) { test.add("hi"); } writer.addRow(new ListStruct(test)); } writer.close(); assertEquals(5000, writer.getNumberOfRows()); assertEquals(430000000, writer.getRawDataSize()); Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs)); // stats from reader assertEquals(5000, reader.getNumberOfRows()); assertEquals(430000000, reader.getRawDataSize()); assertEquals(430000000, reader.getRawDataSizeOfColumns(Lists.newArrayList("list1"))); }
// File-level statistics reported by the reader.
assertEquals(4, reader.getNumberOfRows());
assertEquals(273, reader.getRawDataSize());

// Column-level sizes: bytes1 and string1 individually, then together.
// The combined value (15 + 258) matches the file-level raw data size above.
assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
assertEquals(273,
    reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
OrcFile.readerOptions(context.conf).filesystem(fs)); List<String> projCols = ColumnProjectionUtils.getReadColumnNames(context.conf); projColsUncompressedSize = orcReader.getRawDataSizeOfColumns(projCols); if (fileInfo != null) { stripes = fileInfo.stripeInfos;