/**
 * Returns a newly constructed {@link OrcSerde} as the {@link Serializer}; every call creates a fresh instance.
 * Deprecation warnings are suppressed for this method.
 * NOTE(review): presumably because the Serializer interface is deprecated - confirm.
 */
@Override @SuppressWarnings("deprecation") public Serializer createSerializer() { return new OrcSerde(); } },
/**
 * Returns a newly constructed {@link OrcSerde} as the {@link Serializer}; every call creates a fresh instance.
 * Deprecation warnings are suppressed for this method.
 * NOTE(review): presumably because the Serializer interface is deprecated - confirm.
 */
@Override @SuppressWarnings("deprecation") public Serializer createSerializer() { return new OrcSerde(); } },
/**
 * Builds the struct object inspector that an {@link OrcSerde} reports for the given Hive type string.
 *
 * <p>The type string is parsed with {@code TypeInfoUtils.getTypeInfoFromTypeString}; anything that does not
 * parse to a {@code StructTypeInfo} is rejected through {@code Preconditions.checkArgument}. The struct type is
 * converted to table properties, a fresh serde is initialized with them and a default {@code Configuration},
 * and the serde's object inspector is returned.
 *
 * @param typeString Hive type string, expected to describe a struct
 * @return the serde's object inspector, cast to {@code StructObjectInspector}
 * @throws RuntimeException wrapping the {@code SerDeException} raised while fetching the inspector
 */
private static StructObjectInspector makeObjectInspector(final String typeString)
{
  final TypeInfo parsedType = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
  Preconditions.checkArgument(
      parsedType instanceof StructTypeInfo,
      StringUtils.format("typeString should be struct type but not [%s]", typeString)
  );
  final Properties tableProperties = getTablePropertiesFromStructTypeInfo((StructTypeInfo) parsedType);

  final OrcSerde orcSerde = new OrcSerde();
  orcSerde.initialize(new Configuration(), tableProperties);

  try {
    return (StructObjectInspector) orcSerde.getObjectInspector();
  }
  catch (SerDeException serdeFailure) {
    // Surface the checked failure as unchecked so callers need no throws clause.
    throw new RuntimeException(serdeFailure);
  }
}
public TestPreparer(String tempFilePath, List<TestColumn> testColumns, int numRows, int stripeRows) throws Exception OrcSerde serde = new OrcSerde(); schema = new Properties(); schema.setProperty("columns",
// Fixture setup: an OrcSerde used through the Serializer interface (deprecation warning suppressed on the
// declaration), a settable struct inspector obtained from createSettableStructObjectInspector("test", VARCHAR),
// and a row object created by that inspector.
// NOTE(review): presumably the inspector describes a single VARCHAR field named "test" - confirm against the helper.
@SuppressWarnings("deprecation") Serializer serde = new OrcSerde(); SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR); Object row = objectInspector.create();
/** * Write a file that contains a given number of maps where each row has 10 entries in total * and some entries have null keys/values. */ private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows) throws IOException, ReflectiveOperationException, SerDeException { Serializer serde = new OrcSerde(); TempFile tempFile = new TempFile(); FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType); SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType); Object row = objectInspector.create(); StructField field = objectInspector.getAllStructFieldRefs().get(0); for (int i = 1; i <= rows; i++) { HashMap<Long, Long> map = new HashMap<>(); for (int j = 1; j <= 8; j++) { Long value = (long) j; map.put(value, value); } // Add null values so that the StreamReader nullVectors are not empty. map.put(null, 0L); map.put(0L, null); objectInspector.setStructFieldData(row, field, map); Writable record = serde.serialize(row, objectInspector); writer.write(record); } writer.close(false); return tempFile; }
/**
 * Writes {@code count} rows with a single BIGINT column to {@code file}, holding the values
 * 0, 1, ..., {@code count - 1} in order.
 *
 * <p>The record writer is created with {@code ORC_12} and {@code CompressionKind.NONE}; one row object is
 * reused for every record.
 *
 * @param file destination file handed to the record writer
 * @param count number of rows to write; nothing is written when it is zero or negative
 */
private static void createSequentialFile(File file, int count)
        throws IOException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);

    @SuppressWarnings("deprecation")
    Serializer serializer = new OrcSerde();

    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object reusableRow = inspector.create();
    StructField onlyField = inspector.getAllStructFieldRefs().get(0);

    int next = 0;
    while (next < count) {
        inspector.setStructFieldData(reusableRow, onlyField, (long) next);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
        next++;
    }

    orcWriter.close(false);
}
/**
 * Checks that an ORC file whose writer is closed without writing any row produces no input splits,
 * and that a freshly initialized {@link OrcSerde} reports {@code null} serde stats.
 *
 * <p>The table schema is two int columns, {@code x} and {@code y}.
 */
@Test
public void testEmptyFile() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");

  // Open a record writer on the test path and close it straight away, so no row is ever written.
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
          properties, Reporter.NULL);
  writer.close(true);

  AbstractSerDe serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);

  InputFormat<?,?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);

  // assertEquals reports the actual split count on failure, unlike assertTrue(0 == splits.length).
  assertEquals(0, splits.length);
  assertEquals(null, serde.getSerDeStats());
}
/**
 * Writes a temporary ORC file with one VARCHAR column in which no value is null, and returns it.
 *
 * <p>Every row holds the character {@code '0'} repeated {@code length} times. The record writer is created
 * with {@code ORC_12} and {@code CompressionKind.NONE}.
 *
 * @param count number of rows to write; nothing is written when it is zero or negative
 * @param length number of characters in each value
 * @return the temp file holding the written data
 */
private static TempFile createSingleColumnVarcharFile(int count, int length)
        throws Exception
{
    Serializer serializer = new OrcSerde();
    TempFile output = new TempFile();
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(output.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", VARCHAR);
    Object reusableRow = inspector.create();
    StructField onlyField = inspector.getAllStructFieldRefs().get(0);

    int written = 0;
    while (written < count) {
        String value = Strings.repeat("0", length);
        inspector.setStructFieldData(reusableRow, onlyField, value);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
        written++;
    }

    orcWriter.close(false);
    return output;
}
/**
 * Writes 100 rows with a single BIGINT column to {@code file}, holding the values 0, 3, 6, ..., 297.
 *
 * <p>{@code flushWriter} is called on the writer before each of the values 60, 120, 180 and 240 is written,
 * i.e. after every 20 rows. NOTE(review): presumably each flush starts a new stripe, as the method name
 * suggests - confirm against flushWriter.
 *
 * @param file destination file handed to the record writer
 */
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);

    @SuppressWarnings("deprecation")
    Serializer serializer = new OrcSerde();

    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object reusableRow = inspector.create();
    StructField onlyField = inspector.getAllStructFieldRefs().get(0);

    for (int value = 0; value < 300; value += 3) {
        boolean atFlushPoint = value > 0 && value % 60 == 0;
        if (atFlushPoint) {
            flushWriter(orcWriter);
        }

        inspector.setStructFieldData(reusableRow, onlyField, (long) value);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
    }

    orcWriter.close(false);
}
/**
 * Expects split generation to fail with a {@link RuntimeException} whose message contains
 * "Permission denied" once the test file's permissions have been changed to octal 0333.
 *
 * <p>The file is produced by writing one record, serialized from a null row and a null inspector,
 * and then closing the writer.
 */
@Test(expected = RuntimeException.class)
public void testSplitGenFailure() throws IOException {
  final Properties tableProps = new Properties();
  final HiveOutputFormat<?, ?> orcOutput = new OrcOutputFormat();
  final org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter recordWriter =
      orcOutput.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
          tableProps, Reporter.NULL);
  recordWriter.write(new OrcSerde().serialize(null, null));
  recordWriter.close(true);

  final InputFormat<?, ?> orcInput = new OrcInputFormat();
  // Octal 0333 sets only the write and execute bits, so no read bit is granted.
  fs.setPermission(testFilePath, FsPermission.createImmutable((short) 0333));
  FileInputFormat.setInputPaths(conf, testFilePath.toString());

  try {
    orcInput.getSplits(conf, 1);
  } catch (RuntimeException failure) {
    final boolean mentionsPermission = failure.getMessage().contains("Permission denied");
    assertEquals(true, mentionsPermission);
    // Rethrow so the expected-exception declaration on @Test is satisfied.
    throw failure;
  }
}
Properties properties = new Properties(); StructObjectInspector inspector = createSoi(); AbstractSerDe serde = new OrcSerde(); OutputFormat<?, ?> outFormat = new OrcOutputFormat(); conf.setInt("mapred.max.split.size", 50); serde.serialize(new SimpleRow(null), inspector)); writer.close(Reporter.NULL); serde = new OrcSerde(); SearchArgument sarg = SearchArgumentFactory.newBuilder()
ObjectInspectorFactory.ObjectInspectorOptions.JAVA); AbstractSerDe serde = new OrcSerde(); OutputFormat<?, ?> outFormat = new OrcOutputFormat(); conf.setInt("mapred.max.split.size", 50); serde.serialize(new NestedRow(7,8,9), inspector)); writer.close(Reporter.NULL); serde = new OrcSerde(); SearchArgument sarg = SearchArgumentFactory.newBuilder()
/**
 * Writes a temporary ORC file with one BIGINT column in which some rows are null, and returns it.
 *
 * <p>Row {@code i} (counting from 0) is null when {@code i} is a multiple of 10 and holds the value
 * {@code i} otherwise. The record writer is created with {@code ORC_12} and {@code CompressionKind.NONE}.
 *
 * @param rows number of rows to write; nothing is written when it is zero or negative
 * @return the temp file holding the written data
 */
private static TempFile createSingleColumnFileWithNullValues(int rows)
        throws IOException, ReflectiveOperationException, SerDeException
{
    Serializer serializer = new OrcSerde();
    TempFile output = new TempFile();
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(output.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object reusableRow = inspector.create();
    StructField onlyField = inspector.getAllStructFieldRefs().get(0);

    for (int index = 0; index < rows; index++) {
        // Every tenth row, starting with the first, is written as null.
        Long cell = (index % 10 == 0) ? null : Long.valueOf(index);
        inspector.setStructFieldData(reusableRow, onlyField, cell);
        orcWriter.write(serializer.serialize(reusableRow, inspector));
    }

    orcWriter.close(false);
    return output;
}
ObjectInspectorFactory.ObjectInspectorOptions.JAVA); AbstractSerDe serde = new OrcSerde(); OutputFormat<?, ?> outFormat = new OrcOutputFormat(); RecordWriter writer = serde.serialize(new NestedRow(7,8,9), inspector)); writer.close(Reporter.NULL); serde = new OrcSerde(); properties.setProperty("columns", "z,r"); properties.setProperty("columns.types", "int:struct<x:int,y:int>");
ObjectInspectorFactory.ObjectInspectorOptions.JAVA); AbstractSerDe serde = new OrcSerde(); HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat(); org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = writer.write(serde.serialize(new MyRow(3,2), inspector)); writer.close(true); serde = new OrcSerde(); SerDeUtils.initializeSerDe(serde, conf, properties, null); assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
ObjectInspectorFactory.ObjectInspectorOptions.JAVA); AbstractSerDe serde = new OrcSerde(); HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat(); org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = writer.write(serde.serialize(new StringRow("miles"), inspector)); writer.close(true); serde = new OrcSerde(); SerDeUtils.initializeSerDe(serde, conf, properties, null); inspector = (StructObjectInspector) serde.getObjectInspector();
conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, newMaxSplitSize); AbstractSerDe serde = new OrcSerde(); HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat(); org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = serde = new OrcSerde(); SerDeUtils.initializeSerDe(serde, conf, properties, null); assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
/** Returns a newly constructed {@link OrcSerde} as the {@link Serializer}; every call creates a fresh instance. */
@Override public Serializer createSerializer() { return new OrcSerde(); } },
/**
 * Creates a writer that targets {@code path}.
 *
 * <p>A record writer is obtained from a new {@code OrcOutputFormat} using a {@code JobConf} copied from
 * {@code conf}; the output map function of {@code pType} is fetched and initialized; and a new
 * {@link OrcSerde} is created.
 *
 * @param conf configuration wrapped in the {@code JobConf} passed to the output format
 * @param path location passed, as a string, to the output format
 * @param pType type whose output map function is stored and initialized
 * @throws IOException declared by this constructor; NOTE(review): presumably raised by getRecordWriter - confirm
 */
public OrcFileWriter(Configuration conf, Path path, PType<T> pType) throws IOException {
  final JobConf hadoopJobConf = new JobConf(conf);
  final OutputFormat orcOutputFormat = new OrcOutputFormat();
  this.writer = orcOutputFormat.getRecordWriter(null, hadoopJobConf, path.toString(), new NullProgress());

  this.mapFn = pType.getOutputMapFn();
  this.mapFn.initialize();

  this.serde = new OrcSerde();
}