@Test
public void testDeflateCompressedAvroImport() throws IOException {
  this.setCurTableName("Deflate_Compressed_Avro_Import_Test_1");
  avroImportTestHelper(new String[] {"--compression-codec",
      "org.apache.hadoop.io.compress.DefaultCodec", }, "deflate");
}

@Test
public void testSpecialCharactersInColumnMappingWithConversion()
    throws IOException, SQLException {
  // escaping enabled by default
  String [] extraArgsEscapeColNamesWithMapping = { "--map-column-java",
      "INTFIELD1=String,DATA_#_COL0=String,DATA#COL1=String,DATA___COL2=String"};
  // escaping disabled
  String [] extraArgsEscapingDisabled = {"--escape-mapping-column-names", "false"};
  // escaping enabled but no mapping provided
  String [] extraArgsEscapingWithoutMapping = {};

  checkRecordWithExtraArgs(extraArgsEscapeColNamesWithMapping, "TABLE1");
  checkRecordWithExtraArgs(extraArgsEscapingDisabled, "TABLE2");
  checkRecordWithExtraArgs(extraArgsEscapingWithoutMapping, "TABLE3");
}

@Test
public void testNullableAvroImport() throws IOException, SQLException {
  String [] types = { "INT" };
  String [] vals = { null };
  createTableWithColTypes(types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record1 = reader.next();
  assertNull(record1.get("DATA_COL0"));
}

private void checkRecordWithExtraArgs(String[] extraArgs, String tableName)
    throws IOException {
  String date = "2017-01-19";
  String timeStamp = "2017-01-19 14:47:57.112000";

  String [] names = {"INTFIELD1", "DATA_#_COL0", "DATA#COL1", "DATA___COL2"};
  String [] types = { "INT", "DATE", "TIMESTAMP", "DECIMAL(2,20)" };
  String [] vals = {"1", "{d '" + date + "'}", "{ts '" + timeStamp + "'}", "2e20"};
  String [] checkNames = {"INTFIELD1", "DATA___COL0", "DATA_COL1", "DATA___COL2"};

  setCurTableName(tableName);
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  GenericRecord record = reader.next();
  for (String columnName : checkNames) {
    assertNotNull(record.get(columnName));
  }

  removeTableDir();
}

@Test
public void testNonIdentCharactersInColumnName() throws IOException {
  String [] names = { "test_a-v+r/o" };
  String [] types = { "INT" };
  String [] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
}

/**
 * Create the argv to pass to Sqoop.
 *
 * @param includeHadoopFlags whether to include the generic Hadoop flags.
 * @param extraArgs extra tool arguments to append, or null for none.
 * @return the argv as an array of strings.
 */
protected String[] getOutputArgv(boolean includeHadoopFlags, String[] extraArgs) {
  ArrayList<String> args = new ArrayList<String>();

  if (includeHadoopFlags) {
    CommonArgs.addHadoopFlags(args);
  }

  args.add("-m");
  args.add("1");
  args.add("--table");
  args.add(getTableName());
  args.add("--connect");
  args.add(HsqldbTestServer.getUrl());
  args.add("--warehouse-dir");
  args.add(getWarehouseDir());
  args.add("--split-by");
  args.add("INTFIELD1");
  args.add("--as-avrodatafile");
  if (extraArgs != null) {
    args.addAll(Arrays.asList(extraArgs));
  }

  return args.toArray(new String[0]);
}

@Test
public void testNonstandardCharactersInColumnName() throws IOException {
  String [] names = { "avro\uC3A11" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  this.setCurTableName("Non_Std_Character_Test");
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "AVRO\uC3A11", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("AVRO\uC3A11", 1987, record1.get("AVRO\uC3A11"));
}

@Test
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}

@Test
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}

@Test
public void testAvroImport() throws IOException {
  this.setCurTableName("Avro_Import_Test");
  avroImportTestHelper(null, null);
}

@Test
public void testNonstandardCharactersInMultipleColumns() throws IOException {
  String[] names = { "id$1", "id1$" };
  String[] types = { "INT", "INT" };
  String[] vals = { "1987", "1988" };
  this.setCurTableName("Non_Std_Character_Test_For_Multiple_Columns");
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "ID_1", Type.INT);
  GenericRecord record1 = reader.next();
  assertEquals("ID_1", 1987, record1.get("ID_1"));

  checkField(fields.get(1), "ID1_", Type.INT);
  assertEquals("ID1_", 1988, record1.get("ID1_"));
}

"VARBINARY(2)", "DECIMAL(3,2)"}; String[] vals = {"true", "100", "200", "1.0", "2.0", "'s'", "'0102'", "'1.00'"}; createTableWithColTypes(types, vals); runImport(getOutputArgv(true, extraArgs)); Path outputFile = new Path(getTablePath(), "part-m-00000.avro"); DataFileReader<GenericRecord> reader = read(outputFile); Schema schema = reader.getSchema(); assertEquals(Schema.Type.RECORD, schema.getType()); assertEquals(types.length, fields.size()); checkField(fields.get(0), "DATA_COL0", Schema.Type.BOOLEAN); checkField(fields.get(1), "DATA_COL1", Schema.Type.INT); checkField(fields.get(2), "DATA_COL2", Schema.Type.LONG); checkField(fields.get(3), "DATA_COL3", Schema.Type.FLOAT); checkField(fields.get(4), "DATA_COL4", Schema.Type.DOUBLE); checkField(fields.get(5), "DATA_COL5", Schema.Type.STRING); checkField(fields.get(6), "DATA_COL6", Schema.Type.BYTES); checkField(fields.get(7), "DATA_COL7", Schema.Type.STRING); checkSchemaFile(schema);
@Test
public void testDefaultCompressedAvroImport() throws IOException {
  this.setCurTableName("Deflate_Compressed_Avro_Import_Test_2");
  avroImportTestHelper(new String[] {"--compress", }, "deflate");
}

@Test
public void testUnsupportedCodec() throws IOException {
  try {
    this.setCurTableName("Deflate_Compressed_Avro_Import_Test_3");
    avroImportTestHelper(new String[] {"--compression-codec", "foobar", }, null);
    fail("Expected IOException");
  } catch (IOException e) {
    // Exception is expected
  }
}