/**
 * Imports a table with a single INT column named {@code test_a-v+r/o} and
 * checks that the generated Avro file exposes it as the field
 * {@code TEST_A_V_R_O} carrying the inserted value.
 *
 * @throws IOException propagated from the import or from reading/closing
 *         the Avro output file
 */
@Test
public void testNonIdentCharactersInColumnName() throws IOException {
  String [] names = { "test_a-v+r/o" };
  String [] types = { "INT" };
  String [] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());

    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());
    // The '-', '+' and '/' characters are expected to show up as underscores.
    checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
  } finally {
    // Release the underlying file handle even when an assertion fails.
    reader.close();
  }
}
/**
 * Imports a table with a single INT column named {@code _NAME} and checks
 * that the generated Avro file exposes it as the field {@code __NAME}
 * (an extra leading underscore) carrying the inserted value.
 *
 * @throws IOException propagated from the import or from reading/closing
 *         the Avro output file
 */
@Test
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());

    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());
    checkField(fields.get(0), "__NAME", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("__NAME", 1987, record1.get("__NAME"));
  } finally {
    // Release the underlying file handle even when an assertion fails.
    reader.close();
  }
}
/**
 * Imports the table {@code Non_Std_Character_Test}, whose single INT column
 * name contains the non-ASCII character U+C3A1, and checks that the
 * generated Avro file exposes the column as {@code AVRO\uC3A11} (the
 * non-ASCII character is expected to be kept) carrying the inserted value.
 *
 * @throws IOException propagated from the import or from reading/closing
 *         the Avro output file
 */
@Test
public void testNonstandardCharactersInColumnName() throws IOException {
  String [] names = { "avro\uC3A11" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  this.setCurTableName("Non_Std_Character_Test");
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());

    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());
    checkField(fields.get(0), "AVRO\uC3A11", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("AVRO\uC3A11", 1987, record1.get("AVRO\uC3A11"));
  } finally {
    // Release the underlying file handle even when an assertion fails.
    reader.close();
  }
}
/**
 * Imports the table {@code Non_Std_Character_Test_For_Multiple_Columns}
 * with two INT columns, {@code id$1} and {@code id1$}, and checks that the
 * generated Avro file exposes them as {@code ID_1} and {@code ID1_}
 * respectively, each carrying its inserted value.
 *
 * @throws IOException propagated from the import or from reading/closing
 *         the Avro output file
 */
@Test
public void testNonstandardCharactersInMultipleColumns() throws IOException {
  String[] names = { "id$1", "id1$" };
  String[] types = { "INT", "INT" };
  String[] vals = { "1987", "1988" };
  this.setCurTableName("Non_Std_Character_Test_For_Multiple_Columns");
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());

    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    // First column: '$' in the middle of the name.
    checkField(fields.get(0), "ID_1", Type.INT);
    GenericRecord record1 = reader.next();
    assertEquals("ID_1", 1987, record1.get("ID_1"));

    // Second column: '$' at the end of the name; same record.
    checkField(fields.get(1), "ID1_", Type.INT);
    assertEquals("ID1_", 1988, record1.get("ID1_"));
  } finally {
    // Release the underlying file handle even when an assertion fails.
    reader.close();
  }
}
/**
 * Creates a four-column table (INT, DATE, TIMESTAMP, DECIMAL) whose column
 * names partly contain {@code #}, imports it with the given extra
 * arguments, and asserts that the first Avro record has a non-null value
 * under each expected field name. The table directory is removed once the
 * checks have passed.
 *
 * @param extraArgs additional arguments passed through to
 *        {@code getOutputArgv(true, extraArgs)}
 * @param tableName name to set as the current table before it is created
 * @throws IOException propagated from the import or from reading/closing
 *         the Avro output file
 */
private void checkRecordWithExtraArgs(String[] extraArgs, String tableName) throws IOException {
  String date = "2017-01-19";
  String timeStamp = "2017-01-19 14:47:57.112000";

  String [] names = {"INTFIELD1", "DATA_#_COL0", "DATA#COL1", "DATA___COL2"};
  String [] types = { "INT", "DATE", "TIMESTAMP", "DECIMAL(2,20)" };
  String [] vals = {"1", "{ts \'" + date + "\'}", "{ts \'" + timeStamp + "\'}", "2e20"};
  // Field names expected in the Avro record for the columns above.
  String [] checkNames = {"INTFIELD1", "DATA___COL0", "DATA_COL1", "DATA___COL2"};

  setCurTableName(tableName);

  createTableWithColTypesAndNames(names, types, vals);
  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  try {
    GenericRecord record = reader.next();
    for (String columnName : checkNames) {
      assertNotNull(record.get(columnName));
    }
  } finally {
    // Close the reader before the table directory is removed, so no open
    // handle on the output file is left behind.
    reader.close();
  }

  removeTableDir();
}