runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//because 'local' inpath doesn't delete source files runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T partition(p=0)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' into table T partition(p=1)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/3'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/3/data' into table T partition(p=1)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/4'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/4/data' overwrite into table T partition(p=1)"); String[][] expected2 = new String[][] { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"},
/** * By default you can't load into bucketed tables. Things will break badly in acid (data loss, etc) * if loaded data is not bucketed properly. This test is to capture that this is still the default. * If the default is changed, Load Data should probably do more validation to ensure data is * properly distributed into files and files are named correctly. * With the availability of new feature to rewrite such "load data" commands into insert-as-select, * the test should let the load data pass. */ @Test public void testValidations() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')"); File createdFile= folder.newFile("myfile.txt"); FileUtils.writeStringToFile(createdFile, "hello world"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); // This will work with the new support of rewriting load into IAS. runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/Tstage' into table T"); }
/**
 * Verifies that a {@code LOAD DATA} executed inside an explicit transaction leaves no visible
 * rows once that transaction is rolled back.
 *
 * <p>Two rows are inserted into T, then the exported contents of Tstage are loaded into T
 * between {@code START TRANSACTION} and {@code ROLLBACK}. The expected result holds only the
 * two rows from the insert.
 *
 * @throws Exception if any of the statements fails
 */
@Test
public void testAbort() throws Exception {
  final boolean vectorized = false;

  for (String table : new String[] {"T", "Tstage"}) {
    runStatementOnDriver("drop table if exists " + table);
  }

  final String createTarget =
      "create table T (a int, b int) stored as orc tblproperties('transactional'='true')";
  runStatementOnDriver(createTarget);

  // Tstage is just a simple way to generate test data.
  final String createStaging =
      "create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')";
  runStatementOnDriver(createStaging);
  runStatementOnDriver("insert into Tstage values(5,5),(6,6)");

  // The 'data' directory used by the load below is created by the export command.
  final String exportStaging = "export table Tstage to '" + getWarehouseDir() + "/1'";
  runStatementOnDriver(exportStaging);

  // Rows written to T before the transaction starts; these are the rows expected below.
  runStatementOnDriver("insert into T values(1,2),(3,4)");

  // Load the exported data into T inside a transaction, then abort that transaction.
  runStatementOnDriver("START TRANSACTION");
  final String loadExported =
      "load data local inpath '" + getWarehouseDir() + "/1/data' into table T";
  runStatementOnDriver(loadExported);
  runStatementOnDriver("ROLLBACK");

  final String testQuery;
  if (vectorized) {
    testQuery = "select ROW__ID, a, b from T order by ROW__ID";
  } else {
    testQuery = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
  }

  // Only the rows from the insert (write id 1) are expected; the (5,5),(6,6) rows from the
  // rolled-back load are absent.
  final String insertDeltaFile = "t/delta_0000001_0000001_0000/bucket_00000";
  final String[][] expected = {
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", insertDeltaFile},
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", insertDeltaFile}
  };
  checkResult(expected, testQuery, vectorized, "load data inpath");
}

void checkResult(String[][] expectedResult, String query, boolean isVectorized,
runStatementOnDriver("insert into Tstage values(4,4),(5,5)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//clean the staging table runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T");
runStatementOnDriver("insert into Tstage values(5,5),(6,6)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("COMMIT");
runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); String[][] expected3 = new String[][] { {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"},
runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); checkResult(expected3, testQuery, isVectorized, "delete compact minor"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' overwrite into table T"); String[][] expected4 = new String[][]{ {"{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/base_0000005/000000_0"}, runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("update T set b = 17 where a = 1");//matches 2 rows runStatementOnDriver("delete from T where a = 3");//matches 2 rows