/**
 * Checks that a "load data" issued inside an explicit transaction is not visible after
 * ROLLBACK: the query at the end is expected to return only the two rows written by the
 * plain insert that ran before the transaction was started.
 */
@Test
public void testAbort() throws Exception {
  final boolean vectorized = false;

  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("drop table if exists Tstage");
  runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')");

  // Tstage only exists as a convenient way to produce test data.
  runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')");
  runStatementOnDriver("insert into Tstage values(5,5),(6,6)");
  // The export writes an ORC data file with the right schema; the 'data' directory
  // used by the load below is created by the export command.
  runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'");

  // These rows are written before the transaction is opened and are the only ones expected back.
  runStatementOnDriver("insert into T values(1,2),(3,4)");

  // The load happens between START TRANSACTION and ROLLBACK, so it must leave no trace.
  runStatementOnDriver("START TRANSACTION");
  runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T");
  runStatementOnDriver("ROLLBACK");

  final String query;
  if (vectorized) {
    query = "select ROW__ID, a, b from T order by ROW__ID";
  } else {
    query = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by ROW__ID";
  }

  // Each entry pairs the tab-separated row prefix with the file the row is expected to live in.
  final String[][] survivingRows = {
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
          "t/delta_0000001_0000001_0000/bucket_00000"},
      {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4",
          "t/delta_0000001_0000001_0000/bucket_00000"}
  };
  checkResult(survivingRows, query, vectorized, "load data inpath");
}
void checkResult(String[][] expectedResult, String query, boolean isVectorized,
@Test public void testLoadAcidFile() throws Exception { MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID, true); runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists T2"); runStatementOnDriver( "create table T (a int, b int) stored as orc"); //This is just a simple way to generate test data runStatementOnDriver("create table T2(a int, b int) stored as orc"); runStatementOnDriver("insert into T values(1,2)"); List<String> rs = runStatementOnDriver("select INPUT__FILE__NAME from T"); Assert.assertEquals(1, rs.size()); Assert.assertTrue("Unexpcted file name", rs.get(0) .endsWith("t/delta_0000001_0000001_0000/bucket_00000")); //T2 is an acid table so this should fail CommandProcessorResponse cpr = runStatementOnDriverNegative( "load data local inpath '" + rs.get(0) + "' into table T2"); Assert.assertEquals("Unexpected error code", ErrorMsg.LOAD_DATA_ACID_FILE.getErrorCode(), cpr.getErrorCode()); } }
/** * By default you can't load into bucketed tables. Things will break badly in acid (data loss, etc) * if loaded data is not bucketed properly. This test is to capture that this is still the default. * If the default is changed, Load Data should probably do more validation to ensure data is * properly distributed into files and files are named correctly. * With the availability of new feature to rewrite such "load data" commands into insert-as-select, * the test should let the load data pass. */ @Test public void testValidations() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')"); File createdFile= folder.newFile("myfile.txt"); FileUtils.writeStringToFile(createdFile, "hello world"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); // This will work with the new support of rewriting load into IAS. runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/Tstage' into table T"); }
/**
 * Writes two rows into an ORC table created with
 * {@code 'transactional_properties'='insert_only'} and checks that a select ordered by
 * {@code b} returns exactly those rows.
 */
@Test
public void testMMOrcTable() throws Exception {
  runStatementOnDriver("drop table if exists T");
  runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true', 'transactional_properties'='insert_only')");

  final int[][] rows = {{1, 2}, {3, 4}};
  final String insertStatement = "insert into T " + makeValuesClause(rows);
  runStatementOnDriver(insertStatement);

  // Rows are already listed in ascending order of b, matching the query's ordering.
  final List<String> actual = runStatementOnDriver("select a, b from T order by b");
  Assert.assertEquals("", stringifyValues(rows), actual);
}
runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(5,5),(6,6)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("START TRANSACTION"); runStatementOnDriver("insert into T values(1,2),(3,4)"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("COMMIT"); checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected2 = new String[][] {
runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) partitioned by (p int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(0,2),(0,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//because 'local' inpath doesn't delete source files runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T partition(p=0)"); runStatementOnDriver("insert into Tstage values(1,2),(1,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' into table T partition(p=1)"); runStatementOnDriver("insert into Tstage values(2,2),(2,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/3'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/3/data' into table T partition(p=1)"); List<String> rs = runStatementOnDriver("select ROW__ID, p, a, b, INPUT__FILE__NAME from T order by p, ROW__ID"); String[][] expected = new String[][] { {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t0\t2", "t/p=0/delta_0000001_0000001_0000/000000_0"}, runStatementOnDriver("insert into Tstage values(5,2),(5,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/4'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/4/data' overwrite into table T partition(p=1)"); String[][] expected2 = new String[][] {
runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("insert into Tstage values(2,2),(3,3)"); runStatementOnDriver("insert into Tstage values(4,4),(5,5)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage");//clean the staging table runStatementOnDriver("alter table T SET TBLPROPERTIES ('transactional' = 'true')"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("insert into Tstage values(5,6),(7,8)"); runStatementOnDriver("insert into Tstage values(8,8)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); runStatementOnDriver("insert into T values(9,9)"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf);
private void loadDataUpdate(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver( "create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}}; checkResult(expected, testQuery, isVectorized, "load data inpath"); runStatementOnDriver("update T set b = 17 where a = 1"); String[][] expected2 = new String[][]{ {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"}, runStatementOnDriver("insert into T values(2,2)"); runStatementOnDriver("delete from T where a = 3"); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected3 = new String[][] { runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' overwrite into table T"); String[][] expected4 = new String[][]{
private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); runStatementOnDriver("truncate table Tstage"); runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/1/data' into table T"); runStatementOnDriver("alter table T compact 'minor'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected1 = new String[][] { runStatementOnDriver("insert into T values(2,2)"); runStatementOnDriver("alter table T compact 'major'"); TestTxnCommands2.runWorker(hiveConf); String[][] expected2 = new String[][] { runStatementOnDriver("insert into Tstage values(5,6),(7,8)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/2'"); runStatementOnDriver("load data inpath '" + getWarehouseDir() + "/2/data' overwrite into table T"); String[][] expected3 = new String[][] { {"{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000004/000000_0"},