/**
 * Builds the argument list used to import the given table.
 *
 * <p>This is a convenience overload: it forwards to the four-argument
 * variant with that variant's last flag switched off.
 *
 * @param tableName the table to import
 * @param commonArgs forwarded unchanged to the four-argument overload
 * @param isAppend forwarded unchanged to the four-argument overload
 * @return the argument list produced by the four-argument overload
 */
private List<String> getArgListForTable(String tableName, boolean commonArgs,
    boolean isAppend) {
  // NOTE(review): the fourth flag is only passed as true by
  // testIncrementalAppendTimestamp in the visible code; presumably it selects
  // append-on-timestamp behaviour -- confirm against the longer overload.
  final boolean extraFlag = false;
  return getArgListForTable(tableName, commonArgs, isAppend, extraFlag);
}
@Test public void testAppendWithTimestamp() throws Exception { // Create a table with data in it; import it. // Then add more data, verify that only the incremental data is pulled. final String TABLE_NAME = "appendTimestamp"; Timestamp thePast = new Timestamp(System.currentTimeMillis() - 100); createTimestampTable(TABLE_NAME, 10, thePast); List<String> args = getArgListForTable(TABLE_NAME, false, false); args.add("--append"); createJob(TABLE_NAME, args); runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 10); // Add some more rows. long importWasBefore = System.currentTimeMillis(); Thread.sleep(50); long rowsAddedTime = System.currentTimeMillis() - 5; assertTrue(rowsAddedTime > importWasBefore); assertTrue(rowsAddedTime < System.currentTimeMillis()); insertIdTimestampRows(TABLE_NAME, 10, 20, new Timestamp(rowsAddedTime)); // Import only those rows. runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 20); }
@Test public void testNoImportFromTheFuture() throws Exception { // If last-modified dates for writes are serialized to be in the // future w.r.t. an import, do not import these rows. final String TABLE_NAME = "futureLastModified"; Timestamp theFuture = new Timestamp(System.currentTimeMillis() + 1000000); createTimestampTable(TABLE_NAME, 10, theFuture); List<String> args = getArgListForTable(TABLE_NAME, true, false); Configuration conf = newConf(); SqoopOptions options = new SqoopOptions(); options.setConf(conf); runImport(options, args); assertDirOfNumbers(TABLE_NAME, 0); }
createTimestampTable(TABLE_NAME, 10, thePast); List<String> args = getArgListForTable(TABLE_NAME, false, false); args.add("--merge-key"); args.add("ID");
@Test public void testAppendWithString() throws Exception { // Create a table with string column in it; // incrementally import it on the string column - it should fail. final String TABLE_NAME = "appendString"; createIdVarcharTable(TABLE_NAME, 10); List<String> args = getArgListForTable(TABLE_NAME, false, true); args.add("--append"); createJob(TABLE_NAME, args); thrown.expect(RuntimeException.class); thrown.reportMissingExceptionWithMessage("Expected incremental import on varchar column to fail"); runJob(TABLE_NAME); }
@Test public void testFullLastModifiedImport() throws Exception { // Given a table of rows imported in the past, // see that they are imported. final String TABLE_NAME = "fullLastModified"; Timestamp thePast = new Timestamp(System.currentTimeMillis() - 100); createTimestampTable(TABLE_NAME, 10, thePast); List<String> args = getArgListForTable(TABLE_NAME, true, false); Configuration conf = newConf(); SqoopOptions options = new SqoopOptions(); options.setConf(conf); runImport(options, args); assertDirOfNumbers(TABLE_NAME, 10); }
createTimestampTable(TABLE_NAME, 10, thePast); List<String> args = getArgListForTable(TABLE_NAME, false, false);
createTimestampTable(TABLE_NAME, 10, thePast); List<String> args = getArgListForTable(TABLE_NAME, false, false); createJob(TABLE_NAME, args); runJob(TABLE_NAME);
/**
 * Creates the output directory up front and then runs an import over a
 * timestamp table, expecting the run to fail with the message that asks for
 * {@code --merge-key} or {@code --append}.
 */
@Test
public void testLastModifiedImportWithExistingOutputDirectoryFails() throws Exception {
  final String table = "failWithExistingOutputDirectory";

  // The directory is created before the import is attempted.
  createDir(table);

  Timestamp pastStamp = new Timestamp(System.currentTimeMillis() - 100);
  createTimestampTable(table, 10, pastStamp);
  List<String> importArgs = getArgListForTable(table, true, false);

  SqoopOptions sqoopOptions = new SqoopOptions(newConf());
  // Ask the tool to throw on error so the rule below can observe the failure.
  sqoopOptions.setThrowOnError(true);

  thrown.expectMessage("--merge-key or --append is required when using --incremental lastmodified and the output directory exists.");

  Sqoop importer = new Sqoop(new ImportTool(), sqoopOptions.getConf(), sqoopOptions);
  ToolRunner.run(importer.getConf(), importer, importArgs.toArray(new String[0]));
}
@Test public void testTimestampBoundary() throws Exception { // Run an import, and then insert rows with the last-modified timestamp // set to the exact time when the first import runs. Run a second import // and ensure that we pick up the new data. long now = System.currentTimeMillis(); final String TABLE_NAME = "boundaryTimestamp"; Timestamp thePast = new Timestamp(now - 100); createTimestampTable(TABLE_NAME, 10, thePast); Timestamp firstJobTime = new Timestamp(now); InstrumentHsqldbManager.setCurrentDbTimestamp(firstJobTime); // Configure the job to use the instrumented Hsqldb manager. Configuration conf = newConf(); conf.set(ConnFactory.FACTORY_CLASS_NAMES_KEY, InstrumentHsqldbManagerFactory.class.getName()); List<String> args = getArgListForTable(TABLE_NAME, false, false); args.add("--append"); createJob(TABLE_NAME, args, conf); runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 10); // Add some more rows with the timestamp equal to the job run timestamp. insertIdTimestampRows(TABLE_NAME, 10, 20, firstJobTime); assertRowCount(TABLE_NAME, 20); // Run a second job with the clock advanced by 100 ms. Timestamp secondJobTime = new Timestamp(now + 100); InstrumentHsqldbManager.setCurrentDbTimestamp(secondJobTime); // Import only those rows. runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 20); }
createTimestampTable(TABLE_NAME, 0, null); List<String> args = getArgListForTable(TABLE_NAME, false, false); args.add("--append"); createJob(TABLE_NAME, args);
/**
 * Same boundary scenario as a two-run saved job, but the argument list is
 * built with {@code isAppend = true} and the extra fourth flag enabled:
 * rows inserted with the exact timestamp reported during the first run must
 * be picked up by the second run.
 */
@Test
public void testIncrementalAppendTimestamp() throws Exception {
  final long baseMillis = System.currentTimeMillis();
  final String table = "incrementalAppendTimestamp";

  Timestamp pastStamp = new Timestamp(baseMillis - 100);
  createTimestampTable(table, 10, pastStamp);

  // Pin the timestamp the instrumented manager reports for the first run.
  Timestamp firstRunStamp = new Timestamp(baseMillis);
  InstrumentHsqldbManager.setCurrentDbTimestamp(firstRunStamp);

  // Route the job through the instrumented Hsqldb manager factory.
  Configuration configuration = newConf();
  configuration.set(ConnFactory.FACTORY_CLASS_NAMES_KEY,
      InstrumentHsqldbManagerFactory.class.getName());

  List<String> jobArgs = getArgListForTable(table, false, true, true);
  createJob(table, jobArgs, configuration);
  runJob(table);
  assertDirOfNumbers(table, 10);

  // New rows carry exactly the timestamp of the first job run.
  insertIdTimestampRows(table, 10, 20, firstRunStamp);
  assertRowCount(table, 20);

  // Advance the reported database time by 100 ms for the second run.
  Timestamp secondRunStamp = new Timestamp(baseMillis + 100);
  InstrumentHsqldbManager.setCurrentDbTimestamp(secondRunStamp);

  // The second run should import only the boundary rows.
  runJob(table);
  assertDirOfNumbers(table, 20);
}

// NOTE(review): dangling annotation -- presumably belongs to the test method
// that follows this block; kept exactly as found.
@Test
@Test public void testTableNameWithSpecialCharacters() throws Exception { // Table name with special characters to verify proper table name escaping final String TABLE_NAME = "my-table.ext"; createIdTable(TABLE_NAME, 0); // Now add some rows. insertIdRows(TABLE_NAME, 0, 10); List<String> args = getArgListForTable(TABLE_NAME, false, true); createJob("emptyJob", args); runJob("emptyJob"); assertDirOfNumbers(TABLE_NAME, 10); }
@Test public void testEmptyJobLastMod() throws Exception { // Create a job and run an import on an empty table. // Nothing should happen. final String TABLE_NAME = "emptyJobLastMod"; createTimestampTable(TABLE_NAME, 0, null); List<String> args = getArgListForTable(TABLE_NAME, false, false); args.add("--append"); createJob("emptyJobLastMod", args); runJob("emptyJobLastMod"); assertDirOfNumbers(TABLE_NAME, 0); // Running the job a second time should result in // nothing happening, it's still empty. runJob("emptyJobLastMod"); assertDirOfNumbers(TABLE_NAME, 0); }
/**
 * Runs a direct import, with {@code isAppend = true} arguments, over an id
 * table of 10 rows and expects all 10 in the output.
 */
@Test
public void testFullAppendImport() throws Exception {
  final String table = "fullAppend1";
  final int rowCount = 10;

  createIdTable(table, rowCount);
  List<String> importArgs = getArgListForTable(table, true, true);

  Configuration configuration = newConf();
  SqoopOptions sqoopOptions = new SqoopOptions();
  sqoopOptions.setConf(configuration);
  runImport(sqoopOptions, importArgs);

  assertDirOfNumbers(table, rowCount);
}
/**
 * Runs a direct import, with {@code isAppend = false} arguments, over an
 * empty timestamp table and expects an empty output.
 */
@Test
public void testEmptyLastModified() throws Exception {
  final String table = "emptyLastModified";

  // Zero rows, hence no timestamp is supplied.
  createTimestampTable(table, 0, null);
  List<String> importArgs = getArgListForTable(table, true, false);

  Configuration configuration = newConf();
  SqoopOptions sqoopOptions = new SqoopOptions();
  sqoopOptions.setConf(configuration);
  runImport(sqoopOptions, importArgs);

  assertDirOfNumbers(table, 0);
}
/**
 * Runs a direct import, with {@code isAppend = true} arguments, over an
 * empty id table and expects an empty output.
 */
@Test
public void testEmptyAppendImport() throws Exception {
  final String table = "emptyAppend1";

  createIdTable(table, 0);
  List<String> importArgs = getArgListForTable(table, true, true);

  Configuration configuration = newConf();
  SqoopOptions sqoopOptions = new SqoopOptions();
  sqoopOptions.setConf(configuration);
  runImport(sqoopOptions, importArgs);

  assertDirOfNumbers(table, 0);
}
@Test public void testEmptyJobAppend() throws Exception { // Create a job and run an import on an empty table. // Nothing should happen. final String TABLE_NAME = "emptyJob"; createIdTable(TABLE_NAME, 0); List<String> args = getArgListForTable(TABLE_NAME, false, true); createJob("emptyJob", args); runJob("emptyJob"); assertDirOfNumbers(TABLE_NAME, 0); // Running the job a second time should result in // nothing happening, it's still empty. runJob("emptyJob"); assertDirOfNumbers(TABLE_NAME, 0); }
@Test public void testAppend() throws Exception { // Create a table with data in it; import it. // Then add more data, verify that only the incremental data is pulled. final String TABLE_NAME = "append"; createIdTable(TABLE_NAME, 10); List<String> args = getArgListForTable(TABLE_NAME, false, true); createJob(TABLE_NAME, args); runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 10); // Add some more rows. insertIdRows(TABLE_NAME, 10, 20); // Import only those rows. runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 20); }
@Test public void testEmptyThenFullJobAppend() throws Exception { // Create an empty table. Import it; nothing happens. // Add some rows. Verify they are appended. final String TABLE_NAME = "emptyThenFull"; createIdTable(TABLE_NAME, 0); List<String> args = getArgListForTable(TABLE_NAME, false, true); createJob(TABLE_NAME, args); runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 0); // Now add some rows. insertIdRows(TABLE_NAME, 0, 10); // Running the job a second time should import 10 rows. runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 10); // Add some more rows. insertIdRows(TABLE_NAME, 10, 20); // Import only those rows. runJob(TABLE_NAME); assertDirOfNumbers(TABLE_NAME, 20); }