/** * test DBInputFormat class. Class should split result for chunks * @throws Exception */ @Test(timeout = 10000) public void testDBInputFormat() throws Exception { JobConf configuration = new JobConf(); setupDriver(configuration); DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>(); format.setConf(configuration); format.setConf(configuration); DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10); Reporter reporter = mock(Reporter.class); RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader( splitter, configuration, reporter); configuration.setInt(MRJobConfig.NUM_MAPS, 3); InputSplit[] lSplits = format.getSplits(configuration, 3); assertEquals(5, lSplits[0].getLength()); assertEquals(3, lSplits.length); // test reader .Some simple tests assertEquals(LongWritable.class, reader.createKey().getClass()); assertEquals(0, reader.getPos()); assertEquals(0, reader.getProgress(), 0.001); reader.close(); }
/** {@inheritDoc} */ @SuppressWarnings("unchecked") public RecordReader<LongWritable, T> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { // wrap the DBRR in a shim class to deal with API differences. return new DBRecordReaderWrapper<T>( (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) createDBRecordReader( (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job)); }
/** {@inheritDoc} */ public InputSplit[] getSplits(JobConf job, int chunks) throws IOException { try { Statement statement = connection.createStatement(); ResultSet results = statement.executeQuery(getCountQuery()); results.next(); long count = results.getLong(1); long chunkSize = (count / chunks); results.close(); statement.close(); InputSplit[] splits = new InputSplit[chunks]; // Split the rows into n-number of chunks and adjust the last chunk // accordingly for (int i = 0; i < chunks; i++) { DBInputSplit split; if ((i + 1) == chunks) split = new DBInputSplit(i * chunkSize, count); else split = new DBInputSplit(i * chunkSize, (i * chunkSize) + chunkSize); splits[i] = split; } return splits; } catch (SQLException e) { throw new IOException(e.getMessage()); } }
/** * * test DBRecordReader. This reader should creates keys, values, know about position.. */ @SuppressWarnings("unchecked") @Test (timeout = 5000) public void testDBRecordReader() throws Exception { JobConf job = mock(JobConf.class); DBConfiguration dbConfig = mock(DBConfiguration.class); String[] fields = { "field1", "filed2" }; @SuppressWarnings("rawtypes") DBRecordReader reader = new DBInputFormat<NullDBWritable>().new DBRecordReader( new DBInputSplit(), NullDBWritable.class, job, DriverForTest.getConnection(), dbConfig, "condition", fields, "table"); LongWritable key = reader.createKey(); assertEquals(0, key.get()); DBWritable value = reader.createValue(); assertEquals( "org.apache.hadoop.mapred.lib.db.DBInputFormat$NullDBWritable", value .getClass().getName()); assertEquals(0, reader.getPos()); assertFalse(reader.next(key, value)); }
/** {@inheritDoc} */ public RecordReader<LongWritable, T> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { // wrap the DBRR in a shim class to deal with API differences. return new DBRecordReaderWrapper<T>( (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) createDBRecordReader( (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job)); }
/** {@inheritDoc} */ public InputSplit[] getSplits(JobConf job, int chunks) throws IOException { try { Statement statement = connection.createStatement(); ResultSet results = statement.executeQuery(getCountQuery()); results.next(); long count = results.getLong(1); long chunkSize = (count / chunks); results.close(); statement.close(); InputSplit[] splits = new InputSplit[chunks]; // Split the rows into n-number of chunks and adjust the last chunk // accordingly for (int i = 0; i < chunks; i++) { DBInputSplit split; if ((i + 1) == chunks) split = new DBInputSplit(i * chunkSize, count); else split = new DBInputSplit(i * chunkSize, (i * chunkSize) + chunkSize); splits[i] = split; } return splits; } catch (SQLException e) { throw new IOException(e.getMessage()); } }
/** {@inheritDoc} */ public RecordReader<LongWritable, T> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { // wrap the DBRR in a shim class to deal with API differences. return new DBRecordReaderWrapper<T>( (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) createDBRecordReader( (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job)); }
public static void runJob(String mysqlJar, String output) throws Exception { Configuration conf = new Configuration(); JobHelper.addJarForJob(conf, mysqlJar); DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/sqoop_test" + "?user=hip_sqoop_user&password=password"); JobConf job = new JobConf(conf); job.setJarByClass(DBImportMapReduce.class); Path outputPath = new Path(output); outputPath.getFileSystem(job).delete(outputPath, true); job.setInputFormat(DBInputFormat.class); job.setOutputFormat(AvroOutputFormat.class); AvroJob.setOutputSchema(job, Stock.SCHEMA$); job.set(AvroJob.OUTPUT_CODEC, SnappyCodec.class.getName()); job.setMapperClass(Map.class); job.setNumMapTasks(4); job.setNumReduceTasks(0); job.setMapOutputKeyClass(AvroWrapper.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(AvroWrapper.class); job.setOutputValueClass(NullWritable.class); FileOutputFormat.setOutputPath(job, outputPath); DBInputFormat.setInput( job, StockRecord.class, "select * from stocks", "SELECT COUNT(id) FROM stocks"); JobClient.runJob(job); }
/** {@inheritDoc} */ public RecordReader<LongWritable, T> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { // wrap the DBRR in a shim class to deal with API differences. return new DBRecordReaderWrapper<T>( (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) createDBRecordReader( (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job)); }
DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
/** {@inheritDoc} */ public RecordReader<LongWritable, T> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { // wrap the DBRR in a shim class to deal with API differences. return new DBRecordReaderWrapper<T>( (org.apache.hadoop.mapreduce.lib.db.DBRecordReader<T>) createDBRecordReader( (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) split, job)); }