/**
 * Note that the "orderBy" column is called the "splitBy" in this version.
 * We reuse the same field, but it is not used for strict ordering -- it
 * only partitions the results.
 */
public static void setInput(Job job,
    Class<? extends DBWritable> inputClass,
    String tableName, String conditions,
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions,
      splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
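/*
 * A minimal driver sketch showing how setInput() above is typically called.
 * This is not part of the class: MyRecord (a DBWritable implementation), the
 * table, and the column names are hypothetical, and the usual imports from
 * org.apache.hadoop.mapreduce and org.apache.hadoop.mapreduce.lib.db are
 * assumed.
 */
public static Job exampleDriver() throws IOException {
  Job job = Job.getInstance(new Configuration(), "db-import");
  DBConfiguration.configureDB(job.getConfiguration(),
      "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/mydb");
  DataDrivenDBInputFormat.setInput(job, MyRecord.class,
      "employees",              // table name (hypothetical)
      "salary > 1000",          // conditions appended to the WHERE clause
      "id",                     // splitBy column: partitions, does not order
      "id", "name", "salary");  // fields to select
  return job;
}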
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
/** {@inheritDoc} */
public void configure(JobConf job) {
  // Bridge the old-API configure() hook to the new API's setConf().
  super.setConf(job);
}
try {
  statement = connection.createStatement();
  results = statement.executeQuery(getCountQuery());
  results.next();
  // ... use the row count to size the splits ...
} catch (SQLException e1) {
  // Rethrow rather than silently swallowing the failure.
  throw new IOException(e1.getMessage(), e1);
} finally {
  closeConnection();
}
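/*
 * For reference, a sketch of what getCountQuery() used above is expected to
 * return (an assumption: a plain COUNT(*) over the configured table, with
 * the optional conditions appended):
 */
protected String getCountQuery() {
  StringBuilder query = new StringBuilder(
      "SELECT COUNT(*) FROM " + tableName);
  if (conditions != null && conditions.length() > 0) {
    query.append(" WHERE ").append(conditions);
  }
  return query.toString();
}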
protected RecordReader<LongWritable, T> createDBRecordReader(DBInputSplit split,
    Configuration conf) throws IOException {
  @SuppressWarnings("unchecked")
  Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
  try {
    // Use the database product name to choose the appropriate record reader.
    if (dbProductName.startsWith("ORACLE")) {
      // Oracle-specific db reader.
      return new OracleDBRecordReader<T>(split, inputClass, conf,
          createConnection(), getDBConf(), conditions, fieldNames, tableName);
    } else if (dbProductName.startsWith("MYSQL")) {
      // MySQL-specific db reader.
      return new MySQLDBRecordReader<T>(split, inputClass, conf,
          createConnection(), getDBConf(), conditions, fieldNames, tableName);
    } else {
      // Generic reader.
      return new DBRecordReader<T>(split, inputClass, conf,
          createConnection(), getDBConf(), conditions, fieldNames, tableName);
    }
  } catch (SQLException ex) {
    // Preserve the original exception as the cause.
    throw new IOException(ex.getMessage(), ex);
  }
}
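/*
 * Sketch of where dbProductName comes from -- an assumption, based on the
 * common pattern of caching the JDBC product name at configuration time:
 */
private String lookupProductName() throws SQLException {
  try (Connection conn = createConnection()) {
    // e.g. "ORACLE", "MYSQL", "POSTGRESQL", ...
    return conn.getMetaData().getDatabaseProductName().toUpperCase();
  }
}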
public Connection getConnection() {
  // TODO Remove this code that handles backward compatibility.
  if (this.connection == null) {
    this.connection = createConnection();
  }
  return this.connection;
}
/** {@inheritDoc} */
public InputSplit[] getSplits(JobConf job, int chunks) throws IOException {
  List<org.apache.hadoop.mapreduce.InputSplit> newSplits =
      super.getSplits(Job.getInstance(job));
  InputSplit[] ret = new InputSplit[newSplits.size()];
  int i = 0;
  for (org.apache.hadoop.mapreduce.InputSplit s : newSplits) {
    org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit split =
        (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit) s;
    ret[i++] = new DBInputSplit(split.getStart(), split.getEnd());
  }
  return ret;
}
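/*
 * Usage sketch for the old-API shim above. Hypothetical driver code; it
 * assumes this method lives in the org.apache.hadoop.mapred.lib.db
 * compatibility subclass, which is what the JobConf signature suggests.
 */
// JobConf jobConf = new JobConf(MyDriver.class);
// jobConf.setInputFormat(org.apache.hadoop.mapred.lib.db.DBInputFormat.class);
// DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver",
//     "jdbc:mysql://localhost/mydb");
// // The framework then calls getSplits(jobConf, numMapTasks), which
// // delegates to the new-API implementation as shown above.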