/**
 * {@inheritDoc}
 *
 * <p>Hands back a fresh {@code FijiTableRecordReader} constructed from this input
 * format's {@code mConf}. Neither {@code split} nor {@code context} is read here.</p>
 *
 * @param split The input split the reader will be asked to read (unused in this method).
 * @param context The task attempt context (unused in this method).
 * @return A new record reader over entity ids and row data.
 * @throws IOException Declared by the overridden method.
 */
@Override
public RecordReader<EntityId, FijiRowData> createRecordReader(
    InputSplit split,
    TaskAttemptContext context)
    throws IOException {
  final FijiTableRecordReader recordReader = new FijiTableRecordReader(mConf);
  return recordReader;
}
/**
 * Adds the jars from a directory into the distributed cache of a job.
 *
 * <p>Convenience overload: wraps the path in a {@link File} and delegates to
 * {@link #addJarsToDistributedCache(Job, File)}.</p>
 *
 * @param job The job to configure.
 * @param jarDirectory A path to a directory of jar files. May not be null.
 * @throws IOException If there is a problem reading from the file system.
 * @throws IllegalArgumentException If {@code jarDirectory} is null.
 */
public static void addJarsToDistributedCache(Job job, String jarDirectory)
    throws IOException {
  if (null == jarDirectory) {
    // new File((String) null) would fail with a bare NullPointerException. Reject the
    // null path up front with the same exception and message the File overload uses.
    throw new IllegalArgumentException("Jar directory may not be null");
  }
  addJarsToDistributedCache(job, new File(jarDirectory));
}
/** * Adds the jars from a directory into the distributed cache of a job. * * @param job The job to configure. * @param jarDirectory A path to a directory of jar files. * @throws IOException If there is a problem reading from the file system. */ public static void addJarsToDistributedCache(Job job, File jarDirectory) throws IOException { if (null == jarDirectory) { throw new IllegalArgumentException("Jar directory may not be null"); } if (!jarDirectory.exists()) { throw new IOException("The jar directory " + jarDirectory.getPath() + " does not exist."); } List<String> allJars = new ArrayList<String>(); // Get existing jars named in configuration. allJars.addAll(getJarsFromConfiguration(job.getConfiguration())); // Add jars from jarDirectory. allJars.addAll(getJarsFromDirectory(job.getConfiguration(), jarDirectory)); // De-dupe List<String> deDupedJars = deDuplicateJarNames(allJars); job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(deDupedJars, ",")); }
// Job setup snippet.
// 1. Builds a table URI of the form fiji://.env/default/<TABLE_NAME>.
// 2. Starts a FijiTableInputFormat.configureJob(job, tableURI, ...) call.
//    NOTE(review): this call's opening parenthesis is never closed in this excerpt; its
//    remaining arguments and the closing ");" appear to be missing. As written, the
//    addJarsToDistributedCache(...) call below parses as its third argument and the
//    snippet does not compile. Restore the missing arguments before reusing it.
// 3. Adds the jars from the "lib" directory under the FIJI_HOME environment variable to
//    the job's distributed cache.
// 4. Calls setUserClassesTakesPrecedence(job, true) on the platform bridge.
final FijiURI tableURI = FijiURI.newBuilder(String.format("fiji://.env/default/%s", TABLE_NAME)).build(); FijiTableInputFormat.configureJob( job, tableURI, DistributedCacheJars.addJarsToDistributedCache( job, new File(System.getenv("FIJI_HOME"), "lib")); FijiMRPlatformBridge.get().setUserClassesTakesPrecedence(job, true);
/** {@inheritDoc} */ @Override public List<InputSplit> getSplits(JobContext context) throws IOException { final Configuration conf = context.getConfiguration(); final FijiURI inputTableURI = getInputTableURI(conf); final Fiji fiji = Fiji.Factory.open(inputTableURI, conf); final FijiTable table = fiji.openTable(inputTableURI.getTable()); final HTableInterface htable = HBaseFijiTable.downcast(table).openHTableConnection(); try { final List<InputSplit> splits = Lists.newArrayList(); for (FijiRegion region : table.getRegions()) { final byte[] startKey = region.getStartKey(); // TODO: a smart way to get which location is most relevant. final String location = region.getLocations().isEmpty() ? null : region.getLocations().iterator().next(); final TableSplit tableSplit = new TableSplit( htable.getTableName(), startKey, region.getEndKey(), location); splits.add(new FijiTableSplit(tableSplit, startKey)); } return splits; } finally { htable.close(); } }
/**
 * {@inheritDoc}
 *
 * <p>Records the split, opens the Fiji instance and table named by the input table URI
 * found in the task configuration, opens a table reader, and starts a scanner whose
 * start and stop rows are taken from the split. The instance, table, reader, scanner
 * and iterator are kept in fields; nothing in this method releases them.</p>
 *
 * @param split The split to read; expected to be a {@code FijiTableSplit}.
 * @param context The task attempt context supplying the configuration.
 * @throws IOException If the instance, table, reader or scanner cannot be opened.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException {
  assert split instanceof FijiTableSplit;
  mSplit = (FijiTableSplit) split;

  // Resolve the input table from the task configuration and open it.
  final Configuration taskConf = context.getConfiguration();
  final FijiURI tableURI = getInputTableURI(taskConf);
  mFiji = Fiji.Factory.open(tableURI, taskConf);
  mTable = mFiji.openTable(tableURI.getTable());
  mReader = mTable.openTableReader();

  // Bound the scan by the row keys carried in the split.
  final FijiScannerOptions rowRange = new FijiScannerOptions()
      .setStartRow(HBaseEntityId.fromHBaseRowKey(mSplit.getStartRow()))
      .setStopRow(HBaseEntityId.fromHBaseRowKey(mSplit.getEndRow()));
  mScanner = mReader.getScanner(mDataRequest, rowRange);
  mIterator = mScanner.iterator();

  // No row has been read yet.
  mCurrentRow = null;
}
// Job setup snippet.
// 1. Builds a table URI of the form fiji://.env/default/<TABLE_NAME>.
// 2. Starts a FijiTableInputFormat.configureJob(job, tableURI, ...) call.
//    NOTE(review): this call's opening parenthesis is never closed in this excerpt; its
//    remaining arguments and the closing ");" appear to be missing. As written, the
//    addJarsToDistributedCache(...) call below parses as its third argument and the
//    snippet does not compile. Restore the missing arguments before reusing it.
// 3. Adds the jars from the "lib" directory under the FIJI_HOME environment variable to
//    the job's distributed cache.
// 4. Calls setUserClassesTakesPrecedence(job, true) on the platform bridge.
final FijiURI tableURI = FijiURI.newBuilder(String.format("fiji://.env/default/%s", TABLE_NAME)).build(); FijiTableInputFormat.configureJob( job, tableURI, DistributedCacheJars.addJarsToDistributedCache(job, new File(System.getenv("FIJI_HOME"), "lib")); FijiMRPlatformBridge.get().setUserClassesTakesPrecedence(job, true);
DistributedCacheJars.addJarsToDistributedCache( job, new File(System.getenv("FIJI_HOME"), "lib")); FijiMRPlatformBridge.get().setUserClassesTakesPrecedence(job, true);