/**
 * Configures the output for the job.
 *
 * @param job The job to configure.
 * @throws IOException If there is an error.
 */
protected void configureOutput(Job job) throws IOException {
  if (null == mJobOutput) {
    throw new JobConfigurationException("Must specify job output.");
  }
  mJobOutput.configure(job);
}
/**
 * Configures the job output.
 *
 * @param jobOutput The output for the job. A bulk importer must output to a Fiji table.
 * @return This builder instance so you may chain configuration method calls.
 *
 * {@inheritDoc}
 */
@Override
public FijiBulkImportJobBuilder withOutput(MapReduceJobOutput jobOutput) {
  if (!(jobOutput instanceof FijiTableMapReduceJobOutput)) {
    throw new JobConfigurationException(String.format(
        "Invalid job output %s: expecting %s or %s",
        jobOutput.getClass().getName(),
        DirectFijiTableMapReduceJobOutput.class.getName(),
        HFileMapReduceJobOutput.class.getName()));
  }
  return withOutput((FijiTableMapReduceJobOutput) jobOutput);
}
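// Hypothetical call sites for the check above. The builder variable, the
// output constructors' argument lists, and TextMapReduceJobOutput are
// assumptions for illustration; only the two accepted output class names
// are confirmed by the error message above.
builder.withOutput(new DirectFijiTableMapReduceJobOutput(tableURI));   // accepted
builder.withOutput(new HFileMapReduceJobOutput(tableURI, hfilePath));  // accepted
builder.withOutput(new TextMapReduceJobOutput(outputPath, 1));         // throws JobConfigurationException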
/**
 * Returns a FijiDataRequest that describes which input columns need to be available.
 *
 * @return A Fiji data request.
 */
public FijiDataRequest getDataRequest() {
  final FijiDataRequest dataRequest = mProducer.getDataRequest();
  if (dataRequest.isEmpty()) {
    throw new JobConfigurationException(mProducer.getClass().getName()
        + " returned an empty FijiDataRequest, which is not allowed.");
  }
  return dataRequest;
}
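// A minimal sketch of a producer that satisfies the non-empty check above.
// The FijiDataRequest.create(family, qualifier) factory is an assumption
// for illustration; any producer returning a non-empty request passes.
public class EmailProducer extends FijiProducer {
  @Override
  public FijiDataRequest getDataRequest() {
    // Requesting one column guarantees the request is non-empty.
    return FijiDataRequest.create("info", "email");
  }
  // (produce() and the other required producer methods are omitted)
}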
throw new JobConfigurationException("Must specify job input."); throw new JobConfigurationException("Must specify a mapper.");
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
  final Configuration conf = job.getConfiguration();
  if (null == mPivoterClass) {
    throw new JobConfigurationException("Must specify a FijiPivoter class.");
  }

  // Serialize the pivoter class name into the job configuration.
  conf.setClass(FijiConfKeys.FIJI_PIVOTER_CLASS, mPivoterClass, FijiPivoter.class);

  // The pivoter runs in the mapper; the identity reducer passes its output through unchanged.
  mMapper = new PivoterMapper();
  mReducer = new IdentityReducer<Object, Object>();

  job.setJobName("FijiPivoter: " + mPivoterClass.getSimpleName());

  mPivoter = ReflectionUtils.newInstance(mPivoterClass, job.getConfiguration());
  mDataRequest = mPivoter.getDataRequest();

  // Configure the table input job.
  super.configureJob(job);
}
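// A hypothetical FijiPivoter implementation for context. getDataRequest()
// is confirmed by the builder above; the produce(row, context) signature
// and FijiTableContext are assumptions for illustration.
public class ReverseEmailPivoter extends FijiPivoter {
  @Override
  public FijiDataRequest getDataRequest() {
    // Ask for the single column this pivoter reads.
    return FijiDataRequest.create("info", "email");
  }

  @Override
  public void produce(FijiRowData row, FijiTableContext context) throws IOException {
    // Hypothetical per-row transform writing to the configured output table.
  }
}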
/**
 * Configures the MapReduce mapper for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureMapper(Job job) throws IOException {
  // Set the map class in the job configuration.
  final FijiMapper<?, ?, ?, ?> mapper = getMapper();
  if (null == mapper) {
    throw new JobConfigurationException("Must specify a mapper.");
  }
  if (mapper instanceof Configurable) {
    ((Configurable) mapper).setConf(job.getConfiguration());
  }
  job.setMapperClass(((Mapper<?, ?, ?, ?>) mapper).getClass());

  // Set the map output key and map output value types in the job configuration.
  job.setMapOutputKeyClass(mapper.getOutputKeyClass());
  job.setMapOutputValueClass(mapper.getOutputValueClass());

  configureAvro(job, mapper);
  configureHTableInput(job, mapper);
}
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
  final Configuration conf = job.getConfiguration();

  // Store the name of the importer to use in the job configuration so the mapper can
  // create instances of it.
  if (null == mBulkImporterClass) {
    throw new JobConfigurationException("Must specify a bulk importer.");
  }
  conf.setClass(
      FijiConfKeys.FIJI_BULK_IMPORTER_CLASS, mBulkImporterClass, FijiBulkImporter.class);

  mJobOutput.configure(job);

  // Configure the mapper and reducer. This part depends on whether we're going to write
  // to HFiles or directly to the table.
  configureJobForHFileOutput(job);

  job.setJobName("Fiji bulk import: " + mBulkImporterClass.getSimpleName());

  // Construct the bulk importer instance.
  mBulkImporter = ReflectionUtils.newInstance(mBulkImporterClass, conf);

  // Configure the MapReduce job (requires mBulkImporter to be set properly):
  super.configureJob(job);
}
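// A hypothetical FijiBulkImporter for context, i.e. the kind of class the
// mapper instantiates from FIJI_BULK_IMPORTER_CLASS. The produce(key, value,
// context) signature and the FijiTableContext calls are assumptions for
// illustration.
public class CsvBulkImporter extends FijiBulkImporter<LongWritable, Text> {
  @Override
  public void produce(LongWritable filePos, Text line, FijiTableContext context)
      throws IOException {
    // Parse one CSV line of the form "user,email" and write it to the table.
    final String[] fields = line.toString().split(",");
    context.put(context.getEntityId(fields[0]), "info", "email", fields[1]);
  }
}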
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
  final Configuration conf = job.getConfiguration();
  if (null == mProducerClass) {
    throw new JobConfigurationException("Must specify a producer.");
  }

  // Serialize the producer class name into the job configuration.
  conf.setClass(FijiConfKeys.FIJI_PRODUCER_CLASS, mProducerClass, FijiProducer.class);

  // Write to the table, but make sure the output table is the same as the input table.
  if (!getInputTableURI().equals(mJobOutput.getOutputTableURI())) {
    throw new JobConfigurationException("Output table must be the same as the input table.");
  }

  // Producers should output to HFiles.
  mMapper = new ProduceMapper();
  mReducer = new IdentityReducer<Object, Object>();

  job.setJobName("Fiji produce: " + mProducerClass.getSimpleName());

  // Construct the producer instance.
  mProducer = ReflectionUtils.newInstance(mProducerClass, job.getConfiguration());
  mDataRequest = mProducer.getDataRequest();

  // Configure the table input job.
  super.configureJob(job);
}
if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
  if (null == outputKeyWriterSchema) {
    throw new JobConfigurationException(
        "Using AvroKey output, but a writer schema was not provided. "
        + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
  }
} else if (null != outputKeyWriterSchema) {
  throw new JobConfigurationException(reducer.getClass().getName()
      + ".getAvroKeyWriterSchema() returned a non-null Schema"
      + " but the output key class was not AvroKey.");
}

if (AvroValue.class.isAssignableFrom(outputValueClass)) {
  if (null == outputValueWriterSchema) {
    throw new JobConfigurationException(
        "Using AvroValue output, but a writer schema was not provided. "
        + "Did you forget to implement AvroValueWriter in your FijiReducer?");
  }
} else if (null != outputValueWriterSchema) {
  throw new JobConfigurationException(reducer.getClass().getName()
      + ".getAvroValueWriterSchema() returned a non-null Schema"
      + " but the output value class was not AvroValue.");
}
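// A minimal sketch of supplying the writer schema the check above asks for.
// It assumes AvroKeyWriter declares getAvroKeyWriterSchema(), as the error
// messages suggest; the reducer's type parameters here are illustrative only.
public class MyAvroReducer extends FijiReducer<Text, LongWritable, AvroKey<Long>, NullWritable>
    implements AvroKeyWriter {
  @Override
  public Schema getAvroKeyWriterSchema() throws IOException {
    // Writer schema matching the AvroKey<Long> output key type.
    return Schema.create(Schema.Type.LONG);
  }
  // (reduce() and the output key/value class accessors are omitted)
}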
throw new JobConfigurationException("Must specify a gatherer.");
if (null != outputKeyWriterSchema) {
  if (!AvroKey.class.isAssignableFrom(job.getMapOutputKeyClass())) {
    throw new JobConfigurationException(mapper.getClass().getName()
        + ".getAvroKeyWriterSchema() returned a non-null Schema"
        + " but the output key class was not AvroKey.");
  }
}

if (null != outputValueWriterSchema) {
  if (!AvroValue.class.isAssignableFrom(job.getMapOutputValueClass())) {
    throw new JobConfigurationException(mapper.getClass().getName()
        + ".getAvroValueWriterSchema() returned a non-null Schema"
        + " but the output value class was not AvroValue.");
  }
}
/**
 * Validates the input table.
 *
 * Sub-classes may override this method to perform additional validation requiring an active
 * connection to the input table.
 *
 * @param table Input table.
 * @throws IOException on I/O error.
 */
protected void validateInputTable(FijiTable table) throws IOException {
  try {
    FijiDataRequestValidator.validatorForLayout(table.getLayout()).validate(getDataRequest());
  } catch (FijiDataRequestException fdre) {
    throw new JobConfigurationException("Invalid data request: " + fdre.getMessage());
  }
}
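// A hypothetical override illustrating the extension point described in the
// Javadoc above. getLayout() is confirmed by the method body; the
// getFamilyMap() accessor is an assumption for illustration.
@Override
protected void validateInputTable(FijiTable table) throws IOException {
  super.validateInputTable(table);  // Keep the data-request validation.
  // Additional, job-specific validation against the live table:
  if (null == table.getLayout().getFamilyMap().get("info")) {
    throw new JobConfigurationException("Input table must contain the 'info' family.");
  }
}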
/** {@inheritDoc} */
@Override
protected void configureOutput(Job job) throws IOException {
  final MapReduceJobOutput output = getJobOutput();
  if (null == output) {
    throw new JobConfigurationException("Must specify job output.");
  }

  final FijiReducer<?, ?, ?, ?> reducer = getReducer();
  if (output instanceof HFileMapReduceJobOutput) {
    if (reducer instanceof IdentityReducer) {
      output.configure(job);
    } else {
      // Cannot use the HFile output format if the reducer is not IdentityReducer:
      // writing HFiles from a Fiji reducer requires an extra map/reduce to sort the HFile keys.
      // This forces the output format of this MapReduce job to be SequenceFile.
      final HFileMapReduceJobOutput hfileOutput = (HFileMapReduceJobOutput) output;
      LOG.warn("Reducing to HFiles will require an extra MapReduce job.");
      new HFileReducerMapReduceJobOutput(hfileOutput).configure(job);
    }
  } else {
    output.configure(job);
  }
}
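// Sketch of the two behaviors handled above, assuming the job builder
// exposes a withReducer() method; names here are illustrative only.
// 1) Identity reduce: HFiles are written directly by this job.
builder.withOutput(hfileOutput).withReducer(IdentityReducer.class);
// 2) Custom reduce: this job emits a SequenceFile and logs the warning; a
//    second job (via HFileReducerMapReduceJobOutput) sorts keys into HFiles.
builder.withOutput(hfileOutput).withReducer(MyAvroReducer.class);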
throw new JobConfigurationException(String.format(
    "Table '%s' has row key hashing disabled, so the number of HFile splits must be "
    + "determined by the number of HRegions in the HTable. "