@Override
public void configure(JobConf job) {
  StringBuilder builder = new StringBuilder();
  int size = job.getInt("value.size", -1);
  for (int i = 0; i < size; i++) {
    builder.append('a');
  }
  this.value = new BytesWritable(builder.toString().getBytes());
}
private static int getQueueVar(ConfVars var, JobConf jobConf, Configuration daemonConf) {
  // Check job config for overrides, otherwise use the default server value.
  int jobVal = jobConf.getInt(var.varname, -1);
  return (jobVal != -1) ? jobVal : HiveConf.getIntVar(daemonConf, var);
}
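For reference, a minimal sketch (not from the source) of the override semantics this helper implements; the property name is illustrative, and -1 is the sentinel meaning "no per-job override".

// Illustrative only: an int set on the JobConf under the variable's name
// wins; otherwise getInt returns -1 and the daemon-wide default is used.
String varname = "hive.example.queue.var"; // hypothetical property name
JobConf jobConf = new JobConf();
jobConf.getInt(varname, -1);    // -1: no override, fall back to daemonConf
jobConf.setInt(varname, 8);     // per-job override
jobConf.getInt(varname, -1);    // 8: the override wins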
static <T> void configureDataFileWriter(DataFileWriter<T> writer, JobConf job)
    throws UnsupportedEncodingException {
  CodecFactory factory = getCodecFactory(job);
  if (factory != null) {
    writer.setCodec(factory);
  }
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));
  // Copy metadata from the job: text values as-is, binary values
  // URL-decoded from their ISO-8859-1 escaped form.
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    }
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name())
              .getBytes(StandardCharsets.ISO_8859_1));
    }
  }
}
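A minimal sketch of the producer side of that metadata loop, assuming AvroJob.TEXT_PREFIX is accessible from the caller; the key name here is illustrative.

// Keys set under AvroJob.TEXT_PREFIX are copied (minus the prefix) into
// the Avro container-file header by the loop above. "user.comment" is an
// illustrative key name, not one the source defines.
JobConf job = new JobConf();
job.set(AvroJob.TEXT_PREFIX + "user.comment", "built nightly");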
public void configure(JobConf conf) {
  this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
  List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
      .readStoreList(new StringReader(conf.get("stores.xml")));
  if (storeDefs.size() != 1) {
    throw new IllegalStateException("Expected to find only a single store, but found multiple!");
  }
  this.storeDef = storeDefs.get(0);

  this.numChunks = conf.getInt(NUM_CHUNKS, -1);
  if (this.numChunks < 1) {
    // A bit of defensive code for good measure, but this should never happen
    // anymore, now that the config cannot be overridden by the user.
    throw new VoldemortException(NUM_CHUNKS
        + " not specified in the MapReduce JobConf (should NEVER happen)");
  }

  this.saveKeys = conf.getBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, true);
  this.reducerPerBucket = conf.getBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, true);
  this.buildPrimaryReplicasOnly =
      conf.getBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, false);
  if (buildPrimaryReplicasOnly && !saveKeys) {
    throw new IllegalStateException(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY
        + " can only be true if " + VoldemortBuildAndPushJob.SAVE_KEYS + " is also true.");
  }
}
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed,
    Properties properties, Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }
  dfw.create(schema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(dfw);
}
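A hedged sketch of how the compression keys read above might be set on the job. The string literals are the conventional values behind the Avro constants (AvroOutputFormat.DEFLATE_LEVEL_KEY and AvroJob.OUTPUT_CODEC); treat them as assumptions if your Avro version differs. Note that whether compression is used at all is decided by the caller via the isCompressed flag, not by these keys.

// Illustrative job-side configuration for compressed Avro output.
JobConf jobConf = new JobConf();
jobConf.setInt("avro.mapred.deflate.level", 7);   // assumed value of DEFLATE_LEVEL_KEY
jobConf.set("avro.output.codec", "deflate");      // assumed value of OUTPUT_CODEC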
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
int numPartitions = job.getInt(Constants.JDBC_NUM_PARTITIONS, -1);
String lowerBound = job.get(Constants.JDBC_LOW_BOUND);
String upperBound = job.get(Constants.JDBC_UPPER_BOUND);
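An illustrative producer-side counterpart for the split parameters read above, reusing the same Constants class the snippet references; the values are made up. The bounds are carried as strings and presumably parsed downstream.

// Hypothetical values: split the source table into 4 JDBC partitions
// over the key range [0, 100000).
job.setInt(Constants.JDBC_NUM_PARTITIONS, 4);
job.set(Constants.JDBC_LOW_BOUND, "0");
job.set(Constants.JDBC_UPPER_BOUND, "100000");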
@Test
public void testSetSyncInterval() {
  JobConf jobConf = new JobConf();
  int newSyncInterval = 100000;
  AvroOutputFormat.setSyncInterval(jobConf, newSyncInterval);
  assertEquals(newSyncInterval, jobConf.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, -1));
}
/**
 * Runs only once; reads parameters from Hadoop's configuration.
 */
@Override
public void configure(JobConf jobconf) {
  this.windowSize = jobconf.getInt("moving.average.window.size", 5);
}
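For completeness, a hedged sketch (not from the source) of the driver-side counterpart; the key matches the one read above and the value 7 is arbitrary.

// Driver-side override: without this, configure() falls back to 5.
JobConf jobConf = new JobConf();
jobConf.setInt("moving.average.window.size", 7);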
maxEventsInMemory = HiveConf.getIntVar(conf, ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY);
final boolean isBucketedTable = conf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
this.orcSplit = orcSplit;
short replication = (short) job.getInt("mapred.submit.replication", 10);
hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
hdfs.setReplication(hdfsFilePath, replication);
/**
 * Check that the given number of reduce tasks for the given job configuration
 * does not exceed the number of regions for the given table.
 */
@Test
public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable() throws IOException {
  Assert.assertNotNull(presidentsTable);
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.setScannerCaching(jobConf, 100);
  assertEquals(1, jobConf.getNumReduceTasks());
  assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));

  jobConf.setNumReduceTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumReduceTasks());
}
private void getOptions(JobConf job) {
  pages = job.getLong("pages", 0);
  slotpages = job.getLong("slotpages", 0);
  hashsize = job.getInt("hashsize", 0);
}
public void configure(JobConf job) {
  try {
    pages = job.getLong("pages", 0);
    slotpages = job.getLong("slotpages", 0);
    groups = job.getInt("groups", 0);
    generator = new HtmlCore(job);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
// Save the current events-in-memory threshold, then lower it for the test.
int oldValue =
    conf.getInt(HiveConf.ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY.varname, 1000000);
conf.setInt(HiveConf.ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY.varname, 1000);
testVectorizedOrcAcidRowBatchReader(SortMergedDeleteEventRegistry.class.getName());
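Presumably the test restores the saved value afterwards; a hedged sketch of that restore, reusing conf and oldValue from the snippet above.

// Hedged sketch (not confirmed from the source): put the original
// threshold back so later tests see the default again.
conf.setInt(HiveConf.ConfVars.HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY.varname, oldValue);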