job = Job.getInstance(
    new Configuration(),
    StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
);

config.addJobProperties(job);

JobHelper.injectDruidProperties(job.getConfiguration(), config.getAllowedHadoopPrefix());

int numReducers = Iterables.size(config.getAllBuckets().get());
if (numReducers == 0) {
  throw new RuntimeException("No buckets?? seems there is no data to index.");
}

if (config.getSchema().getTuningConfig().getUseCombiner()) {
  job.setCombinerClass(IndexGeneratorCombiner.class);
  job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
}

job.setOutputValueClass(Text.class);
job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

config.addInputPaths(job);
config.intoConfiguration(job);

JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    job
);
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());

  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }

  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  parser = config.getParser();
  granularitySpec = config.getGranularitySpec();
}
public static HadoopDruidIndexerConfig fromConfiguration(Configuration conf)
{
  final HadoopDruidIndexerConfig retVal = fromString(conf.get(HadoopDruidIndexerConfig.CONFIG_PROPERTY));
  retVal.verify();
  return retVal;
}
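For context, fromConfiguration() is the task-side half of a round trip: the driver serializes the config into the Hadoop job and each mapper/reducer rebuilds it from CONFIG_PROPERTY. A minimal sketch of that round trip, assuming a driver-side job and config as in the index-generator snippet above:

// Driver side: write the serialized spec into the job's Configuration
// (this is the config.intoConfiguration(job) call from the job setup above).
config.intoConfiguration(job);

// Task side: rebuild and verify the same config from the Configuration,
// which is what fromConfiguration() does with CONFIG_PROPERTY.
HadoopDruidIndexerConfig rebuilt = HadoopDruidIndexerConfig.fromConfiguration(job.getConfiguration());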
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  determineIntervals = !config.getSegmentGranularIntervals().isPresent();
}
final Interval interval = config.getGranularitySpec().bucketInterval(bucket.time).get();

int numBackgroundPersistThreads = config.getSchema().getTuningConfig().getNumBackgroundPersistThreads();
if (numBackgroundPersistThreads > 0) {
  final BlockingQueue<Runnable> queue = new SynchronousQueue<>();
  // ...
}

final FileSystem outputFS = new Path(config.getSchema().getIOConfig().getSegmentOutputPath())
    .getFileSystem(context.getConfiguration());

final ShardSpec shardSpecForPartitioning = config.getShardSpec(bucket).getActualSpec();
if (config.isForceExtendableShardSpecs()) {
  shardSpecForPublishing = new NumberedShardSpec(
      shardSpecForPartitioning.getPartitionNum(),
      config.getShardSpecCount(bucket)
  );
} else {
  // ...
}

// ... (elided: the segment template and publish call that take these arguments)
    config.getDataSource(),
    interval,
    config.getSchema().getTuningConfig().getVersion(),
    null,
    ImmutableList.copyOf(allDimensionNames),
    // ...
    mergedBase,
    JobHelper.makeFileNamePath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS,
        segmentTemplate,
        // ...
public static boolean runSingleJob(Jobby job, HadoopDruidIndexerConfig config)
{
  boolean succeeded = job.run();

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        Configuration conf = injectSystemProperties(new Configuration());
        config.addJobProperties(conf);
        workingPath.getFileSystem(conf).delete(workingPath, true);
      }
      catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  return succeeded;
}
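A hedged usage sketch of runSingleJob(): the Jobby here is the HadoopDruidDetermineConfigurationJob already used in the run() method above, and cleanup of the intermediate path follows the leaveIntermediate/cleanupOnFailure tuning flags checked in the method body:

// Run one Jobby; runSingleJob() then decides whether to delete the intermediate path.
boolean succeeded = JobHelper.runSingleJob(new HadoopDruidDetermineConfigurationJob(config), config);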
@Override
protected List<? extends Module> getModules()
{
  return ImmutableList.of(
      binder -> {
        binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/internal-hadoop-indexer");
        binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
        binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);

        // bind metadata storage config based on HadoopIOConfig
        MetadataStorageUpdaterJobSpec metadataSpec = getHadoopDruidIndexerConfig().getSchema()
                                                                                  .getIOConfig()
                                                                                  .getMetadataUpdateSpec();

        binder.bind(new TypeLiteral<Supplier<MetadataStorageConnectorConfig>>() {})
              .toInstance(metadataSpec);
        binder.bind(MetadataStorageTablesConfig.class).toInstance(metadataSpec.getMetadataStorageTablesConfig());
        binder.bind(IndexerMetadataStorageCoordinator.class).to(IndexerSQLMetadataStorageCoordinator.class).in(
            LazySingleton.class
        );
      }
  );
}
HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());

ParseSpec parseSpec = config.getParser().getParseSpec();
String tsField = config.getParser().getParseSpec().getTimestampSpec().getTimestampColumn();

List<DimensionSchema> dimensionSchema = config.getParser().getParseSpec().getDimensionsSpec().getDimensions();
Set<String> dimensions = new HashSet<>();
for (DimensionSchema dim : dimensionSchema) {
  // collect the declared dimension names
  dimensions.add(dim.getName());
}

for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
  metricsFields.addAll(agg.requiredFields());
}
if (updatedIngestionSpec.getDimensions() == null) {
  List<String> dims;
  if (config.getParser().getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
    dims = config.getParser().getParseSpec().getDimensionsSpec().getDimensionNames();
  } else {
    Set<String> dimSet = Sets.newHashSet(
        // ...
        Sets.difference(
            dimSet,
            config.getParser()
                  .getParseSpec()
                  .getDimensionsSpec()
                  // ...

final AggregatorFactory[] cols = config.getSchema().getDataSchema().getAggregators();
if (cols != null) {
  if (useNewAggs) {
    // ...

updatedIngestionSpec =
    updatedIngestionSpec.withQueryGranularity(config.getGranularitySpec().getQueryGranularity());
// ...
    config.getSchema().getDataSchema().getTransformSpec()
);
    config.getGranularitySpec() instanceof UniformGranularitySpec,
    StringUtils.format(
        "Cannot use %s without %s",
        // ...

final Granularity segmentGranularity = config.getGranularitySpec().getSegmentGranularity();

// ...
    config.getSchema().getIOConfig().getSegmentOutputPath(),
    segmentGranularity.toPath(timeBucket)
);

config.setGranularitySpec(
    new UniformGranularitySpec(
        segmentGranularity,
        config.getGranularitySpec().getQueryGranularity(),
        config.getGranularitySpec().isRollup(),
        Lists.newArrayList(bucketsToRun)
    )
);
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9
      .mergeQueryableIndex(indexes, rollup, aggs, file, config.getIndexSpec(), progressIndicator, null);
}
@Override
protected void setup(Context context)
{
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  SingleDimensionPartitionsSpec spec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec();
  helper = new DeterminePartitionsDimSelectionMapperHelper(config, spec.getPartitionDimension());
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  rollupGranularity = getConfig().getGranularitySpec().getQueryGranularity();
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  Optional<Set<Interval>> intervals = config.getSegmentGranularIntervals();
  if (intervals.isPresent()) {
    determineIntervals = false;
    final ImmutableMap.Builder<Interval, HyperLogLogCollector> builder = ImmutableMap.builder();
    for (final Interval bucketInterval : intervals.get()) {
      builder.put(bucketInterval, HyperLogLogCollector.makeLatestCollector());
    }
    hyperLogLogs = builder.build();
  } else {
    determineIntervals = true;
    hyperLogLogs = new HashMap<>();
  }
}
try {
  if (argumentSpec.startsWith("{")) {
    // inline JSON spec
    config = HadoopDruidIndexerConfig.fromString(argumentSpec);
  } else {
    File localConfigFile = null;
    // ... (elided: resolve argumentSpec to a local file when possible)
    if (localConfigFile != null) {
      config = HadoopDruidIndexerConfig.fromFile(localConfigFile);
    } else {
      config = HadoopDruidIndexerConfig.fromDistributedFileSystem(argumentSpec);
    }
  }
}
// ...
    HadoopIngestionSpec.class
);

final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
    theSchema
        .withTuningConfig(theSchema.getTuningConfig().withVersion(version))
);

// ...

if (config.isUpdaterJobSpecSet()) {
  maybeHandler = injector.getInstance(MetadataStorageUpdaterJobHandler.class);
} else {
  maybeHandler = null;
}
@Inject
public HadoopDruidIndexerJob(
    HadoopDruidIndexerConfig config,
    MetadataStorageUpdaterJobHandler handler
)
{
  config.verify();
  this.config = config;

  Preconditions.checkArgument(
      !config.isUpdaterJobSpecSet() || handler != null,
      "MetadataStorageUpdaterJobHandler must not be null if ioConfig.metadataUpdateSpec is specified."
  );

  if (config.isUpdaterJobSpecSet()) {
    metadataStorageUpdaterJob = new MetadataStorageUpdaterJob(config, handler);
  } else {
    metadataStorageUpdaterJob = null;
  }
}
@Override
protected void setup(Context context)
{
  if (config == null) {
    synchronized (DeterminePartitionsDimSelectionBaseReducer.class) {
      if (config == null) {
        config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
      }
    }
  }
}
public static HadoopDruidIndexerConfig fromSpec(HadoopIngestionSpec spec)
{
  return new HadoopDruidIndexerConfig(spec);
}
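Tying the pieces together, a minimal sketch (assuming spec is an already-deserialized HadoopIngestionSpec and handler a MetadataStorageUpdaterJobHandler, as in the snippets above) of building a config from a spec and running the standard job chain:

HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(spec);

// Same two-step chain as in run() above: determine partitions/configuration, then index.
List<Jobby> jobs = new ArrayList<>();
jobs.add(new HadoopDruidDetermineConfigurationJob(config));
jobs.add(new HadoopDruidIndexerJob(config, handler));
JobHelper.runJobs(jobs, config);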