@Override
protected List<? extends Module> getModules()
{
  return ImmutableList.of(
      binder -> {
        binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/internal-hadoop-indexer");
        binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
        binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);

        // bind metadata storage config based on HadoopIOConfig
        MetadataStorageUpdaterJobSpec metadataSpec = getHadoopDruidIndexerConfig().getSchema()
                                                                                  .getIOConfig()
                                                                                  .getMetadataUpdateSpec();

        binder.bind(new TypeLiteral<Supplier<MetadataStorageConnectorConfig>>() {})
              .toInstance(metadataSpec);
        binder.bind(MetadataStorageTablesConfig.class).toInstance(metadataSpec.getMetadataStorageTablesConfig());
        binder.bind(IndexerMetadataStorageCoordinator.class)
              .to(IndexerSQLMetadataStorageCoordinator.class)
              .in(LazySingleton.class);
      }
  );
}
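// A minimal, self-contained sketch of the Guice TypeLiteral pattern used above to
// bind the parameterized Supplier type. It works because the anonymous TypeLiteral
// subclass preserves the generic type argument that plain Class tokens erase. The
// names here (ConnectorConfig, the connect URI) are illustrative, not Druid's.
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.TypeLiteral;
import java.util.function.Supplier;

public class TypeLiteralBindingSketch
{
  static class ConnectorConfig
  {
    final String connectURI;

    ConnectorConfig(String connectURI)
    {
      this.connectURI = connectURI;
    }
  }

  public static void main(String[] args)
  {
    Supplier<ConnectorConfig> supplier = () -> new ConnectorConfig("jdbc:example://host");

    Injector injector = Guice.createInjector(new AbstractModule()
    {
      @Override
      protected void configure()
      {
        // Without TypeLiteral, Supplier.class would lose the <ConnectorConfig> argument.
        bind(new TypeLiteral<Supplier<ConnectorConfig>>() {}).toInstance(supplier);
      }
    });

    ConnectorConfig config =
        injector.getInstance(Key.get(new TypeLiteral<Supplier<ConnectorConfig>>() {})).get();
    System.out.println(config.connectURI);
  }
}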
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
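// The property override above works because the injector returns one shared,
// mutable Properties instance: a setProperty call made before the metadata-storage
// components are instantiated is visible to every later reader of that binding.
// A tiny sketch of that behavior, with the binding set up inline for illustration:
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
import java.util.Properties;

public class SharedPropertiesSketch
{
  public static void main(String[] args)
  {
    Injector injector = Guice.createInjector(new AbstractModule()
    {
      @Override
      protected void configure()
      {
        bind(Properties.class).toInstance(new Properties());
      }
    });

    // Mutate the bound instance, as run() does for druid.metadata.storage.type.
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", "mysql");

    // A second lookup returns the same object, hence the same override.
    System.out.println(injector.getInstance(Properties.class).getProperty("druid.metadata.storage.type"));
  }
}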
public static boolean runSingleJob(Jobby job, HadoopDruidIndexerConfig config)
{
  boolean succeeded = job.run();

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        Configuration conf = injectSystemProperties(new Configuration());
        config.addJobProperties(conf);
        workingPath.getFileSystem(conf).delete(workingPath, true);
      }
      catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  return succeeded;
}
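// The cleanup above resolves the FileSystem from the path itself, so the same code
// handles hdfs://, file://, and other schemes. A small, self-contained sketch of
// that Hadoop API; the /tmp path is hypothetical (the real one comes from
// config.makeIntermediatePath()).
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CleanupSketch
{
  public static void main(String[] args) throws IOException
  {
    Path workingPath = new Path("/tmp/druid-indexer/working");
    Configuration conf = new Configuration();

    // getFileSystem picks the implementation for the path's scheme; the second
    // argument to delete enables recursive removal of the whole directory tree.
    FileSystem fs = workingPath.getFileSystem(conf);
    if (fs.exists(workingPath)) {
      fs.delete(workingPath, true);
    }
  }
}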
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config)
{
  boolean succeeded = true;
  for (Jobby job : jobs) {
    if (!job.run()) {
      succeeded = false;
      break;
    }
  }

  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        Configuration conf = injectSystemProperties(new Configuration());
        config.addJobProperties(conf);
        workingPath.getFileSystem(conf).delete(workingPath, true);
      }
      catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  return succeeded;
}
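// The loop above is deliberately fail-fast: the first job that returns false stops
// the chain, and only then does the cleanup logic decide whether to delete the
// working path. A stripped-down sketch of that control flow, using a hypothetical
// SimpleJob stand-in for Druid's Jobby:
import java.util.Arrays;
import java.util.List;

public class FailFastRunnerSketch
{
  interface SimpleJob
  {
    boolean run();
  }

  // Mirrors runJobs: stop at the first failure, report overall success.
  static boolean runAll(List<SimpleJob> jobs)
  {
    for (SimpleJob job : jobs) {
      if (!job.run()) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args)
  {
    List<SimpleJob> jobs = Arrays.asList(
        () -> { System.out.println("determine configuration"); return true; },
        () -> { System.out.println("index generation"); return false; },
        () -> { System.out.println("never reached"); return true; }
    );
    System.out.println("pipeline succeeded: " + runAll(jobs));
  }
}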
@Override
public boolean run()
{
  final List<DataSegment> segments = IndexGeneratorJob.getPublishedSegments(config);
  final String segmentTable = config.getSchema().getIOConfig().getMetadataUpdateSpec().getSegmentTable();
  handler.publishSegments(segmentTable, segments, HadoopDruidIndexerConfig.JSON_MAPPER);
  return true;
}
protected File mergeQueryableIndex(
    final List<QueryableIndex> indexes,
    final AggregatorFactory[] aggs,
    final File file,
    ProgressIndicator progressIndicator
) throws IOException
{
  boolean rollup = config.getSchema().getDataSchema().getGranularitySpec().isRollup();
  return HadoopDruidIndexerConfig.INDEX_MERGER_V9
      .mergeQueryableIndex(indexes, rollup, aggs, file, config.getIndexSpec(), progressIndicator, null);
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
// ...
    config.getSchema().getIOConfig().getSegmentOutputPath(),
    segmentGranularity.toPath(timeBucket)
);
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we read rows
    // already stored in Druid as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
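// Toy illustration (plain Java, not Druid's API) of why rows re-read from existing
// segments need combining aggregator factories: raw late-arriving events carry the
// original input field, while segment rows carry pre-aggregated partials of the
// metric, and the two must be folded differently before they can be merged.
public class CombiningAggregatorSketch
{
  public static void main(String[] args)
  {
    // Raw events from HDFS: the original aggregator sums the source field.
    long[] rawClicks = {1, 2, 3};
    long rawSum = 0;
    for (long clicks : rawClicks) {
      rawSum += clicks;
    }

    // Rows read back from Druid already hold partial "clicks" sums; the combining
    // factory folds those partials rather than re-reading raw input fields.
    long[] partialSums = {10, 20};
    long total = rawSum;
    for (long partial : partialSums) {
      total += partial;
    }
    System.out.println(total); // 36
  }
}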
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec()
  );
}
for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
  metricsFields.addAll(agg.requiredFields());
}
private static IncrementalIndex makeIncrementalIndex(
    Bucket theBucket,
    AggregatorFactory[] aggs,
    HadoopDruidIndexerConfig config,
    Iterable<String> oldDimOrder,
    Map<String, ColumnCapabilitiesImpl> oldCapabilities
)
{
  final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(theBucket.time.getMillis())
      .withTimestampSpec(config.getSchema().getDataSchema().getParser().getParseSpec().getTimestampSpec())
      .withDimensionsSpec(config.getSchema().getDataSchema().getParser())
      .withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
      .withMetrics(aggs)
      .withRollup(config.getSchema().getDataSchema().getGranularitySpec().isRollup())
      .build();

  IncrementalIndex newIndex = new IncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setReportParseExceptions(!tuningConfig.isIgnoreInvalidRows()) // only used by OffHeapIncrementalIndex
      .setMaxRowCount(tuningConfig.getRowFlushBoundary())
      .setMaxBytesInMemory(TuningConfigs.getMaxBytesInMemoryOrDefault(tuningConfig.getMaxBytesInMemory()))
      .buildOnheap();

  if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) {
    newIndex.loadDimensionIterable(oldDimOrder, oldCapabilities);
  }

  return newIndex;
}
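// Toy illustration (plain Java, not Druid's API) of what the rollup flag above
// enables: rows that share the same query-granularity time bucket and dimension
// values are folded into a single aggregated row inside the incremental index.
import java.util.LinkedHashMap;
import java.util.Map;

public class RollupSketch
{
  public static void main(String[] args)
  {
    long hourMillis = 3_600_000L;
    // {timestampMillis, clicks} pairs for a single "page=home" dimension value.
    long[][] rows = {{10L, 3L}, {20L, 4L}, {hourMillis + 5L, 1L}};

    Map<Long, Long> rolledUp = new LinkedHashMap<>();
    for (long[] row : rows) {
      long bucket = (row[0] / hourMillis) * hourMillis; // query-granularity truncation
      rolledUp.merge(bucket, row[1], Long::sum);        // fold rows sharing a bucket
    }
    // The two rows in hour 0 collapse to one: {0=7, 3600000=1}
    System.out.println(rolledUp);
  }
}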
int numBackgroundPersistThreads = config.getSchema().getTuningConfig().getNumBackgroundPersistThreads();
if (numBackgroundPersistThreads > 0) {
  final BlockingQueue<Runnable> queue = new SynchronousQueue<>();
  // ...
}

final FileSystem outputFS = new Path(config.getSchema().getIOConfig().getSegmentOutputPath())
    .getFileSystem(context.getConfiguration());

// ...
    config.getDataSource(),
    interval,
    config.getSchema().getTuningConfig().getVersion(),
    null,
    ImmutableList.copyOf(allDimensionNames),
    // ...
    mergedBase,
    JobHelper.makeFileNamePath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS,
        segmentTemplate
        // ...
    ),
    JobHelper.makeFileNamePath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS,
        segmentTemplate
        // ...
    ),
    JobHelper.makeTmpPath(
        new Path(config.getSchema().getIOConfig().getSegmentOutputPath()),
        outputFS
        // ...
if (config.getSchema().getTuningConfig().getUseCombiner()) {
  job.setCombinerClass(IndexGeneratorCombiner.class);
  job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
}
public String runTask(String[] args) throws Exception
{
  final String schema = args[0];
  final String workingPath = args[1];
  final String segmentOutputPath = args[2];
  final String hadoopJobIdFile = args[3];

  final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
      schema,
      HadoopIngestionSpec.class
  );
  final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec(
      theSchema
          .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath))
          .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath))
  );

  job = new HadoopDruidDetermineConfigurationJob(config);
  job.setHadoopJobIdFile(hadoopJobIdFile);

  log.info("Starting a hadoop determine configuration job...");
  if (job.run()) {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null)
    );
  } else {
    return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(
        new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage())
    );
  }
}
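// The spec arrives as a JSON string in args[0] and the job status leaves as JSON,
// so the whole exchange is a Jackson round-trip. A minimal sketch of that pattern
// with a hypothetical Spec type (the real types are HadoopIngestionSpec and
// HadoopDetermineConfigInnerProcessingStatus):
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonRoundTripSketch
{
  public static class Spec
  {
    public String dataSource;
    public String workingPath;
  }

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();

    // Deserialize the spec handed over on the command line...
    Spec spec = mapper.readValue(
        "{\"dataSource\":\"wikipedia\",\"workingPath\":\"/tmp/work\"}",
        Spec.class
    );
    // ...rewrite a path, as runTask does with withWorkingPath(...)...
    spec.workingPath = "/tmp/overridden";

    // ...and serialize the result back to a JSON string.
    System.out.println(mapper.writeValueAsString(spec));
  }
}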