@Override
public boolean run()
{
  List<Jobby> jobs = new ArrayList<>();
  JobHelper.ensurePaths(config);

  indexJob = new IndexGeneratorJob(config);
  jobs.add(indexJob);

  if (metadataStorageUpdaterJob != null) {
    jobs.add(metadataStorageUpdaterJob);
  } else {
    log.info(
        "No metadataStorageUpdaterJob set in the config. This is fine if you are running a Hadoop index task; otherwise nothing will be uploaded to the database."
    );
  }

  jobs.add(
      new Jobby()
      {
        @Override
        public boolean run()
        {
          publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
          return true;
        }
      }
  );

  config.setHadoopJobIdFileName(hadoopJobIdFile);
  return JobHelper.runJobs(jobs, config);
}
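For context, the method above composes a chain of Jobby steps and hands the list to JobHelper.runJobs. The sketch below shows the Jobby contract as it appears in these snippets (a single boolean run() method) together with a hypothetical sequential runner, assuming runJobs executes each step in order and stops at the first failure. The SequentialJobRunner class is an illustration, not Druid's actual implementation.

import java.util.List;

interface Jobby
{
  boolean run();
}

final class SequentialJobRunner
{
  // Run each step in order; a false return aborts the rest of the chain.
  static boolean runAll(List<Jobby> jobs)
  {
    for (Jobby job : jobs) {
      if (!job.run()) {
        return false;
      }
    }
    return true;
  }
}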
RetryUtils.retry(
    () -> {
      if (isSnapshot(jarFile)) {
        addSnapshotJarToClassPath(jarFile, intermediateClassPath, fs, job);
      } else {
        addJarToClassPath(jarFile, distributedClassPath, intermediateClassPath, fs, job);
      }
      return true;
    },
    shouldRetryPredicate(),
    NUM_RETRIES
);
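The call above follows a retry(task, shouldRetry, maxTries) shape. The following self-contained sketch illustrates that pattern in plain Java without depending on Druid's RetryUtils; the helper and the flaky operation in main are made up for illustration.

import java.util.concurrent.Callable;
import java.util.function.Predicate;

final class RetryExample
{
  // Illustrative stand-in for the retry(task, shouldRetry, maxTries) pattern used above.
  static <T> T retry(Callable<T> task, Predicate<Throwable> shouldRetry, int maxTries) throws Exception
  {
    for (int attempt = 1; ; attempt++) {
      try {
        return task.call();
      }
      catch (Exception e) {
        if (attempt >= maxTries || !shouldRetry.test(e)) {
          throw e;
        }
        // Retryable failure and attempts remain: loop and try again.
      }
    }
  }

  public static void main(String[] args) throws Exception
  {
    // Retry a hypothetical operation up to three times, but only for IOExceptions.
    String result = retry(
        () -> "copied",
        t -> t instanceof java.io.IOException,
        3
    );
    System.out.println(result);
  }
}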
public static void ensurePaths(HadoopDruidIndexerConfig config)
{
  authenticate(config);
  // config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
  try {
    Job job = Job.getInstance(
        new Configuration(),
        StringUtils.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
    );

    job.getConfiguration().set("io.sort.record.percent", "0.19");

    injectSystemProperties(job);
    config.addJobProperties(job);

    config.addInputPaths(job);
  }
  catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
JobHelper.injectSystemProperties(job);
config.addJobProperties(job);

JobHelper.injectDruidProperties(job.getConfiguration(), config.getAllowedHadoopPrefix());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    job
);

JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), job.getJobID().toString());
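JobHelper.writeJobIdToFile presumably records the submitted Hadoop job ID in the configured local file so that an external process can later look it up (for example, to kill the job). A plausible sketch under that assumption follows; this is not the actual JobHelper code, and the real method may write a different format.

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

final class JobIdFileExample
{
  // Plausible shape of writing a job ID to a file; a null file name or ID is simply ignored.
  static void writeJobIdToFile(String hadoopJobIdFileName, String hadoopJobId) throws IOException
  {
    if (hadoopJobIdFileName == null || hadoopJobId == null) {
      return;
    }
    Files.write(new File(hadoopJobIdFileName).toPath(), hadoopJobId.getBytes(StandardCharsets.UTF_8));
  }
}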
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);

JobHelper.injectSystemProperties(dimSelectionJob);
config.addJobProperties(dimSelectionJob);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    dimSelectionJob
);
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public boolean run()
{
  JobHelper.ensurePaths(config);

  if (config.isDeterminingPartitions()) {
    return JobHelper.runSingleJob(job, config);
  } else {
    int shardsPerInterval = config.getPartitionsSpec().getNumShards();
job.setOutputFormatClass(ConvertingOutputFormat.class);

JobHelper.setupClasspath(
    JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
    JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
    job
);
@Override
public int getPartition(LongWritable interval, BytesWritable text, int numPartitions)
{
  if ("local".equals(JobHelper.getJobTrackerAddress(config)) || determineIntervals) {
    return 0;
  } else {
    return reducerLookup.get(interval);
  }
}
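reducerLookup above maps an interval key (a LongWritable) to a reducer index, so each time bucket lands on its own reducer. A hypothetical way to build such a lookup, assigning consecutive reducer numbers to sorted interval starts, is sketched below; the method name, field types, and input shape are assumptions for illustration only.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.LongWritable;

final class ReducerLookupExample
{
  // Hypothetical construction of an interval-start -> reducer-index lookup.
  static Map<LongWritable, Integer> buildLookup(long[] sortedIntervalStarts)
  {
    Map<LongWritable, Integer> lookup = new HashMap<>();
    int reducer = 0;
    for (long start : sortedIntervalStarts) {
      lookup.put(new LongWritable(start), reducer++);
    }
    return lookup;
  }
}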
public static Path distributedClassPath(String path)
{
  return distributedClassPath(new Path(path));
}
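The String overload above delegates to a Path-based variant that is not shown here. Presumably it just resolves a well-known classpath subdirectory under the given base path; the sketch below is written under that assumption, and the "classpath" directory name is made up for illustration.

import org.apache.hadoop.fs.Path;

final class DistributedClassPathExample
{
  // Assumed shape of the Path-based overload: resolve a classpath subdirectory under the base path.
  static Path distributedClassPath(Path base)
  {
    return new Path(base, "classpath");
  }
}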
public static long zipAndCopyDir(
    File baseDir,
    OutputStream baseOutputStream,
    Progressable progressable
) throws IOException
{
  long size = 0L;
  try (ZipOutputStream outputStream = new ZipOutputStream(baseOutputStream)) {
    List<String> filesToCopy = Arrays.asList(baseDir.list());
    for (String fileName : filesToCopy) {
      final File fileToCopy = new File(baseDir, fileName);
      if (java.nio.file.Files.isRegularFile(fileToCopy.toPath())) {
        size += copyFileToZipStream(fileToCopy, outputStream, progressable);
      } else {
        log.warn("File at [%s] is not a regular file! skipping as part of zip", fileToCopy.getPath());
      }
    }
    outputStream.flush();
  }
  return size;
}
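A hedged usage sketch: zipping a local directory straight into a file on a Hadoop FileSystem, assuming the zipAndCopyDir method above lives in JobHelper as the surrounding snippets suggest. The local and remote paths are placeholders, and the Progressable keep-alive callback is a no-op here.

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

final class ZipAndCopyUsage
{
  public static void main(String[] args) throws IOException
  {
    Configuration conf = new Configuration();
    Path target = new Path("/tmp/example-classpath.zip"); // placeholder target path
    FileSystem fs = target.getFileSystem(conf);

    Progressable noopProgress = () -> { }; // progress reporting is a no-op in this sketch

    try (OutputStream out = fs.create(target)) {
      long bytesWritten = JobHelper.zipAndCopyDir(new File("/tmp/example-jars"), out, noopProgress);
      System.out.println("zipped " + bytesWritten + " bytes");
    }
  }
}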
public static long copyFileToZipStream(
    File file,
    ZipOutputStream zipOutputStream,
    Progressable progressable
) throws IOException
{
  createNewZipEntry(zipOutputStream, file);
  long numRead = 0;

  try (FileInputStream inputStream = new FileInputStream(file)) {
    byte[] buf = new byte[0x10000];
    for (int bytesRead = inputStream.read(buf); bytesRead >= 0; bytesRead = inputStream.read(buf)) {
      progressable.progress();
      if (bytesRead == 0) {
        continue;
      }

      zipOutputStream.write(buf, 0, bytesRead);
      progressable.progress();
      numRead += bytesRead;
    }
  }
  zipOutputStream.closeEntry();
  progressable.progress();

  return numRead;
}
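createNewZipEntry is not shown in these snippets; presumably it opens a new ZipEntry named after the file before the bytes are streamed in. A minimal sketch under that assumption:

import java.io.File;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

final class ZipEntryExample
{
  // Assumed behaviour of the createNewZipEntry helper used above: start an entry named after the file.
  static void createNewZipEntry(ZipOutputStream out, File file) throws IOException
  {
    out.putNextEntry(new ZipEntry(file.getName()));
  }
}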
public static void cleanup(Job job) throws IOException
{
  final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
  final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
  RuntimeException e = null;
  try {
    JobHelper.deleteWithRetry(fs, jobDir, true);
  }
  catch (RuntimeException ex) {
    e = ex;
  }
  try {
    JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
  }
  catch (RuntimeException ex) {
    if (e == null) {
      e = ex;
    } else {
      e.addSuppressed(ex);
    }
  }
  if (e != null) {
    throw e;
  }
}
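cleanup above attempts both deletions even if the first fails, and reports both failures by attaching the second as a suppressed exception on the first. A tiny standalone illustration of that aggregation pattern, with the failing steps simulated:

final class SuppressedExceptionExample
{
  public static void main(String[] args)
  {
    RuntimeException first = null;
    for (String step : new String[]{"delete jobDir", "delete classpathDir"}) {
      try {
        throw new RuntimeException(step + " failed"); // simulate both cleanup steps failing
      }
      catch (RuntimeException ex) {
        if (first == null) {
          first = ex;
        } else {
          first.addSuppressed(ex);
        }
      }
    }
    if (first != null) {
      // first.getSuppressed() now carries the second failure alongside the first.
      System.out.println(first.getMessage() + ", suppressed=" + first.getSuppressed().length);
    }
  }
}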
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);
JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());

JobHelper.injectSystemProperties(dimSelectionJob);
config.addJobProperties(dimSelectionJob);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    dimSelectionJob
);
JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = Lists.newArrayList();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public int getPartition(BytesWritable bytesWritable, Writable value, int numPartitions)
{
  final ByteBuffer bytes = ByteBuffer.wrap(bytesWritable.getBytes());
  bytes.position(4); // Skip length added by SortableBytes
  int shardNum = bytes.getInt();
  if ("local".equals(JobHelper.getJobTrackerAddress(config))) {
    return shardNum % numPartitions;
  } else {
    if (shardNum >= numPartitions) {
      throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
    }
    return shardNum;
  }
}
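The position(4) call above skips a 4-byte length prefix before reading the shard number from the key bytes. A small self-contained illustration of reading such a length-prefixed key follows; the exact byte layout shown is an assumption based on the comment in the snippet.

import java.nio.ByteBuffer;

final class LengthPrefixedKeyExample
{
  public static void main(String[] args)
  {
    // Assumed layout: [int length][int shardNum][remaining key bytes...]
    ByteBuffer key = ByteBuffer.allocate(12);
    key.putInt(8);   // length of the payload that follows
    key.putInt(42);  // shard number
    key.putInt(7);   // rest of the key (arbitrary here)
    key.flip();

    ByteBuffer read = key.duplicate();
    read.position(4);             // skip the 4-byte length prefix, as in getPartition above
    int shardNum = read.getInt(); // reads 42
    System.out.println(shardNum);
  }
}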