@Override
public boolean run()
{
  List<Jobby> jobs = new ArrayList<>();
  JobHelper.ensurePaths(config);

  indexJob = new IndexGeneratorJob(config);
  jobs.add(indexJob);

  if (metadataStorageUpdaterJob != null) {
    jobs.add(metadataStorageUpdaterJob);
  } else {
    log.info(
        "No metadataStorageUpdaterJob set in the config. This is fine if you are running a Hadoop index task; otherwise nothing will be uploaded to the database."
    );
  }

  jobs.add(
      new Jobby()
      {
        @Override
        public boolean run()
        {
          publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
          return true;
        }
      }
  );

  config.setHadoopJobIdFileName(hadoopJobIdFile);
  return JobHelper.runJobs(jobs, config);
}
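For context, the method above composes a chain of Jobby steps and hands the list to JobHelper.runJobs. The sketch below shows the Jobby contract as it appears in these snippets (a single boolean run() method) together with a hypothetical sequential runner, assuming runJobs executes each step in order and stops at the first failure. The SequentialJobRunner class is an illustration, not Druid's actual implementation.

import java.util.List;

interface Jobby
{
  boolean run();
}

final class SequentialJobRunner
{
  // Run each step in order; a false return aborts the rest of the chain.
  static boolean runAll(List<Jobby> jobs)
  {
    for (Jobby job : jobs) {
      if (!job.run()) {
        return false;
      }
    }
    return true;
  }
}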
RetryUtils.retry(
    () -> {
      if (isSnapshot(jarFile)) {
        addSnapshotJarToClassPath(jarFile, intermediateClassPath, fs, job);
      } else {
        addJarToClassPath(jarFile, distributedClassPath, intermediateClassPath, fs, job);
      }
      return true;
    },
    shouldRetryPredicate(),
    NUM_RETRIES
);
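The call above follows a retry(task, shouldRetry, maxTries) shape. The following self-contained sketch illustrates that pattern in plain Java without depending on Druid's RetryUtils; the helper and the flaky operation in main are made up for illustration.

import java.util.concurrent.Callable;
import java.util.function.Predicate;

final class RetryExample
{
  // Illustrative stand-in for the retry(task, shouldRetry, maxTries) pattern used above.
  static <T> T retry(Callable<T> task, Predicate<Throwable> shouldRetry, int maxTries) throws Exception
  {
    for (int attempt = 1; ; attempt++) {
      try {
        return task.call();
      }
      catch (Exception e) {
        if (attempt >= maxTries || !shouldRetry.test(e)) {
          throw e;
        }
        // Retryable failure and attempts remain: loop and try again.
      }
    }
  }

  public static void main(String[] args) throws Exception
  {
    // Retry a hypothetical operation up to three times, but only for IOExceptions.
    String result = retry(
        () -> "copied",
        t -> t instanceof java.io.IOException,
        3
    );
    System.out.println(result);
  }
}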
public static void ensurePaths(HadoopDruidIndexerConfig config)
{
  authenticate(config);
  // config.addInputPaths() can have side-effects ( boo! :( ), so this stuff needs to be done before anything else
  try {
    Job job = Job.getInstance(
        new Configuration(),
        StringUtils.format("%s-determine_partitions-%s", config.getDataSource(), config.getIntervals())
    );

    job.getConfiguration().set("io.sort.record.percent", "0.19");

    injectSystemProperties(job);
    config.addJobProperties(job);

    config.addInputPaths(job);
  }
  catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
JobHelper.injectSystemProperties(job);
config.addJobProperties(job);

JobHelper.injectDruidProperties(job.getConfiguration(), config.getAllowedHadoopPrefix());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    job
);

JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), job.getJobID().toString());
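JobHelper.writeJobIdToFile presumably records the submitted Hadoop job ID in the configured local file so that an external process can later look it up (for example, to kill the job). A plausible sketch under that assumption follows; this is not the actual JobHelper code, and the real method may write a different format.

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

final class JobIdFileExample
{
  // Plausible shape of writing a job ID to a file; a null file name or ID is simply ignored.
  static void writeJobIdToFile(String hadoopJobIdFileName, String hadoopJobId) throws IOException
  {
    if (hadoopJobIdFileName == null || hadoopJobId == null) {
      return;
    }
    Files.write(new File(hadoopJobIdFileName).toPath(), hadoopJobId.getBytes(StandardCharsets.UTF_8));
  }
}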
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);

JobHelper.injectSystemProperties(dimSelectionJob);
config.addJobProperties(dimSelectionJob);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    dimSelectionJob
);
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = new ArrayList<>();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public boolean run()
{
  JobHelper.ensurePaths(config);

  if (config.isDeterminingPartitions()) {
    return JobHelper.runSingleJob(job, config);
  } else {
    int shardsPerInterval = config.getPartitionsSpec().getNumShards();
job.setOutputFormatClass(ConvertingOutputFormat.class);

JobHelper.setupClasspath(
    JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
    JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
    job
);
@Override
public int getPartition(LongWritable interval, BytesWritable text, int numPartitions)
{
  if ("local".equals(JobHelper.getJobTrackerAddress(config)) || determineIntervals) {
    return 0;
  } else {
    return reducerLookup.get(interval);
  }
}
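reducerLookup above maps an interval key (a LongWritable) to a reducer index, so each time bucket lands on its own reducer. A hypothetical way to build such a lookup, assigning consecutive reducer numbers to sorted interval starts, is sketched below; the method name, field types, and input shape are assumptions for illustration only.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.LongWritable;

final class ReducerLookupExample
{
  // Hypothetical construction of an interval-start -> reducer-index lookup.
  static Map<LongWritable, Integer> buildLookup(long[] sortedIntervalStarts)
  {
    Map<LongWritable, Integer> lookup = new HashMap<>();
    int reducer = 0;
    for (long start : sortedIntervalStarts) {
      lookup.put(new LongWritable(start), reducer++);
    }
    return lookup;
  }
}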
public static Path distributedClassPath(String path)
{
  return distributedClassPath(new Path(path));
}
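The String overload above delegates to a Path-based variant that is not shown here. Presumably it just resolves a well-known classpath subdirectory under the given base path; the sketch below is written under that assumption, and the "classpath" directory name is made up for illustration.

import org.apache.hadoop.fs.Path;

final class DistributedClassPathExample
{
  // Assumed shape of the Path-based overload: resolve a classpath subdirectory under the base path.
  static Path distributedClassPath(Path base)
  {
    return new Path(base, "classpath");
  }
}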
public static long zipAndCopyDir(
    File baseDir,
    OutputStream baseOutputStream,
    Progressable progressable
) throws IOException
{
  long size = 0L;
  try (ZipOutputStream outputStream = new ZipOutputStream(baseOutputStream)) {
    List<String> filesToCopy = Arrays.asList(baseDir.list());
    for (String fileName : filesToCopy) {
      final File fileToCopy = new File(baseDir, fileName);
      if (java.nio.file.Files.isRegularFile(fileToCopy.toPath())) {
        size += copyFileToZipStream(fileToCopy, outputStream, progressable);
      } else {
        log.warn("File at [%s] is not a regular file! skipping as part of zip", fileToCopy.getPath());
      }
    }
    outputStream.flush();
  }
  return size;
}
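A hedged usage sketch: zipping a local directory straight into a file on a Hadoop FileSystem, assuming the zipAndCopyDir method above lives in JobHelper as the surrounding snippets suggest. The local and remote paths are placeholders, and the Progressable keep-alive callback is a no-op here.

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

final class ZipAndCopyUsage
{
  public static void main(String[] args) throws IOException
  {
    Configuration conf = new Configuration();
    Path target = new Path("/tmp/example-classpath.zip"); // placeholder target path
    FileSystem fs = target.getFileSystem(conf);

    Progressable noopProgress = () -> { }; // progress reporting is a no-op in this sketch

    try (OutputStream out = fs.create(target)) {
      long bytesWritten = JobHelper.zipAndCopyDir(new File("/tmp/example-jars"), out, noopProgress);
      System.out.println("zipped " + bytesWritten + " bytes");
    }
  }
}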
public static long copyFileToZipStream(
    File file,
    ZipOutputStream zipOutputStream,
    Progressable progressable
) throws IOException
{
  createNewZipEntry(zipOutputStream, file);
  long numRead = 0;

  try (FileInputStream inputStream = new FileInputStream(file)) {
    byte[] buf = new byte[0x10000];
    for (int bytesRead = inputStream.read(buf); bytesRead >= 0; bytesRead = inputStream.read(buf)) {
      progressable.progress();
      if (bytesRead == 0) {
        continue;
      }

      zipOutputStream.write(buf, 0, bytesRead);
      progressable.progress();
      numRead += bytesRead;
    }
  }
  zipOutputStream.closeEntry();
  progressable.progress();

  return numRead;
}
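createNewZipEntry is not shown in these snippets; presumably it opens a new ZipEntry named after the file before the bytes are streamed in. A minimal sketch under that assumption:

import java.io.File;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

final class ZipEntryExample
{
  // Assumed behaviour of the createNewZipEntry helper used above: start an entry named after the file.
  static void createNewZipEntry(ZipOutputStream out, File file) throws IOException
  {
    out.putNextEntry(new ZipEntry(file.getName()));
  }
}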
public static void cleanup(Job job) throws IOException
{
  final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
  final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
  RuntimeException e = null;
  try {
    JobHelper.deleteWithRetry(fs, jobDir, true);
  }
  catch (RuntimeException ex) {
    e = ex;
  }
  try {
    JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
  }
  catch (RuntimeException ex) {
    if (e == null) {
      e = ex;
    } else {
      e.addSuppressed(ex);
    }
  }
  if (e != null) {
    throw e;
  }
}
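cleanup above attempts both deletions even if the first fails, and reports both failures by attaching the second as a suppressed exception on the first. A tiny standalone illustration of that aggregation pattern, with the failing steps simulated:

final class SuppressedExceptionExample
{
  public static void main(String[] args)
  {
    RuntimeException first = null;
    for (String step : new String[]{"delete jobDir", "delete classpathDir"}) {
      try {
        throw new RuntimeException(step + " failed"); // simulate both cleanup steps failing
      }
      catch (RuntimeException ex) {
        if (first == null) {
          first = ex;
        } else {
          first.addSuppressed(ex);
        }
      }
    }
    if (first != null) {
      // first.getSuppressed() now carries the second failure alongside the first.
      System.out.println(first.getMessage() + ", suppressed=" + first.getSuppressed().length);
    }
  }
}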
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);
JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());

JobHelper.injectSystemProperties(dimSelectionJob);
config.addJobProperties(dimSelectionJob);
dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    dimSelectionJob
);
JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
);

JobHelper.injectSystemProperties(groupByJob);
config.addJobProperties(groupByJob);
groupByJob.setMapperClass(DetermineCardinalityMapper.class);
groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
JobHelper.setupClasspath(
    JobHelper.distributedClassPath(config.getWorkingPath()),
    JobHelper.distributedClassPath(config.makeIntermediatePath()),
    groupByJob
);
@Override
public void run()
{
  try {
    Injector injector = makeInjector();

    config = getHadoopDruidIndexerConfig();

    MetadataStorageUpdaterJobSpec metadataSpec = config.getSchema().getIOConfig().getMetadataUpdateSpec();
    // override metadata storage type based on HadoopIOConfig
    Preconditions.checkNotNull(metadataSpec.getType(), "type in metadataUpdateSpec must not be null");
    injector.getInstance(Properties.class).setProperty("druid.metadata.storage.type", metadataSpec.getType());

    config = HadoopDruidIndexerConfig.fromSpec(
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(
            config.getSchema(),
            HadoopDruidIndexerConfig.JSON_MAPPER,
            new MetadataStoreBasedUsedSegmentLister(
                injector.getInstance(IndexerMetadataStorageCoordinator.class)
            )
        )
    );

    List<Jobby> jobs = Lists.newArrayList();
    jobs.add(new HadoopDruidDetermineConfigurationJob(config));
    jobs.add(new HadoopDruidIndexerJob(config, injector.getInstance(MetadataStorageUpdaterJobHandler.class)));

    JobHelper.runJobs(jobs, config);
  }
  catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public int getPartition(BytesWritable bytesWritable, Writable value, int numPartitions)
{
  final ByteBuffer bytes = ByteBuffer.wrap(bytesWritable.getBytes());
  bytes.position(4); // Skip length added by SortableBytes
  int shardNum = bytes.getInt();
  if ("local".equals(JobHelper.getJobTrackerAddress(config))) {
    return shardNum % numPartitions;
  } else {
    if (shardNum >= numPartitions) {
      throw new ISE("Not enough partitions, shard[%,d] >= numPartitions[%,d]", shardNum, numPartitions);
    }
    return shardNum;
  }
}
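The position(4) call above skips a 4-byte length prefix before reading the shard number from the key bytes. A small self-contained illustration of reading such a length-prefixed key follows; the exact byte layout shown is an assumption based on the comment in the snippet.

import java.nio.ByteBuffer;

final class LengthPrefixedKeyExample
{
  public static void main(String[] args)
  {
    // Assumed layout: [int length][int shardNum][remaining key bytes...]
    ByteBuffer key = ByteBuffer.allocate(12);
    key.putInt(8);   // length of the payload that follows
    key.putInt(42);  // shard number
    key.putInt(7);   // rest of the key (arbitrary here)
    key.flip();

    ByteBuffer read = key.duplicate();
    read.position(4);             // skip the 4-byte length prefix, as in getPartition above
    int shardNum = read.getInt(); // reads 42
    System.out.println(shardNum);
  }
}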