/**
 * Gets an extractor instance for the given configuration, instantiating the
 * configured {@code IExtractor} implementation through reflection.
 *
 * @param config the extractor configuration
 * @return the extractor instance
 */
public static <T, S extends BaseConfig> IExtractor<T, S> getExtractorInstance(S config) {
    try {
        Class<T> extractorImplClass = (Class<T>) config.getExtractorImplClass();
        if (extractorImplClass == null) {
            extractorImplClass = (Class<T>) Class.forName(config.getExtractorImplClassName());
        }

        Constructor<T> c;
        if (config.getEntityClass().isAssignableFrom(Cells.class)) {
            // Cells-based extractors expose a no-arg constructor.
            c = extractorImplClass.getConstructor();
            return (IExtractor<T, S>) c.newInstance();
        } else {
            // Entity-based extractors receive the entity class at construction time.
            c = extractorImplClass.getConstructor(Class.class);
            return (IExtractor<T, S>) c.newInstance(config.getEntityClass());
        }
    } catch (ClassNotFoundException | InstantiationException | IllegalAccessException
            | IllegalArgumentException | InvocationTargetException | NoSuchMethodException
            | SecurityException e) {
        String message = "Extractor instantiation failed, wrapping in DeepExtractorInitializationException: "
                + e.getMessage();
        LOG.error(message);
        throw new DeepExtractorInitializationException(message, e);
    }
}
/**
 * Saves the given RDD through the configured extractor.
 *
 * @param rdd          the RDD to persist
 * @param config       the extractor configuration
 * @param queryBuilder the update query builder used to write each element
 * @param <T>          the element type of the RDD
 * @param <S>          the configuration type
 */
public static <T, S extends BaseConfig> void saveRDD(RDD<T> rdd, S config, UpdateQueryBuilder queryBuilder) {
    config.setRddId(rdd.id());
    config.setPartitionId(0);
    rdd.foreachPartition(new PrepareSaveFunction<>(queryBuilder, config, rdd.first()));
}
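/*
 * Hedged usage sketch, not taken from the original sources: it shows how a caller
 * might persist a JavaRDD<Cells> through saveRDD. The cellsRdd and queryBuilder
 * parameters, and the MyCellsExtractor class, are hypothetical placeholders; the
 * concrete extractor and query builder depend on the target datastore.
 */
public static void saveCellsExample(JavaRDD<Cells> cellsRdd, UpdateQueryBuilder queryBuilder) {
    BaseConfig<Cells, BaseConfig> saveConfig = new BaseConfig<>();
    saveConfig.setEntityClass(Cells.class);
    saveConfig.setExtractorImplClass(MyCellsExtractor.class); // hypothetical extractor implementation
    // saveRDD tags the config with the RDD id, resets the partition counter
    // and runs PrepareSaveFunction on every partition.
    saveRDD(cellsRdd.rdd(), saveConfig, queryBuilder);
}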
public DeepRDD(SparkContext sc, S config) {
    super(sc, scala.collection.Seq$.MODULE$.empty(),
            ClassTag$.MODULE$.<T>apply(config.getEntityClass()));
    config.setRddId(id());
    this.config = sc.broadcast(config, ClassTag$.MODULE$.<S>apply(config.getClass()));
}
@Override
public BoxedUnit apply(Iterator<T> v1) {
    IExtractor<T, S> extractor;
    try {
        extractor = getExtractorInstance(config);
    } catch (DeepExtractorInitializationException e) {
        // Fall back to the out-of-process extractor client when the
        // implementation cannot be instantiated locally.
        extractor = getExtractorClient();
    }

    extractor.initSave(config, first, queryBuilder);
    while (v1.hasNext()) {
        extractor.saveRDD(v1.next());
    }
    config.setPartitionId(config.getPartitionId() + 1);
    extractor.close();
    return BoxedUnit.UNIT;
}
@Override
public void initSave(S config, T first, UpdateQueryBuilder queryBuilder) {
    int id = config.getRddId();
    int partitionIndex = config.getPartitionId();

    TaskAttemptID attemptId = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptID(jobTrackerId, id, true, partitionIndex, 0);
    Configuration configuration = getHadoopConfig(config);
    hadoopAttemptContext = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptContext(configuration, attemptId);

    try {
        writer = outputFormat.getRecordWriter(hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}
/**
 * Tests that getExtractorInstance builds an extractor for a Cells-based configuration.
 *
 * @throws Exception if the extractor cannot be instantiated
 */
@Test
public void testGetExtractorInstance() throws Exception {
    BaseConfig<Cells, BaseConfig> baseConfig = new BaseConfig<>();
    baseConfig.setEntityClass(Cells.class);
    baseConfig.setExtractorImplClass(testExtractor.class);

    IExtractor extractorInstance1 = getExtractorInstance(baseConfig);

    assertNotNull(extractorInstance1);
}
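/*
 * Hedged companion sketch to the test above (not present in the original suite):
 * it exercises the entity-class branch of getExtractorInstance, where the
 * extractor is created through its Class-taking constructor. TestEntity and
 * TestEntityExtractor are hypothetical fixture classes.
 */
@Test
public void testGetExtractorInstanceForEntity() throws Exception {
    BaseConfig<TestEntity, BaseConfig> entityConfig = new BaseConfig<>();
    entityConfig.setEntityClass(TestEntity.class);
    entityConfig.setExtractorImplClass(TestEntityExtractor.class);

    // TestEntity is not assignable from Cells, so the Class-based
    // constructor of TestEntityExtractor is invoked reflectively.
    IExtractor extractorInstance = getExtractorInstance(entityConfig);

    assertNotNull(extractorInstance);
}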
public DeepJavaRDD(DeepRDD<T, S> rdd) {
    super(rdd, ClassTag$.MODULE$.<T>apply(rdd.config.value().getEntityClass()));
}
@Override
public Partition[] getPartitions(S config) {
    int id = config.getRddId();
    jobId = new JobID(jobTrackerId, id);

    Configuration conf = getHadoopConfig(config);
    JobContext jobContext = DeepSparkHadoopMapReduceUtil.newJobContext(conf, jobId);

    try {
        List<InputSplit> splits = inputFormat.getSplits(jobContext);
        // One Spark partition per Hadoop input split.
        Partition[] partitions = new Partition[splits.size()];
        for (int i = 0; i < splits.size(); i++) {
            partitions[i] = new NewHadoopPartition(id, i, splits.get(i));
        }
        return partitions;
    } catch (IOException | InterruptedException | RuntimeException e) {
        LOG.error("Unable to calculate partitions: " + e.getMessage());
        throw new DeepGenericException("Unable to calculate partitions", e);
    }
}
@Override
public ClassTag<T> classTag() {
    return ClassTag$.MODULE$.<T>apply(
            ((BaseConfig<T, BaseConfig>) ((DeepRDD) this.rdd()).config.value()).getEntityClass());
}
@Override
public void initIterator(Partition dp, S config) {
    int id = config.getRddId();
    NewHadoopPartition split = (NewHadoopPartition) dp;

    TaskAttemptID attemptId = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptID(jobTrackerId, id, true, split.index(), 0);
    Configuration configuration = getHadoopConfig(config);
    TaskAttemptContext hadoopAttemptContext = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptContext(configuration, attemptId);

    try {
        reader = inputFormat.createRecordReader(split.serializableHadoopSplit().value(), hadoopAttemptContext);
        reader.initialize(split.serializableHadoopSplit().value(), hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}
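/*
 * Hedged sketch, not present in the original class: one way the RecordReader
 * prepared by initIterator can back hasNext()/next(). The transformElement(...)
 * hook is hypothetical and stands in for whatever mapping the concrete extractor
 * applies to the raw Hadoop value.
 */
@Override
public boolean hasNext() {
    try {
        // nextKeyValue() both advances the reader and reports availability, so a
        // production implementation should cache the result if hasNext() may be
        // called more than once per element.
        return reader.nextKeyValue();
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}

@Override
public T next() {
    try {
        return transformElement(reader.getCurrentValue());
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}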