protected void initJobConf() {
  if (jobConf == null) {
    // Lazily deserialize the JobConf shipped from the driver, once per task.
    jobConf = KryoSerializer.deserializeJobConf(this.buffer);
    SmallTableCache.initialize(jobConf);
    setupMRLegacyConfigs();
  }
}
@SuppressWarnings("unchecked")
@Override
public Iterator<Tuple2<HiveKey, BytesWritable>> call(Iterator<Tuple2<HiveKey, V>> it) throws Exception {
  initJobConf();
  SparkReduceRecordHandler reduceRecordHandler = new SparkReduceRecordHandler();
  HiveReduceFunctionResultList<V> result = new HiveReduceFunctionResultList<V>(it, reduceRecordHandler);
  reduceRecordHandler.init(jobConf, result, sparkReporter);
  return result;
}
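// For context, a minimal sketch of how a pair flat-map function like this is wired into a
// Spark job. This is an assumption for illustration, not Hive's actual plan generation:
// "shuffled" is a hypothetical JavaPairRDD produced by the shuffle, and "confBytes" /
// "sparkReporter" are assumed to be the serialized JobConf and reporter held by the caller.
JavaPairRDD<HiveKey, BytesWritable> output =
    shuffled.mapPartitionsToPair(new HiveReduceFunction<BytesWritable>(confBytes, sparkReporter));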
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  // Defensive copies: Hadoop Writables are reused by the framework, so buffer copies.
  lastRecordOutput.add(SparkUtilities.copyHiveKey(key), SparkUtilities.copyBytesWritable(value));
}
public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String sparkSessionId,
    String hiveSessionId) throws Exception {
  Map<String, String> sparkConf = initiateSparkConf(hiveconf, hiveSessionId);
  // Submit the Spark job through a local SparkContext when the Spark master runs in local
  // mode; otherwise submit it through the remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With a local SparkContext, all user sessions share the same context.
    return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf), hiveconf);
  } else {
    return new RemoteHiveSparkClient(hiveconf, sparkConf, sparkSessionId);
  }
}
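// A hypothetical call site for the factory above, assuming the Spark master is carried in
// the Hive configuration under "spark.master"; the session ids are made up for illustration.
HiveConf hiveConf = new HiveConf();
hiveConf.set("spark.master", "local[2]"); // local mode: all sessions share one SparkContext
HiveSparkClient client =
    HiveSparkClientFactory.createHiveSparkClient(hiveConf, "spark-session-0", "hive-session-0");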
/**
 * Process one row using a dummy iterator, or add the row to the vector batch when
 * vectorized execution is enabled.
 */
@Override
public void processRow(Object key, final Object value) throws IOException {
  anyRow = true; // remember that at least one row was processed
  if (vectorized) {
    processVectorRow(key, value);
  } else {
    dummyIterator.setValue(value);
    processRow(key, dummyIterator);
  }
}
/**
 * Process one row using a dummy iterator.
 */
@Override
public void processRow(Object key, final Object value) throws IOException {
  dummyIterator.setValue(value);
  processRow(key, dummyIterator);
}
RemoteHiveSparkClient(HiveConf hiveConf, Map<String, String> conf, String sessionId) throws Exception {
  this.hiveConf = hiveConf;
  sparkClientTimeout = hiveConf.getTimeVar(HiveConf.ConfVars.SPARK_CLIENT_FUTURE_TIMEOUT, TimeUnit.SECONDS);
  sparkConf = HiveSparkClientFactory.generateSparkConf(conf);
  this.conf = conf;
  this.sessionId = sessionId;
  createRemoteClient();
}
@Test
public void testSpilling() throws Exception {
  HiveKVResultCache cache = new HiveKVResultCache();
  final int recordCount = HiveKVResultCache.IN_MEMORY_NUM_ROWS * 3;
  // Reuse the same cache across runs: insert n rows, read them back, clear, then insert
  // another m rows and verify the cache still stores them correctly. This simulates
  // reusing one cache over and over again.
  testSpillingHelper(cache, recordCount);
  testSpillingHelper(cache, 1);
  testSpillingHelper(cache, recordCount);
}
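// testSpillingHelper is not shown in this excerpt. A plausible shape for it, under the
// assumptions that HiveKVResultCache exposes add/hasNext/next/clear and that read-back
// preserves insertion order (hypothetical sketch, not the actual helper; assumes the usual
// JUnit, scala.Tuple2, and Hadoop Writable imports of the test class):
private void testSpillingHelper(HiveKVResultCache cache, int size) {
  // Write phase: insert `size` key/value pairs, spilling to disk past the in-memory limit.
  for (int i = 0; i < size; i++) {
    cache.add(new HiveKey(("key_" + i).getBytes(), i),
        new BytesWritable(("value_" + i).getBytes()));
  }
  // Read phase: drain the cache and verify count and order.
  int read = 0;
  while (cache.hasNext()) {
    Tuple2<HiveKey, BytesWritable> pair = cache.next();
    assertEquals("key_" + read, new String(pair._1().copyBytes()));
    read++;
  }
  assertEquals(size, read);
  cache.clear(); // leave the cache reusable for the next run
}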
public HiveBaseFunctionResultList(Iterator<T> inputIterator) {
  this.inputIterator = inputIterator;
  this.lastRecordOutput = new HiveKVResultCache();
}
@Override
protected void closeRecordProcessor() {
  reduceRecordHandler.close();
}
public static synchronized LocalHiveSparkClient getInstance(SparkConf sparkConf, HiveConf hiveConf)
    throws FileNotFoundException, MalformedURLException {
  if (client == null) {
    client = new LocalHiveSparkClient(sparkConf, hiveConf);
  }
  ++client.activeSessions;
  return client;
}
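// The session counter above implies a matching close() that tears the shared context down
// only when the last session ends. A rough sketch under that assumption ("sc" is a
// hypothetical field holding the shared JavaSparkContext; not the actual Hive implementation):
@Override
public void close() {
  synchronized (LocalHiveSparkClient.class) {
    if (--activeSessions == 0) {
      client = null; // drop the singleton
      if (sc != null) {
        sc.stop(); // stop the shared JavaSparkContext
      }
    }
  }
}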
@Override
protected void closeRecordProcessor() {
  recordHandler.close();
}
@Override
protected boolean processingDone() {
  return recordHandler.getDone();
}
@Override
public int getExecutorCount() throws Exception {
  return getExecutorCount(sparkClientTimeout, TimeUnit.SECONDS);
}
public static MapJoinTableContainer get(String key, Callable<MapJoinTableContainer> valueLoader)
    throws ExecutionException {
  return TABLE_CONTAINER_CACHE.get(key, valueLoader);
}
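// TABLE_CONTAINER_CACHE is not declared in this excerpt. Given the get(key, valueLoader)
// signature and the checked ExecutionException, it is presumably a Guava cache
// (com.google.common.cache.Cache / CacheBuilder); a hypothetical declaration, with
// illustrative settings rather than Hive's actual choices:
private static final Cache<String, MapJoinTableContainer> TABLE_CONTAINER_CACHE =
    CacheBuilder.newBuilder()
        .softValues()        // let the GC reclaim loaded tables under memory pressure
        .concurrencyLevel(4)
        .build();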
private static long[] scanResultList(long rows, int threshold, int extra) {
  // 1. Simulate emitting all records in closeRecordProcessor().
  long t1 = scanResultList(rows, 0, 0, null, "a", "b");
  // 2. Simulate emitting records in processNextRecord() with a small memory usage limit.
  long t2 = scanResultList(rows, threshold, 0, null, "c", "d");
  // 3. Simulate emitting records in processNextRecord() with a large memory usage limit.
  long t3 = scanResultList(rows, threshold * 10, 0, null, "e", "f");
  // 4. Same as 2, but also emit extra records from a separate thread.
  long t4 = scanResultList(rows, threshold, extra, null, "g", "h");
  // 5. Same as 3, but also emit extra records from a separate thread.
  long t5 = scanResultList(rows, threshold * 10, extra, null, "i", "j");
  return new long[] {t1, t2, t3, t4, t5};
}
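// A hypothetical driver for the benchmark above; the parameter values are assumptions for
// illustration, and the meaning of the returned timings is whatever the six-argument
// scanResultList overload measures.
long[] times = scanResultList(1024 * 1024, 4 * 1024 * 1024, 100000);
System.out.println("Scenario timings: " + java.util.Arrays.toString(times));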