protected void initJobConf() {
  if (jobConf == null) {
    // Lazily deserialize the JobConf shipped from the driver, once per task.
    jobConf = KryoSerializer.deserializeJobConf(this.buffer);
    SmallTableCache.initialize(jobConf);
    setupMRLegacyConfigs();
  }
}
@SuppressWarnings("unchecked")
@Override
public Iterator<Tuple2<HiveKey, BytesWritable>> call(Iterator<Tuple2<HiveKey, V>> it) throws Exception {
  initJobConf();
  SparkReduceRecordHandler reduceRecordHandler = new SparkReduceRecordHandler();
  HiveReduceFunctionResultList<V> result = new HiveReduceFunctionResultList<V>(it, reduceRecordHandler);
  reduceRecordHandler.init(jobConf, result, sparkReporter);
  return result;
}
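// For context, a minimal sketch of how a pair flat-map function like this is wired into a
// Spark job. This is an assumption for illustration, not Hive's actual plan generation:
// "shuffled" is a hypothetical JavaPairRDD produced by the shuffle, and "confBytes" /
// "sparkReporter" are assumed to be the serialized JobConf and reporter held by the caller.
JavaPairRDD<HiveKey, BytesWritable> output =
    shuffled.mapPartitionsToPair(new HiveReduceFunction<BytesWritable>(confBytes, sparkReporter));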
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  // Defensive copies: Hadoop Writables are reused by the framework, so buffer copies.
  lastRecordOutput.add(SparkUtilities.copyHiveKey(key), SparkUtilities.copyBytesWritable(value));
}
public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String sparkSessionId,
    String hiveSessionId) throws Exception {
  Map<String, String> sparkConf = initiateSparkConf(hiveconf, hiveSessionId);
  // Submit the Spark job through a local SparkContext when the Spark master runs in local
  // mode; otherwise submit it through the remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With a local SparkContext, all user sessions share the same context.
    return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf), hiveconf);
  } else {
    return new RemoteHiveSparkClient(hiveconf, sparkConf, sparkSessionId);
  }
}
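// A hypothetical call site for the factory above, assuming the Spark master is carried in
// the Hive configuration under "spark.master"; the session ids are made up for illustration.
HiveConf hiveConf = new HiveConf();
hiveConf.set("spark.master", "local[2]"); // local mode: all sessions share one SparkContext
HiveSparkClient client =
    HiveSparkClientFactory.createHiveSparkClient(hiveConf, "spark-session-0", "hive-session-0");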
/**
 * Process one row using a dummy iterator, or add the row to the vector batch when
 * vectorized execution is enabled.
 */
@Override
public void processRow(Object key, final Object value) throws IOException {
  anyRow = true; // remember that at least one row was processed
  if (vectorized) {
    processVectorRow(key, value);
  } else {
    dummyIterator.setValue(value);
    processRow(key, dummyIterator);
  }
}
/**
 * Process one row using a dummy iterator.
 */
@Override
public void processRow(Object key, final Object value) throws IOException {
  dummyIterator.setValue(value);
  processRow(key, dummyIterator);
}
RemoteHiveSparkClient(HiveConf hiveConf, Map<String, String> conf, String sessionId) throws Exception {
  this.hiveConf = hiveConf;
  sparkClientTimeout = hiveConf.getTimeVar(HiveConf.ConfVars.SPARK_CLIENT_FUTURE_TIMEOUT, TimeUnit.SECONDS);
  sparkConf = HiveSparkClientFactory.generateSparkConf(conf);
  this.conf = conf;
  this.sessionId = sessionId;
  createRemoteClient();
}
@Test
public void testSpilling() throws Exception {
  HiveKVResultCache cache = new HiveKVResultCache();
  final int recordCount = HiveKVResultCache.IN_MEMORY_NUM_ROWS * 3;
  // Reuse the same cache across runs: insert n rows, read them back, clear, then insert
  // another m rows and verify the cache still stores them correctly. This simulates
  // reusing one cache over and over again.
  testSpillingHelper(cache, recordCount);
  testSpillingHelper(cache, 1);
  testSpillingHelper(cache, recordCount);
}
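// testSpillingHelper is not shown in this excerpt. A plausible shape for it, under the
// assumptions that HiveKVResultCache exposes add/hasNext/next/clear and that read-back
// preserves insertion order (hypothetical sketch, not the actual helper; assumes the usual
// JUnit, scala.Tuple2, and Hadoop Writable imports of the test class):
private void testSpillingHelper(HiveKVResultCache cache, int size) {
  // Write phase: insert `size` key/value pairs, spilling to disk past the in-memory limit.
  for (int i = 0; i < size; i++) {
    cache.add(new HiveKey(("key_" + i).getBytes(), i),
        new BytesWritable(("value_" + i).getBytes()));
  }
  // Read phase: drain the cache and verify count and order.
  int read = 0;
  while (cache.hasNext()) {
    Tuple2<HiveKey, BytesWritable> pair = cache.next();
    assertEquals("key_" + read, new String(pair._1().copyBytes()));
    read++;
  }
  assertEquals(size, read);
  cache.clear(); // leave the cache reusable for the next run
}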
public HiveBaseFunctionResultList(Iterator<T> inputIterator) {
  this.inputIterator = inputIterator;
  this.lastRecordOutput = new HiveKVResultCache();
}
@Override
protected void closeRecordProcessor() {
  reduceRecordHandler.close();
}
public static synchronized LocalHiveSparkClient getInstance(SparkConf sparkConf, HiveConf hiveConf)
    throws FileNotFoundException, MalformedURLException {
  if (client == null) {
    client = new LocalHiveSparkClient(sparkConf, hiveConf);
  }
  ++client.activeSessions;
  return client;
}
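// The session counter above implies a matching close() that tears the shared context down
// only when the last session ends. A rough sketch under that assumption ("sc" is a
// hypothetical field holding the shared JavaSparkContext; not the actual Hive implementation):
@Override
public void close() {
  synchronized (LocalHiveSparkClient.class) {
    if (--activeSessions == 0) {
      client = null; // drop the singleton
      if (sc != null) {
        sc.stop(); // stop the shared JavaSparkContext
      }
    }
  }
}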
@Override
protected void closeRecordProcessor() {
  recordHandler.close();
}
@Override
protected boolean processingDone() {
  return recordHandler.getDone();
}
@Override
public int getExecutorCount() throws Exception {
  return getExecutorCount(sparkClientTimeout, TimeUnit.SECONDS);
}
public static MapJoinTableContainer get(String key, Callable<MapJoinTableContainer> valueLoader)
    throws ExecutionException {
  return TABLE_CONTAINER_CACHE.get(key, valueLoader);
}
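// TABLE_CONTAINER_CACHE is not declared in this excerpt. Given the get(key, valueLoader)
// signature and the checked ExecutionException, it is presumably a Guava cache
// (com.google.common.cache.Cache / CacheBuilder); a hypothetical declaration, with
// illustrative settings rather than Hive's actual choices:
private static final Cache<String, MapJoinTableContainer> TABLE_CONTAINER_CACHE =
    CacheBuilder.newBuilder()
        .softValues()        // let the GC reclaim loaded tables under memory pressure
        .concurrencyLevel(4)
        .build();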
private static long[] scanResultList(long rows, int threshold, int extra) {
  // 1. Simulate emitting all records in closeRecordProcessor().
  long t1 = scanResultList(rows, 0, 0, null, "a", "b");
  // 2. Simulate emitting records in processNextRecord() with a small memory usage limit.
  long t2 = scanResultList(rows, threshold, 0, null, "c", "d");
  // 3. Simulate emitting records in processNextRecord() with a large memory usage limit.
  long t3 = scanResultList(rows, threshold * 10, 0, null, "e", "f");
  // 4. Same as 2, but also emit extra records from a separate thread.
  long t4 = scanResultList(rows, threshold, extra, null, "g", "h");
  // 5. Same as 3, but also emit extra records from a separate thread.
  long t5 = scanResultList(rows, threshold * 10, extra, null, "i", "j");
  return new long[] {t1, t2, t3, t4, t5};
}
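// A hypothetical driver for the benchmark above; the parameter values are assumptions for
// illustration, and the meaning of the returned timings is whatever the six-argument
// scanResultList overload measures.
long[] times = scanResultList(1024 * 1024, 4 * 1024 * 1024, 100000);
System.out.println("Scenario timings: " + java.util.Arrays.toString(times));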