/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; must be a HybridHashTableContainer
 * @param row big table row to park until the matching partition is reloaded
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
    throws HiveException {
  HybridHashTableContainer hybridContainer = (HybridHashTableContainer) hybridHtContainer;
  // The container tracks which partition is currently designated for spilling.
  HashPartition targetPartition =
      hybridContainer.getHashPartitions()[hybridContainer.getToSpillPartitionId()];
  targetPartition.getMatchfileObjContainer().add(row);
}
/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; cast to HybridHashTableContainer below
 * @param row big table row to spill
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException {
  HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
  // The container decides which partition is currently marked for spilling.
  int partitionId = ht.getToSpillPartitionId();
  HashPartition hp = ht.getHashPartitions()[partitionId];
  // Rows go into the partition's "matchfile" object container for later re-processing.
  ObjectContainer bigTable = hp.getMatchfileObjContainer();
  bigTable.add(row);
}
/**
 * Serialize one big-table row from a vectorized batch into the row-bytes spill
 * container of the given hash partition.
 *
 * @param batch the vectorized row batch holding the row
 * @param batchIndex index of the row within the batch
 * @param partitionId target spill partition
 * @throws IOException if serialization into the spill container fails
 */
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, int partitionId)
    throws IOException {
  HybridHashTableContainer smallTable =
      (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
  VectorRowBytesContainer spillContainer =
      smallTable.getHashPartitions()[partitionId].getMatchfileRowBytesContainer();
  Output out = spillContainer.getOuputForRowBytes();
  bigTableVectorSerializeRow.setOutputAppend(out);
  bigTableVectorSerializeRow.serializeWrite(batch, batchIndex);
  spillContainer.finishRow();
}
/** * Iterate over the big table row container and feed process() with leftover rows * @param partitionId the partition from which to take out spilled big table rows * @throws HiveException */ protected void reProcessBigTable(int partitionId) throws HiveException { // For binary join, firstSmallTable is the only small table; it has reference to spilled big // table rows; // For n-way join, since we only spill once, when processing the first small table, so only the // firstSmallTable has reference to the spilled big table rows. HashPartition partition = firstSmallTable.getHashPartitions()[partitionId]; ObjectContainer bigTable = partition.getMatchfileObjContainer(); LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition " + partitionId + ". Number of rows: " + bigTable.size()); while (bigTable.hasNext()) { Object row = bigTable.next(); process(row, conf.getPosBigTable()); } bigTable.clear(); }
/**
 * Iterate over the big table row container and feed process() with leftover rows.
 *
 * @param partitionId the partition from which to take out spilled big table rows
 * @throws HiveException if re-processing a row fails
 */
protected void reProcessBigTable(int partitionId) throws HiveException {
  // For binary join, firstSmallTable is the only small table; it has reference to spilled big
  // table rows;
  // For n-way join, since we only spill once, when processing the first small table, so only the
  // firstSmallTable has reference to the spilled big table rows.
  HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
  ObjectContainer bigTable = partition.getMatchfileObjContainer();
  LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition "
      + partitionId + ". Number of rows: " + bigTable.size());
  // Drain every spilled row back through the normal join path, then release the container.
  while (bigTable.hasNext()) {
    Object row = bigTable.next();
    process(row, conf.getPosBigTable());
  }
  bigTable.clear();
}
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorMapJoinHashTableResult hashTableResult) throws IOException { int partitionId = hashTableResult.spillPartitionId(); HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); // int length = output.getLength() - offset; rowBytesContainer.finishRow(); // LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); }
// NOTE(review): fragment — enclosing method not visible here. Looks up the hash
// partition that holds the spilled rows for this partition id; confirm against full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — this is the tail of a method signature plus the first
// statements of its body; the declaration itself is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];
// NOTE(review): this span stitches together fragments from several methods and is not
// a single coherent block (unreachable statements after 'break', duplicate declarations
// of 'hashPartitions'). Preserved verbatim — confirm against the full file before editing.
if (pos != conf.getPosBigTable()) {
  firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
  numPartitions = firstSmallTable.getHashPartitions().length;
  break;
  hybridHtContainer.dumpStats();
  HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
  HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
  if (hashPartitions[i].isHashMapOnDisk()) {
    try {
// NOTE(review): duplicate of the stitched fragment above in the raw input; not a single
// coherent block (unreachable statements after 'break', duplicate declarations of
// 'hashPartitions'). Preserved verbatim — confirm against the full file before editing.
if (pos != conf.getPosBigTable()) {
  firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
  numPartitions = firstSmallTable.getHashPartitions().length;
  break;
  hybridHtContainer.dumpStats();
  HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
  HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
  if (hashPartitions[i].isHashMapOnDisk()) {
    try {
// NOTE(review): fragment — enclosing method not visible. Fetches the hash partition
// holding spilled rows for this partition id; verify context in the full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — tail of a method signature plus opening statements; the
// method declaration is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];
// NOTE(review): fragment — fetches the spilled big-table row container from the chosen
// hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
// NOTE(review): fragment (duplicate in raw input) — fetches the spilled big-table row
// container from the chosen hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; cast to HybridHashTableContainer below
 * @param row big table row to spill
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException {
  HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
  // The container decides which partition is currently marked for spilling.
  int partitionId = ht.getToSpillPartitionId();
  HashPartition hp = ht.getHashPartitions()[partitionId];
  ObjectContainer bigTable = hp.getMatchfileObjContainer();
  bigTable.add(row);
}
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorMapJoinHashTableResult hashTableResult) throws IOException { int partitionId = hashTableResult.spillPartitionId(); HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); // int length = output.getLength() - offset; rowBytesContainer.finishRow(); // LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); }
/** * Iterate over the big table row container and feed process() with leftover rows * @param partitionId the partition from which to take out spilled big table rows * @throws HiveException */ protected void reProcessBigTable(int partitionId) throws HiveException { // For binary join, firstSmallTable is the only small table; it has reference to spilled big // table rows; // For n-way join, since we only spill once, when processing the first small table, so only the // firstSmallTable has reference to the spilled big table rows. HashPartition partition = firstSmallTable.getHashPartitions()[partitionId]; ObjectContainer bigTable = partition.getMatchfileObjContainer(); while (bigTable.hasNext()) { Object row = bigTable.next(); process(row, conf.getPosBigTable()); } bigTable.clear(); }
// NOTE(review): fragment (duplicate in raw input) — fetches the spilled big-table row
// container from the chosen hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
// NOTE(review): fragment — enclosing method not visible. Looks up the hash partition
// holding the spilled rows for this partition id; verify context in the full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — tail of a method signature plus opening statements; the
// method declaration is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];