/**
 * Initializes this operator and, when skew-join handling is enabled, creates and
 * configures the skew-join handler.
 *
 * <p>The follow-up job counter is registered in {@code statsMap} whether or not
 * skew-join handling is enabled.
 *
 * @param hconf configuration forwarded to the parent initializer and to the handler
 * @throws HiveException propagated from the calls made here
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  if (handleSkewJoin) {
    // Publish the handler to the field before configuring it, as the original did.
    final SkewJoinHandler handler = new SkewJoinHandler(this);
    skewJoinKeyContext = handler;
    handler.initiliaze(hconf);
    handler.setSkewJoinJobCounter(skewjoin_followup_jobs);
  }

  final String followUpJobsKey = SkewkeyTableCounter.SKEWJOINFOLLOWUPJOBS.toString();
  statsMap.put(followUpJobsKey, skewjoin_followup_jobs);
}
/**
 * Closes the operator. When skew-join handling is enabled the skew-join handler is
 * closed first; the parent operator is closed afterwards in either case.
 *
 * @param abort forwarded unchanged to the handler and to the parent
 * @throws HiveException propagated from either close call
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  if (!handleSkewJoin) {
    super.closeOp(abort);
    return;
  }
  skewJoinKeyContext.close(abort);
  super.closeOp(abort);
}
public void close(boolean abort) throws HiveException { if (!abort) { try { endGroup(); commit(); } catch (IOException e) { throw new HiveException(e); Path bigKeyPath = getOperatorOutputPath(specPath); FileSystem fs = bigKeyPath.getFileSystem(hconf); delete(bigKeyPath, fs); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { if (((byte) smallKeyTbl) == bigKeyTbl) { delete(getOperatorOutputPath(specPath), fs);
/**
 * Renames the operator output path derived from {@code specPath} to the matching
 * final path.
 *
 * @param specPath path from which both the output path and the final path are derived
 * @param ignoreNonExisting when {@code true}, return without renaming if the output
 *     path does not exist
 * @throws IOException if the rename reports failure, or propagated from the
 *     file-system calls
 */
private void commitOutputPathToFinalPath(Path specPath, boolean ignoreNonExisting)
    throws IOException {
  final Path source = getOperatorOutputPath(specPath);
  final Path destination = getOperatorFinalPath(specPath);
  final FileSystem fileSystem = source.getFileSystem(hconf);

  // The existence check is only performed when the caller asked to tolerate absence.
  final boolean nothingToCommit = ignoreNonExisting && !fileSystem.exists(source);
  if (nothingToCommit) {
    return;
  }

  final boolean renamed = fileSystem.rename(source, destination);
  if (!renamed) {
    throw new IOException("Unable to rename output to: " + destination);
  }
}
/** * Forward a record of join results. * * @throws HiveException */ @Override public void endGroup() throws HiveException { // if this is a skew key, we need to handle it in a separate map reduce job. if (handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) { try { skewJoinKeyContext.endGroup(); } catch (IOException e) { LOG.error(e.getMessage(), e); throw new HiveException(e); } return; } else { checkAndGenObject(); } }
/**
 * Ends the current key group on the skew-join side.
 *
 * <p>If a skew key was seen in this group, the row container of the big-key table is
 * copied to its operator output path, followed by the row container of every other
 * alias whose storage entry is non-null. The skew flag is cleared once the method
 * completes normally.
 *
 * @throws IOException propagated from the file-system and copy calls
 * @throws HiveException propagated from the copy calls
 */
void endGroup() throws IOException, HiveException {
  if (skewKeyInCurrentGroup) {
    final Path bigKeyDir = conf.getBigKeysDirMap().get((byte) currBigKeyTag);
    final RowContainer<ArrayList<Object>> bigKeyRows =
        (RowContainer) joinOp.storage[currBigKeyTag];
    final Path bigKeyOutput = getOperatorOutputPath(bigKeyDir);
    final FileSystem fileSystem = bigKeyOutput.getFileSystem(hconf);
    bigKeyRows.copyToDFSDirecory(fileSystem, bigKeyOutput);

    for (int alias = 0; alias < numAliases; alias++) {
      final byte aliasTag = (byte) alias;
      // The big-key table itself was already written above.
      if (aliasTag != currBigKeyTag) {
        final RowContainer<ArrayList<Object>> rows = (RowContainer) joinOp.storage[alias];
        if (rows != null) {
          final Path smallKeyDir =
              conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get(aliasTag);
          rows.copyToDFSDirecory(fileSystem, getOperatorOutputPath(smallKeyDir));
        }
      }
    }
  }
  skewKeyInCurrentGroup = false;
}
private void commit() throws IOException { for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) { // if we did not see a skew key in this table, continue to next table // we are trying to avoid an extra call of FileSystem.exists() Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl)); if (existing == null || !existing) { continue; } Path specPath = conf.getBigKeysDirMap().get( Byte.valueOf((byte) bigKeyTbl)); commitOutputPathToFinalPath(specPath, false); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { if (smallKeyTbl == bigKeyTbl) { continue; } specPath = conf.getSmallKeysDirMap() .get(Byte.valueOf((byte) bigKeyTbl)).get( Byte.valueOf((byte) smallKeyTbl)); // the file may not exist, and we just ignore this commitOutputPathToFinalPath(specPath, true); } } }
// Delegate to the skew-join handler for this tag.
// NOTE(review): presumably `tag` identifies the alias of the row being processed —
// confirm against the enclosing method, which is not visible here.
skewJoinKeyContext.handleSkew(tag);
/**
 * Moves the operator output derived from {@code specPath} to its final location by
 * renaming it.
 *
 * @param specPath path from which both the output path and the final path are derived
 * @param ignoreNonExisting when {@code true}, a missing output path is not an error and
 *     the method returns without renaming
 * @throws IOException if the rename reports failure, or propagated from the
 *     file-system calls
 */
private void commitOutputPathToFinalPath(Path specPath, boolean ignoreNonExisting)
    throws IOException {
  final Path pending = getOperatorOutputPath(specPath);
  final Path committed = getOperatorFinalPath(specPath);
  final FileSystem fileSystem = pending.getFileSystem(hconf);

  // Existence is only probed when the caller tolerates a missing output.
  if (ignoreNonExisting) {
    final boolean present = fileSystem.exists(pending);
    if (!present) {
      return;
    }
  }

  if (fileSystem.rename(pending, committed)) {
    return;
  }
  throw new IOException("Unable to rename output to: " + committed);
}
/** * Forward a record of join results. * * @throws HiveException */ @Override public void endGroup() throws HiveException { // if this is a skew key, we need to handle it in a separate map reduce job. if (handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) { try { skewJoinKeyContext.endGroup(); } catch (IOException e) { LOG.error(e.getMessage(), e); throw new HiveException(e); } return; } else { checkAndGenObject(); } }
/**
 * Finishes the current key group for skew-join handling.
 *
 * <p>Nothing is written unless a skew key was seen in the group. In that case the
 * big-key table's row container is copied to its operator output path first, and then
 * each other alias with a non-null storage entry is copied to its own output path. The
 * skew flag is reset when the method completes normally.
 *
 * @throws IOException propagated from the file-system and copy calls
 * @throws HiveException propagated from the copy calls
 */
void endGroup() throws IOException, HiveException {
  if (skewKeyInCurrentGroup) {
    final Path bigDir = conf.getBigKeysDirMap().get((byte) currBigKeyTag);
    final RowContainer<ArrayList<Object>> bigRows =
        (RowContainer) joinOp.storage[currBigKeyTag];
    final Path bigOut = getOperatorOutputPath(bigDir);
    final FileSystem dfs = bigOut.getFileSystem(hconf);
    bigRows.copyToDFSDirecory(dfs, bigOut);

    int idx = 0;
    while (idx < numAliases) {
      final byte tag = (byte) idx;
      final boolean isBigKeyTable = tag == currBigKeyTag;
      if (!isBigKeyTable) {
        final RowContainer<ArrayList<Object>> smallRows =
            (RowContainer) joinOp.storage[idx];
        if (smallRows != null) {
          final Path smallDir =
              conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get(tag);
          smallRows.copyToDFSDirecory(dfs, getOperatorOutputPath(smallDir));
        }
      }
      idx++;
    }
  }
  skewKeyInCurrentGroup = false;
}
private void commit() throws IOException { for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) { // if we did not see a skew key in this table, continue to next table // we are trying to avoid an extra call of FileSystem.exists() Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl)); if (existing == null || !existing) { continue; } Path specPath = conf.getBigKeysDirMap().get( Byte.valueOf((byte) bigKeyTbl)); commitOutputPathToFinalPath(specPath, false); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { if (smallKeyTbl == bigKeyTbl) { continue; } specPath = conf.getSmallKeysDirMap() .get(Byte.valueOf((byte) bigKeyTbl)).get( Byte.valueOf((byte) smallKeyTbl)); // the file may not exist, and we just ignore this commitOutputPathToFinalPath(specPath, true); } } }
// Pass the current tag to the skew-join handler.
// NOTE(review): what handleSkew records for the tag is defined elsewhere — confirm
// against its implementation before relying on any specific side effect.
skewJoinKeyContext.handleSkew(tag);
public void close(boolean abort) throws HiveException { if (!abort) { try { endGroup(); commit(); } catch (IOException e) { throw new HiveException(e); Path bigKeyPath = getOperatorOutputPath(specPath); FileSystem fs = bigKeyPath.getFileSystem(hconf); delete(bigKeyPath, fs); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { if (((byte) smallKeyTbl) == bigKeyTbl) { delete(getOperatorOutputPath(specPath), fs);
/**
 * Runs the parent initialization and then sets up skew-join support.
 *
 * <p>A new skew-join handler is created, initialized with {@code hconf} and given the
 * follow-up job counter only when skew-join handling is enabled. The counter itself is
 * always added to {@code statsMap}.
 *
 * @param hconf configuration forwarded to the parent initializer and to the handler
 * @throws HiveException propagated from the calls made here
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  final String counterName = SkewkeyTableCounter.SKEWJOINFOLLOWUPJOBS.toString();
  if (!handleSkewJoin) {
    statsMap.put(counterName, skewjoin_followup_jobs);
    return;
  }

  skewJoinKeyContext = new SkewJoinHandler(this);
  skewJoinKeyContext.initiliaze(hconf);
  skewJoinKeyContext.setSkewJoinJobCounter(skewjoin_followup_jobs);
  statsMap.put(counterName, skewjoin_followup_jobs);
}
/**
 * Commits one operator output by renaming it from its output path to its final path,
 * both of which are derived from {@code specPath}.
 *
 * @param specPath path from which both the output path and the final path are derived
 * @param ignoreNonExisting when {@code true}, the method returns quietly if the output
 *     path does not exist
 * @throws IOException if the rename reports failure, or propagated from the
 *     file-system calls
 */
private void commitOutputPathToFinalPath(Path specPath, boolean ignoreNonExisting)
    throws IOException {
  final Path from = getOperatorOutputPath(specPath);
  final Path to = getOperatorFinalPath(specPath);
  final FileSystem fileSystem = from.getFileSystem(hconf);

  // Short-circuit keeps the exists() probe off the path where absence is an error.
  final boolean skip = ignoreNonExisting && !fileSystem.exists(from);
  if (!skip) {
    final boolean moved = fileSystem.rename(from, to);
    if (!moved) {
      throw new IOException("Unable to rename output to: " + to);
    }
  }
}
/**
 * Shuts the operator down. The skew-join handler, if skew-join handling is enabled, is
 * closed before the parent operator.
 *
 * @param abort forwarded unchanged to the handler and to the parent
 * @throws HiveException propagated from either close call
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  final boolean closeHandlerFirst = handleSkewJoin;
  if (closeHandlerFirst) {
    skewJoinKeyContext.close(abort);
  }

  super.closeOp(abort);
}
/** * Forward a record of join results. * * @throws HiveException */ @Override public void endGroup() throws HiveException { // if this is a skew key, we need to handle it in a separate map reduce job. if (handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) { try { skewJoinKeyContext.endGroup(); } catch (IOException e) { LOG.error(e.getMessage(), e); throw new HiveException(e); } return; } else { checkAndGenObject(); } }
/**
 * Ends the current key group on the skew-join side.
 *
 * <p>If a skew key was seen in this group, the row container stored for the big-key
 * table is copied to its operator output path, followed by the row container of every
 * other alias whose storage entry is non-null. The skew flag is cleared once the method
 * completes normally.
 *
 * @throws IOException propagated from the file-system and copy calls
 * @throws HiveException propagated from the copy calls
 */
void endGroup() throws IOException, HiveException {
  if (skewKeyInCurrentGroup) {
    final String bigKeyDir = conf.getBigKeysDirMap().get((byte) currBigKeyTag);
    final RowContainer<ArrayList<Object>> bigKeyRows =
        (RowContainer) joinOp.storage.get(Byte.valueOf((byte) currBigKeyTag));
    final Path bigKeyOutput = getOperatorOutputPath(bigKeyDir);
    final FileSystem fileSystem = bigKeyOutput.getFileSystem(hconf);
    bigKeyRows.copyToDFSDirecory(fileSystem, bigKeyOutput);

    for (int alias = 0; alias < numAliases; alias++) {
      final byte aliasTag = (byte) alias;
      // The big-key table itself was already written above.
      if (aliasTag != currBigKeyTag) {
        final RowContainer<ArrayList<Object>> rows =
            (RowContainer) joinOp.storage.get(Byte.valueOf(aliasTag));
        if (rows != null) {
          final String smallKeyDir =
              conf.getSmallKeysDirMap().get((byte) currBigKeyTag).get(aliasTag);
          rows.copyToDFSDirecory(fileSystem, getOperatorOutputPath(smallKeyDir));
        }
      }
    }
  }
  skewKeyInCurrentGroup = false;
}
private void commit() throws IOException { for (int bigKeyTbl = 0; bigKeyTbl < numAliases; bigKeyTbl++) { // if we did not see a skew key in this table, continue to next table // we are trying to avoid an extra call of FileSystem.exists() Boolean existing = bigKeysExistingMap.get(Byte.valueOf((byte) bigKeyTbl)); if (existing == null || !existing) { continue; } String specPath = conf.getBigKeysDirMap().get( Byte.valueOf((byte) bigKeyTbl)); commitOutputPathToFinalPath(specPath, false); for (int smallKeyTbl = 0; smallKeyTbl < numAliases; smallKeyTbl++) { if (smallKeyTbl == bigKeyTbl) { continue; } specPath = conf.getSmallKeysDirMap() .get(Byte.valueOf((byte) bigKeyTbl)).get( Byte.valueOf((byte) smallKeyTbl)); // the file may not exist, and we just ignore this commitOutputPathToFinalPath(specPath, true); } } }