/**
 * Renders this schedule for logging/debugging: the plan type, then every
 * scheduled fragment keyed by table name, then every fetch expanded into its
 * full pull-server URIs (one fetch may yield several URIs when the encoded
 * URL would exceed {@code maxUrlLength}).
 */
@Override
public String toString() {
  StringBuilder builder = new StringBuilder();
  // Chain appends instead of concatenating inside append() — same output,
  // avoids building an intermediate String.
  builder.append(plan.getType()).append(" \n");
  for (Entry<String, Set<FragmentProto>> e : fragMap.entrySet()) {
    builder.append(e.getKey()).append(" : ");
    for (FragmentProto fragment : e.getValue()) {
      builder.append(fragment).append(", ");
    }
  }
  for (Entry<String, Set<FetchProto>> e : fetchMap.entrySet()) {
    builder.append(e.getKey()).append(" : ");
    for (FetchProto t : e.getValue()) {
      for (URI uri : Repartitioner.createFullURIs(maxUrlLength, t)) {
        builder.append(uri).append(" ");
      }
    }
  }
  return builder.toString();
}
public static void scheduleFetchesByEvenDistributedVolumes(Stage stage, Map<Integer, FetchGroupMeta> partitions, String tableName, int num) { Map<String, List<FetchProto>>[] fetchsArray = makeEvenDistributedFetchImpl(partitions, tableName, num).getSecond(); // Schedule FetchImpls for (Map<String, List<FetchProto>> eachFetches : fetchsArray) { Stage.scheduleFetches(stage, eachFetches); } }
/**
 * Gets the pull-server URIs for a single fetch.
 *
 * <p>Delegates to {@code createFetchURL} with the "full" flag set; presumably
 * the result may contain several URIs when the encoded URL would otherwise
 * exceed {@code maxUrlLength} — confirm against {@code createFetchURL}.
 *
 * @param maxUrlLength maximum length allowed for a single generated URL
 * @param fetch        the fetch descriptor to encode
 * @return the pull-server URIs covering this fetch
 */
public static List<URI> createFullURIs(int maxUrlLength, FetchProto fetch) {
  return createFetchURL(maxUrlLength, fetch, true);
}
Map<Integer, List<IntermediateEntry>> hashed = hashByKey(partitions); for (Entry<Integer, List<IntermediateEntry>> interm : hashed.entrySet()) { Map<Task.PullHost, List<IntermediateEntry>> hashedByHost = hashByHost(interm.getValue()); for (Entry<Task.PullHost, List<IntermediateEntry>> e : hashedByHost.entrySet()) { LOG.info(stage.getId() + ", No Grouping Column - determinedTaskNum is set to 1"); } else { TableStats totalStat = computeChildBlocksStats(stage.getContext(), masterPlan, stage.getId()); if (totalStat.getNumRows() == 0) { determinedTaskNum = 1; scheduleScatteredHashShuffleFetches(schedulerContext, stage, intermediates, scan.getTableName()); } else { schedulerContext.setEstimatedTaskNum(determinedTaskNum); scheduleFetchesByEvenDistributedVolumes(stage, finalFetches, scan.getTableName(), determinedTaskNum); LOG.info(stage.getId() + ", DeterminedTaskNum : " + determinedTaskNum);
List<List<FetchProto>> eachFetches = splitOrMergeIntermediates(tableName, listEntry.getKey(), partitionEntries, splitVolume, pageSize); if (eachFetches != null && !eachFetches.isEmpty()) {
List<URI> uris = Repartitioner.createFullURIs(2 * StorageUnit.KB, fetch.getProto()); assertEquals(1, uris.size()); //In Hash Suffle, Fetcher return only one URI per partition. Repartitioner.mergeIntermediateByPullHost(hashEntries);
TableStats totalStat = computeChildBlocksStats(stage.getContext(), masterPlan, stage.getId()); scheduleFetchesByRoundRobin(stage, map, scan.getTableName(), determinedTaskNum);
partitionScanPaths = partitionScan.getInputPaths(); getFragmentsFromPartitionedTable((FileTablespace) space, eachScan, tableDesc); partitionScan.setInputPaths(partitionScanPaths); addJoinShuffle(stage, entry.getKey(), entry.getValue());
LOG.debug("Table Id: " + f.getName() + ", Simple URIs: " + Repartitioner.createSimpleURIs(maxUrlLength, f));
/**
 * Initializes the scan executor for {@code tableDesc}.
 *
 * <p>Collects fragments (via the partition-aware path for partitioned tables,
 * plain splits otherwise), then either builds and initializes a
 * {@link PartitionMergeScanExec} over them, or closes this object when there
 * is nothing to scan.
 *
 * @throws IOException   if split/fragment enumeration fails
 * @throws TajoException on Tajo-level errors from the tablespace
 */
private void initSeqScanExec() throws IOException, TajoException {
  Tablespace tablespace = TablespaceManager.get(tableDesc.getUri());
  List<Fragment> fragments = Lists.newArrayList();
  if (tableDesc.hasPartition()) {
    // Partitioned tables must be backed by a FileTablespace; fragments come
    // from the partition-pruning path in Repartitioner.
    FileTablespace fileTablespace = TUtil.checkTypeAndGet(tablespace, FileTablespace.class);
    fragments.addAll(Repartitioner.getFragmentsFromPartitionedTable(fileTablespace, scanNode, tableDesc));
  } else {
    fragments.addAll(tablespace.getSplits(tableDesc.getName(), tableDesc, scanNode.getQual()));
  }
  if (!fragments.isEmpty()) {
    FragmentProto[] fragmentProtos =
        FragmentConvertor.toFragmentProtoArray(fragments.toArray(new Fragment[]{}));
    // NOTE(review): execution block id is hard-coded to 1 and task/attempt ids
    // to 0 — this context appears to be synthetic, not tied to a real query plan.
    this.taskContext = new TaskAttemptContext(
        new QueryContext(tajoConf), null,
        new TaskAttemptId(new TaskId(new ExecutionBlockId(queryId, 1), 0), 0),
        fragmentProtos, null);
    this.scanExec = new PartitionMergeScanExec(taskContext, scanNode, fragmentProtos);
    this.scanExec.init();
  } else {
    // Nothing to scan: release resources instead of creating an empty executor.
    close();
  }
}
urlPrefix.append("h"); } else if (fetch.getType() == RANGE_SHUFFLE) { urlPrefix.append("r").append("&").append(getRangeParam(fetch)); } else if (fetch.getType() == SCATTERED_HASH_SHUFFLE) { urlPrefix.append("s");
addJoinShuffle(stage, entry.getKey(), entry.getValue());
Map<Integer, List<IntermediateEntry>> hashed = hashByKey(partitions); for (Entry<Integer, List<IntermediateEntry>> interm : hashed.entrySet()) { Map<Task.PullHost, List<IntermediateEntry>> hashedByHost = hashByHost(interm.getValue()); for (Entry<Task.PullHost, List<IntermediateEntry>> e : hashedByHost.entrySet()) { LOG.info(stage.getId() + ", No Grouping Column - determinedTaskNum is set to 1"); } else { TableStats totalStat = computeChildBlocksStats(stage.getContext(), masterPlan, stage.getId()); if (totalStat.getNumRows() == 0) { determinedTaskNum = 1; scheduleScatteredHashShuffleFetches(schedulerContext, stage, intermediates, scan.getTableName()); } else { schedulerContext.setEstimatedTaskNum(determinedTaskNum); scheduleFetchesByEvenDistributedVolumes(stage, finalFetches, scan.getTableName(), determinedTaskNum); LOG.info(stage.getId() + ", DeterminedTaskNum : " + determinedTaskNum);
List<List<FetchProto>> eachFetches = splitOrMergeIntermediates(tableName, listEntry.getKey(), partitionEntries, splitVolume, pageSize); if (eachFetches != null && !eachFetches.isEmpty()) {
TableStats totalStat = computeChildBlocksStats(stage.getContext(), masterPlan, stage.getId()); scheduleFetchesByRoundRobin(stage, map, scan.getTableName(), determinedTaskNum);
LOG.debug("Table Id: " + f.getName() + ", Simple URIs: " + Repartitioner.createSimpleURIs(maxUrlLength, f));
partitionScanPaths = partitionScan.getInputPaths(); scanFragments = getFragmentsFromPartitionedTable(space, scan, desc); } else { scanFragments = space.getSplits(scan.getCanonicalName(), desc, scan.getQual());
List<List<FetchProto>> fetches = Repartitioner.splitOrMergeIntermediates("name", ebId, intermediateEntries, splitVolume, 10 * 1024 * 1024); assertEquals(32, fetches.size());
/**
 * Debug dump: plan type, scheduled fragments per table, and the expanded
 * pull-server URIs of every scheduled fetch.
 */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append(plan.getType()).append(" \n");
  for (Entry<String, Set<FragmentProto>> fragEntry : fragMap.entrySet()) {
    sb.append(fragEntry.getKey()).append(" : ");
    for (FragmentProto proto : fragEntry.getValue()) {
      sb.append(proto).append(", ");
    }
  }
  for (Entry<String, Set<FetchProto>> fetchEntry : fetchMap.entrySet()) {
    sb.append(fetchEntry.getKey()).append(" : ");
    for (FetchProto fetch : fetchEntry.getValue()) {
      for (URI fetchUri : Repartitioner.createFullURIs(maxUrlLength, fetch)) {
        sb.append(fetchUri).append(" ");
      }
    }
  }
  return sb.toString();
}
/**
 * Distributes the partition groups into {@code num} volume-balanced fetch
 * sets (via {@code makeEvenDistributedFetchImpl}) and schedules each set on
 * the given stage.
 *
 * @param stage      stage to schedule the fetches on
 * @param partitions partition id to fetch-group metadata
 * @param tableName  name of the table the fetches read from
 * @param num        number of fetch sets to distribute the volume across
 */
public static void scheduleFetchesByEvenDistributedVolumes(Stage stage, Map<Integer, FetchGroupMeta> partitions, String tableName, int num) { Map<String, List<FetchProto>>[] fetchsArray = makeEvenDistributedFetchImpl(partitions, tableName, num).getSecond(); // Schedule FetchImpls for (Map<String, List<FetchProto>> eachFetches : fetchsArray) { Stage.scheduleFetches(stage, eachFetches); } }