private static JobSpecification combineIntakeCollectJobs(MetadataProvider metadataProvider, Feed feed,
        JobSpecification intakeJob, List<JobSpecification> jobsList, List<FeedConnection> feedConnections,
        String[] intakeLocations) throws AlgebricksException, HyracksDataException {
    JobSpecification jobSpec = new JobSpecification(intakeJob.getFrameSize());
    // Copy the ingestion operator out of the compiled intake job.
    FeedIntakeOperatorDescriptor firstOp =
            (FeedIntakeOperatorDescriptor) intakeJob.getOperatorMap().get(new OperatorDescriptorId(0));
    FeedIntakeOperatorDescriptor ingestionOp;
    if (firstOp.getAdaptorFactory() == null) {
        // ... re-create the intake operator from the adapter library configuration (elided)
    } else {
        // ... re-create the intake operator from the existing adapter factory (elided)
    }
    // ... create replicateOp, a replicate operator with one output branch per collect job (elided)
    jobSpec.connect(new OneToOneConnectorDescriptor(jobSpec), ingestionOp, 0, replicateOp, 0);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, ingestionOp, intakeLocations);
    PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, replicateOp, intakeLocations);
    // Copy each collect job's operators and connectors into the combined job.
    Map<OperatorDescriptorId, OperatorDescriptorId> operatorIdMapping = new HashMap<>();
    for (int iter1 = 0; iter1 < jobsList.size(); iter1++) {
        JobSpecification subJob = jobsList.get(iter1);
        operatorIdMapping.clear();
        Map<OperatorDescriptorId, IOperatorDescriptor> operatorsMap = subJob.getOperatorMap();
        String datasetName = feedConnections.get(iter1).getDatasetName();
        FeedConnectionId feedConnectionId = new FeedConnectionId(ingestionOp.getEntityId(), datasetName);
        for (IOperatorDescriptor opDesc : operatorsMap.values()) {
            // ... wrap storage/compute operators in feed meta operators keyed by feedConnectionId,
            // consulting subJob.getOperatorInputMap().get(opDesc.getOperatorId()).get(0) as needed (elided)
            OperatorDescriptorId oldId = opDesc.getOperatorId();
            OperatorDescriptorId opId = jobSpec.createOperatorDescriptorId(opDesc);
            operatorIdMapping.put(oldId, opId);
        }
        subJob.getConnectorMap().forEach((key, connDesc) -> {
            ConnectorDescriptorId newConnId;
            if (connDesc instanceof MToNPartitioningConnectorDescriptor) {
                // ... replace with a message-aware M:N partitioning connector (elided)
                newConnId = connDesc.getConnectorId();
            } else {
                newConnId = jobSpec.createConnectorDescriptor(connDesc);
            }
            // ... record the key-to-newConnId mapping for re-wiring (elided)
        });
        // ... re-wire the copied operators and connectors using the id mappings (elided)
    }
    // ... carry over partition constraints and roots, then return the combined spec (elided)
    return jobSpec;
}
private static Pair<JobSpecification, IAdapterFactory> buildFeedIntakeJobSpec(Feed feed,
        MetadataProvider metadataProvider, FeedPolicyAccessor policyAccessor) throws Exception {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    spec.setFrameSize(metadataProvider.getApplicationContext().getCompilerProperties().getFrameSize());
    IAdapterFactory adapterFactory;
    IOperatorDescriptor feedIngestor;
    AlgebricksPartitionConstraint ingesterPc;
    Triple<IOperatorDescriptor, AlgebricksPartitionConstraint, IAdapterFactory> t =
            metadataProvider.buildFeedIntakeRuntime(spec, feed, policyAccessor);
    feedIngestor = t.first;
    ingesterPc = t.second;
    adapterFactory = t.third;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, feedIngestor, ingesterPc);
    NullSinkOperatorDescriptor nullSink = new NullSinkOperatorDescriptor(spec);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, nullSink, ingesterPc);
    spec.connect(new OneToOneConnectorDescriptor(spec), feedIngestor, 0, nullSink, 0);
    spec.addRoot(nullSink);
    return Pair.of(spec, adapterFactory);
}
/**
 * Generates a DOT format plan for a {@link JobSpecification} that can be visualized using any DOT visualizer.
 *
 * @param jobSpecification the job specification
 * @return the plan in DOT format
 */
public static String generate(final JobSpecification jobSpecification) {
    final DotFormatBuilder graphBuilder =
            new DotFormatBuilder(DotFormatBuilder.StringValue.of("JobSpecification"));
    final Map<ConnectorDescriptorId, IConnectorDescriptor> connectorMap = jobSpecification.getConnectorMap();
    final Set<Constraint> constraints = jobSpecification.getUserConstraints();
    Map<ConnectorDescriptorId, Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>>> cOp =
            jobSpecification.getConnectorOperatorMap();
    cOp.forEach((connId, srcAndDest) -> addToGraph(graphBuilder, constraints, connectorMap, connId, srcAndDest));
    return graphBuilder.getDotDocument();
}
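// A hedged usage sketch for the generator above: render a compiled job's DAG as DOT and write
// it out for a viewer such as Graphviz. The enclosing utility class name DotFormatGenerator is
// an assumption, not confirmed by this excerpt.
public static void dumpPlan(JobSpecification jobSpec) throws java.io.IOException {
    String dot = DotFormatGenerator.generate(jobSpec); // assumed class name for the method above
    java.nio.file.Files.writeString(java.nio.file.Path.of("job.dot"), dot);
    // render with: dot -Tsvg job.dot -o job.svg
}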
private void setPartitionConstraintsTopdown(OperatorDescriptorId opId,
        Map<IConnectorDescriptor, TargetConstraint> tgtConstraints, IOperatorDescriptor parentOp) {
    List<IConnectorDescriptor> opInputs = jobSpec.getOperatorInputMap().get(opId);
    AlgebricksPartitionConstraint opConstraint;
    IOperatorDescriptor opDesc = jobSpec.getOperatorMap().get(opId);
    if (opInputs != null) {
        for (IConnectorDescriptor conn : opInputs) {
            ConnectorDescriptorId cid = conn.getConnectorId();
            org.apache.commons.lang3.tuple.Pair<org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>, org.apache.commons.lang3.tuple.Pair<IOperatorDescriptor, Integer>> p =
                    jobSpec.getConnectorOperatorMap().get(cid);
            IOperatorDescriptor src = p.getLeft().getLeft();
            TargetConstraint constraint = tgtConstraints.get(conn);
            if (constraint == TargetConstraint.SAME_COUNT) {
                // Propagate this operator's constraint to a still-unconstrained source.
                opConstraint = partitionConstraintMap.get(opDesc);
                if (partitionConstraintMap.get(src) == null && opConstraint != null) {
                    partitionConstraintMap.put(src, opConstraint);
                    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(jobSpec, src, opConstraint);
                }
            }
            // Post-order DFS
            setPartitionConstraintsTopdown(src.getOperatorId(), tgtConstraints, opDesc);
        }
    }
}
private JobSpecification compilePlanImpl(ILogicalPlan plan, boolean isNestedPlan, IOperatorSchema outerPlanSchema,
        IJobletEventListenerFactory jobEventListenerFactory) throws AlgebricksException {
    JobSpecification spec = new JobSpecification(context.getFrameSize());
    if (jobEventListenerFactory != null) {
        spec.setJobletEventListenerFactory(jobEventListenerFactory);
    }
    List<ILogicalOperator> rootOps = new ArrayList<>();
    JobBuilder builder = new JobBuilder(spec, context.getClusterLocations());
    for (Mutable<ILogicalOperator> opRef : plan.getRoots()) {
        compileOpRef(opRef, spec, builder, outerPlanSchema);
        rootOps.add(opRef.getValue());
    }
    reviseEdges(builder);
    operatorVisitedToParents.clear();
    builder.buildSpec(rootOps);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    // Do not do activity cluster planning because it is slow on large clusters.
    spec.setUseConnectorPolicyForScheduling(false);
    if (isNestedPlan) {
        spec.setMetaOps(builder.getGeneratedMetaOps());
    }
    return spec;
}
private static JobSpecification createJob(Options options) {
    JobSpecification spec = new JobSpecification(options.frameSize);
    // ... build btreeScanOp, sorter, btreeBulkLoad, and the NoOp sink nsOpDesc (elided in this excerpt)
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeScanOp, 0, sorter, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), btreeBulkLoad, 0, nsOpDesc, 0);
    spec.addRoot(nsOpDesc);
    return spec;
}
if (isOverridingKeyFieldTypes && !enforcedItemType.equals(itemType)) {
    // Cast records to the enforced type before extracting secondary keys.
    sourceOp = createCastOp(spec, dataset.getDatasetType(), index.isEnforced());
    spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, sourceOp, 0);
}
metaOp.setSourceLocation(sourceLoc);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
// When no cast is needed, sourceOp is the primary scan itself.
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0);
if (anySecondaryKeyIsNullable || isOverridingKeyFieldTypes) {
    spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
    spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, tokenizerOp, 0);
} else {
    spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, tokenizerOp, 0);
}
spec.connect(new OneToOneConnectorDescriptor(spec), tokenizerOp, 0, sortOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, invIndexBulkLoadOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), invIndexBulkLoadOp, 0, metaOp, 0);
spec.addRoot(metaOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
private static JobSpecification createJob(FileSplit[] ordersSplits, FileSplit[] outputSplit, String memBufferAlg,
        int frameLimit, int frameSize, int limit, boolean usingHeapSorter) {
    JobSpecification spec = new JobSpecification();
    spec.setFrameSize(frameSize);
    IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
    FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
            /* tuple parser factory and record descriptor for the ORDERS file (elided) */);
    // ... build the sorter (top-K heap sort or external sort, per usingHeapSorter/memBufferAlg) and
    // the file-writing printer (elided), then pin the printer to the output split:
    createPartitionConstraint(spec, printer, outputSplit);
    spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
    spec.connect(
            new MToNPartitioningMergingConnectorDescriptor(spec,
                    new FieldHashPartitionComputerFactory(SortFields, orderBinaryHashFunctionFactories),
                    SortFields /* , comparator factories and normalized-key computer (elided) */),
            sorter, 0, printer, 0);
    spec.addRoot(printer);
    return spec;
}
@Override
public JobSpecification buildCreationJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, index, itemType, metaType,
            mergePolicyFactory, mergePolicyProperties);
    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(storageComponentProvider.getStorageManager(),
            secondaryFileSplitProvider, resourceFactory, true);
    IndexCreateOperatorDescriptor secondaryIndexCreateOp =
            new IndexCreateOperatorDescriptor(spec, indexBuilderFactory);
    secondaryIndexCreateOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, secondaryIndexCreateOp,
            secondaryPartitionConstraint);
    spec.addRoot(secondaryIndexCreateOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
private static void populateDataToRebalanceTarget(Dataset source, Dataset target, MetadataProvider metadataProvider,
        IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    TxnId txnId = metadataProvider.getTxnIdFactory().create();
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(txnId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);

    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);

    // Creates the primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source);

    // Creates the primary index upsert op.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);

    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, target);

    // Connects empty-tuple-source and scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);

    // Connects scan and upsert, hash-partitioning on the target's primary keys.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys, target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);

    // Connects upsert and commit.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);

    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
@Override
public void buildSpec(List<ILogicalOperator> roots) throws AlgebricksException {
    buildAsterixComponents();
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = setupConnectors();
    for (ILogicalOperator r : roots) {
        IOperatorDescriptor opDesc = findOpDescForAlgebraicOp(r);
        jobSpec.addRoot(opDesc);
    }
    setAllPartitionConstraints(tgtConstraints);
}
acg.setFrameSize(spec.getFrameSize());
acg.setMaxReattempts(spec.getMaxReattempts());
acg.setJobletEventListenerFactory(spec.getJobletEventListenerFactory());
acg.setGlobalJobDataFactory(spec.getGlobalJobDataFactory());
acg.setConnectorPolicyAssignmentPolicy(spec.getConnectorPolicyAssignmentPolicy());
acg.setUseConnectorPolicyForScheduling(spec.isUseConnectorPolicyForScheduling());
final Set<Constraint> constraints = new HashSet<>();
final IConstraintAcceptor acceptor = new IConstraintAcceptor() {
    @Override
    public void addConstraint(Constraint constraint) {
        constraints.add(constraint); // collect constraints contributed by operators and connectors
    }
};
// ... visit the spec's operators and connectors so they can contribute scheduling constraints (elided)
constraints.addAll(spec.getUserConstraints());
return new IActivityClusterGraphGenerator() {
    // ... @Override methods exposing the constraints and the initialized activity cluster graph (elided)
};
@Test
public void testJobLocations() {
    final String nc1 = "nc1";
    final String nc2 = "nc2";
    final NodeJobTracker nodeJobTracker = new NodeJobTracker();
    nodeJobTracker.notifyNodeJoin(nc1, null);
    nodeJobTracker.notifyNodeJoin(nc2, null);
    final JobSpecification jobSpec = new JobSpecification();
    // add only nc1 to the job locations
    final ConstantExpression nc1Location = new ConstantExpression(nc1);
    final LValueConstraintExpression lValueMock = Mockito.mock(LValueConstraintExpression.class);
    jobSpec.getUserConstraints().add(new Constraint(lValueMock, nc1Location));
    final String[] clusterLocation = new String[] { nc1, nc2 };
    final AlgebricksAbsolutePartitionConstraint jobLocations = APIFramework.getJobLocations(jobSpec,
            nodeJobTracker, new AlgebricksAbsolutePartitionConstraint(clusterLocation));
    // ensure nc2 wasn't included
    Assert.assertEquals(1, jobLocations.getLocations().length);
    Assert.assertEquals(nc1, jobLocations.getLocations()[0]);
}
}
public static JobSpecification createJobSpecification(ICcApplicationContext appCtx) {
    CompilerProperties compilerProperties = appCtx.getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    return new JobSpecification(frameSize);
}
}
private Map<IConnectorDescriptor, TargetConstraint> setupConnectors() throws AlgebricksException {
    Map<IConnectorDescriptor, TargetConstraint> tgtConstraints = new HashMap<>();
    for (ILogicalOperator exchg : connectors.keySet()) {
        ILogicalOperator inOp = inEdges.get(exchg).get(0);
        ILogicalOperator outOp = outEdges.get(exchg).get(0);
        IOperatorDescriptor inOpDesc = findOpDescForAlgebraicOp(inOp);
        IOperatorDescriptor outOpDesc = findOpDescForAlgebraicOp(outOp);
        Pair<IConnectorDescriptor, TargetConstraint> connPair = connectors.get(exchg);
        IConnectorDescriptor conn = connPair.first;
        int producerPort = outEdges.get(inOp).indexOf(exchg);
        int consumerPort = inEdges.get(outOp).indexOf(exchg);
        jobSpec.connect(conn, inOpDesc, producerPort, outOpDesc, consumerPort);
        if (connPair.second != null) {
            tgtConstraints.put(conn, connPair.second);
        }
    }
    return tgtConstraints;
}
public JobSpecification(int frameSize) {
    roots = new ArrayList<>();
    resultSetIds = new ArrayList<>();
    opMap = new HashMap<>();
    connMap = new HashMap<>();
    opInputMap = new HashMap<>();
    opOutputMap = new HashMap<>();
    connectorOpMap = new HashMap<>();
    properties = new HashMap<>();
    userConstraints = new HashSet<>();
    operatorIdCounter = 0;
    connectorIdCounter = 0;
    maxReattempts = 0;
    useConnectorPolicyForScheduling = false;
    requiredClusterCapacity = new ClusterCapacity();
    setFrameSize(frameSize);
}
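// A minimal lifecycle sketch for the constructor above, assembled from patterns elsewhere in
// this section; `sourceOp` is a stand-in for any operator descriptor built against the spec.
// Operator and connector constructors register themselves with the spec, drawing ids from the
// counters initialized above.
JobSpecification spec = new JobSpecification(32768); // frame size in bytes
NullSinkOperatorDescriptor sink = new NullSinkOperatorDescriptor(spec); // lands in opMap
spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, sink, 0); // fills connMap and connectorOpMap
spec.addRoot(sink); // recorded in roots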
@Override
public Set<String> getJobParticipatingNodes(JobSpecification spec) {
    return spec.getUserConstraints().stream().map(Constraint::getRValue)
            .filter(ce -> ce.getTag() == ExpressionTag.CONSTANT).map(ConstantExpression.class::cast)
            .map(ConstantExpression::getValue).map(Object::toString).filter(nodeJobs::containsKey)
            .collect(Collectors.toSet());
}
}
public static void visit(JobSpecification spec, IOperatorDescriptorVisitor visitor) throws HyracksException {
    Set<OperatorDescriptorId> seen = new HashSet<>();
    for (IOperatorDescriptor op : spec.getOperatorMap().values()) {
        visitOperator(visitor, seen, op);
    }
}
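// A hedged sketch of a visitor that could be passed to the helper above; it only counts operators.
// OperatorCountingVisitor is an illustrative name, not part of the codebase.
public class OperatorCountingVisitor implements IOperatorDescriptorVisitor {
    private int count;

    @Override
    public void visitOperator(IOperatorDescriptor op) throws HyracksException {
        count++; // a real visitor might inspect op or contribute scheduling constraints
    }

    public int getCount() {
        return count;
    }
}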
opSchema.addAllVariables(topOpInSubplanScm);
// Sanity check on the compiled nested job: if some operators were not generated as meta
// operators, the nested plan cannot be inlined as a single pipeline.
Map<OperatorDescriptorId, IOperatorDescriptor> opMap = nestedJob.getOperatorMap();
List<? extends IOperatorDescriptor> metaOps = nestedJob.getMetaOps();
if (opMap.size() != metaOps.size()) {
    for (IOperatorDescriptor opd : opMap.values()) {
        // ... flag any operator that is not a meta operator (elided in this excerpt)
    }
}
public static void visit(JobSpecification spec, IConnectorDescriptorVisitor visitor) throws HyracksException {
    for (IConnectorDescriptor c : spec.getConnectorMap().values()) {
        visitor.visit(c);
    }
}
}
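// A companion sketch for the connector-side helper above: an IConnectorDescriptorVisitor that
// tallies repartitioning edges. ConnectorTally is an illustrative name, not part of the codebase.
public class ConnectorTally implements IConnectorDescriptorVisitor {
    private int mToNCount;

    @Override
    public void visit(IConnectorDescriptor conn) throws HyracksException {
        if (conn instanceof MToNPartitioningConnectorDescriptor) {
            mToNCount++; // network-shuffling edges in the job DAG
        }
    }

    public int getMToNCount() {
        return mToNCount;
    }
}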