private AbstractSingleActivityOperatorDescriptor createInvertedIndexBulkLoadOp(JobSpecification spec) {
    // Identity permutation: load all token/key and filter fields in their incoming order.
    int[] fieldPermutation = new int[numTokenKeyPairFields + numFilterFields];
    for (int i = 0; i < fieldPermutation.length; i++) {
        fieldPermutation[i] = i;
    }
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    return createTreeIndexBulkLoadOp(spec, fieldPermutation, dataflowHelperFactory,
            GlobalConfig.DEFAULT_TREE_FILL_FACTOR);
}
protected LSMSecondaryIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec,
        MetadataProvider metadataProvider, RecordDescriptor taggedSecondaryRecDesc, int[] fieldPermutation,
        int numSecondaryKeys, int numPrimaryKeys, boolean hasBuddyBtree) throws AlgebricksException {
    IndexDataflowHelperFactory primaryIndexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    IndexDataflowHelperFactory secondaryIndexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), secondaryFileSplitProvider);
    LSMSecondaryIndexBulkLoadOperatorDescriptor treeIndexBulkLoadOp =
            new LSMSecondaryIndexBulkLoadOperatorDescriptor(spec, taggedSecondaryRecDesc, primaryIndexHelperFactory,
                    secondaryIndexHelperFactory, fieldPermutation, NUM_TAG_FIELDS, numSecondaryKeys, numPrimaryKeys,
                    hasBuddyBtree);
    treeIndexBulkLoadOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, treeIndexBulkLoadOp,
            secondaryPartitionConstraint);
    return treeIndexBulkLoadOp;
}
public IndexDataflowHelperFactory getPrimaryIndexDataflowHelperFactory(PrimaryIndexInfo primaryIndexInfo,
        IStorageComponentProvider storageComponentProvider) throws AlgebricksException {
    return new IndexDataflowHelperFactory(storageComponentProvider.getStorageManager(),
            primaryIndexInfo.getFileSplitProvider());
}
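// Usage sketch (an assumption for illustration, not taken verbatim from the sources): the factory
// above is typically used to create a partition-local IIndexDataflowHelper, open it, and fetch the
// index instance, mirroring the test fixtures further below. "serviceCtx" and "partition" are
// assumed to come from the caller's task context.
IIndexDataflowHelper helper = getPrimaryIndexDataflowHelperFactory(primaryIndexInfo, storageComponentProvider)
        .create(serviceCtx, partition);
helper.open(); // activates the partition-local index resource
IIndex primaryIndex = helper.getIndexInstance();
helper.close();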
protected LSMIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec,
        int[] fieldPermutation, IIndexDataflowHelperFactory dataflowHelperFactory, float fillFactor) {
    IndexDataflowHelperFactory primaryIndexDataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    LSMIndexBulkLoadOperatorDescriptor treeIndexBulkLoadOp = new LSMIndexBulkLoadOperatorDescriptor(spec,
            secondaryRecDesc, fieldPermutation, fillFactor, false, numElementsHint, false, dataflowHelperFactory,
            primaryIndexDataflowHelperFactory, BulkLoadUsage.CREATE_INDEX, dataset.getDatasetId());
    treeIndexBulkLoadOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, treeIndexBulkLoadOp,
            secondaryPartitionConstraint);
    return treeIndexBulkLoadOp;
}
protected IOperatorDescriptor createPrimaryIndexScanDiskComponentsOp(JobSpecification spec,
        MetadataProvider metadataProvider, RecordDescriptor outRecDesc) {
    ITransactionSubsystemProvider txnSubsystemProvider = TransactionSubsystemProvider.INSTANCE;
    // An instant-lock search callback is used since the scan only reads sealed disk components.
    ISearchOperationCallbackFactory searchCallbackFactory = new PrimaryIndexInstantSearchOperationCallbackFactory(
            dataset.getDatasetId(), dataset.getPrimaryBloomFilterFields(), txnSubsystemProvider,
            IRecoveryManager.ResourceType.LSM_BTREE);
    IndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    LSMBTreeDiskComponentScanOperatorDescriptor primaryScanOp = new LSMBTreeDiskComponentScanOperatorDescriptor(
            spec, outRecDesc, indexHelperFactory, searchCallbackFactory);
    primaryScanOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primaryScanOp,
            primaryPartitionConstraint);
    return primaryScanOp;
}
public static JobSpecification dropDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException, ACIDException {
    LOGGER.info("DROP DATASET: " + dataset);
    // External datasets have no locally stored primary index; return an empty job.
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    }
    JobSpecification specPrimary = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop,
            splitsAndConstraint.second);
    specPrimary.addRoot(primaryBtreeDrop);
    return specPrimary;
}
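// Execution sketch (assumed wiring, not from the original sources): job specs built by these
// helpers are submitted through the standard Hyracks client connection. "hcc" is an
// IHyracksClientConnection assumed to be obtained from the cluster controller.
JobSpecification dropSpec = dropDatasetJobSpec(dataset, metadataProvider);
JobId jobId = hcc.startJob(dropSpec);
hcc.waitForCompletion(jobId);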
public static JobSpecification buildDropFilesIndexJobSpec(MetadataProvider metadataProvider, Dataset dataset)
        throws AlgebricksException {
    String indexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop,
            splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
@Override
public JobSpecification buildDropJobSpec(Set<DropOption> options) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    // The index drop operation should be persistent regardless of whether the dataset is temporary or permanent.
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory, options);
    btreeDrop.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop,
            splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
public static JobSpecification buildRecoverOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            // Names that are not valid secondary index names refer to the dataset's files index.
            indexSplitProvider = metadataProvider
                    .getSplitProviderAndConstraints(ds, IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesRecoverOperatorDescriptor op =
            new ExternalDatasetIndexesRecoverOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
@Override
public JobSpecification buildCompactJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp =
            new LSMTreeIndexCompactOperatorDescriptor(spec, dataflowHelperFactory);
    compactOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp,
            secondaryPartitionConstraint);
    spec.addRoot(compactOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
public static JobSpecification buildFilesIndexUpdateJobSpec(Dataset dataset,
        List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
            .getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexModificationOperatorDescriptor externalFilesOp =
            new ExternalFilesIndexModificationOperatorDescriptor(spec, dataflowHelperFactory,
                    externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp,
            secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
public static JobSpecification compactDatasetJobSpec(Dataverse dataverse, String datasetName,
        MetadataProvider metadataProvider) throws AlgebricksException {
    String dataverseName = dataverse.getDataverseName();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AsterixException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp =
            new LSMTreeIndexCompactOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp,
            splitsAndConstraint.second);
    spec.addRoot(compactOp);
    return spec;
}
private void createSecondaryIndex()
        throws HyracksDataException, RemoteException, ACIDException, AlgebricksException {
    SecondaryIndexInfo secondaryIndexInfo =
            nc.createSecondaryIndex(primaryIndexInfo, secondaryIndex, storageManager, 0);
    IndexDataflowHelperFactory iHelperFactory =
            new IndexDataflowHelperFactory(nc.getStorageManager(), secondaryIndexInfo.getFileSplitProvider());
    secondaryIndexDataflowHelper = iHelperFactory.create(taskCtx.getJobletContext().getServiceContext(), 0);
    // Open once to grab the index instance for the test, then release the helper.
    secondaryIndexDataflowHelper.open();
    secondaryLsmBtree = (TestLsmBtree) secondaryIndexDataflowHelper.getIndexInstance();
    secondaryIndexDataflowHelper.close();
}
@Before
public void createIndex() throws Exception {
    List<List<String>> partitioningKeys = new ArrayList<>();
    partitioningKeys.add(Collections.singletonList("key"));
    dataset = new TestDataset(DATAVERSE_NAME, DATASET_NAME, DATAVERSE_NAME, DATA_TYPE_NAME, NODE_GROUP_NAME,
            NoMergePolicyFactory.NAME, null, new InternalDatasetDetails(null, PartitioningStrategy.HASH,
                    partitioningKeys, null, null, null, false, null),
            null, DatasetType.INTERNAL, DATASET_ID, 0);
    PrimaryIndexInfo primaryIndexInfo = nc.createPrimaryIndex(dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, null,
            storageManager, KEY_INDEXES, KEY_INDICATORS_LIST, 0);
    IndexDataflowHelperFactory iHelperFactory =
            new IndexDataflowHelperFactory(nc.getStorageManager(), primaryIndexInfo.getFileSplitProvider());
    JobId jobId = nc.newJobId();
    ctx = nc.createTestContext(jobId, 0, false);
    indexDataflowHelper = iHelperFactory.create(ctx.getJobletContext().getServiceContext(), 0);
    indexDataflowHelper.open();
    lsmBtree = (TestLsmBtree) indexDataflowHelper.getIndexInstance();
    indexDataflowHelper.close();
    txnCtx = nc.getTransactionManager().beginTransaction(nc.getTxnJobId(ctx),
            new TransactionOptions(ITransactionManager.AtomicityLevel.ENTITY_LEVEL));
    insertOp = nc.getInsertPipeline(ctx, dataset, KEY_TYPES, RECORD_TYPE, META_TYPE, null, KEY_INDEXES,
            KEY_INDICATORS_LIST, storageManager, null).getLeft();
}
private void readIndex() throws HyracksDataException {
    primaryIndexDataflowHelpers = new IIndexDataflowHelper[NUM_PARTITIONS];
    primaryIndexes = new TestLsmBtree[NUM_PARTITIONS];
    for (int i = 0; i < NUM_PARTITIONS; i++) {
        IIndexDataflowHelperFactory factory = new IndexDataflowHelperFactory(nc.getStorageManager(),
                primaryIndexInfos[i].getFileSplitProvider());
        primaryIndexDataflowHelpers[i] = factory.create(testCtxs[i].getJobletContext().getServiceContext(), i);
        primaryIndexDataflowHelpers[i].open();
        primaryIndexes[i] = (TestLsmBtree) primaryIndexDataflowHelpers[i].getIndexInstance();
        primaryIndexDataflowHelpers[i].close();
    }
    secondaryIndexDataflowHelpers = new IIndexDataflowHelper[NUM_PARTITIONS];
    secondaryIndexes = new TestLsmBtree[NUM_PARTITIONS];
    for (int i = 0; i < NUM_PARTITIONS; i++) {
        IIndexDataflowHelperFactory factory = new IndexDataflowHelperFactory(nc.getStorageManager(),
                secondaryIndexInfo[i].getFileSplitProvider());
        secondaryIndexDataflowHelpers[i] = factory.create(testCtxs[i].getJobletContext().getServiceContext(), i);
        secondaryIndexDataflowHelpers[i].open();
        secondaryIndexes[i] = (TestLsmBtree) secondaryIndexDataflowHelpers[i].getIndexInstance();
        secondaryIndexDataflowHelpers[i].close();
    }
}
@Before
public void createIndex() throws Exception {
    PrimaryIndexInfo primaryIndexInfo = StorageTestUtils.createPrimaryIndex(nc, PARTITION);
    IndexDataflowHelperFactory iHelperFactory =
            new IndexDataflowHelperFactory(nc.getStorageManager(), primaryIndexInfo.getFileSplitProvider());
    JobId jobId = nc.newJobId();
    ctx = nc.createTestContext(jobId, PARTITION, false);
    indexDataflowHelper = iHelperFactory.create(ctx.getJobletContext().getServiceContext(), PARTITION);
    indexDataflowHelper.open();
    lsmBtree = (TestLsmBtree) indexDataflowHelper.getIndexInstance();
    indexDataflowHelper.close();
}
public IPushRuntime getFullScanPipeline(IFrameWriter countOp, IHyracksTaskContext ctx, Dataset dataset,
        IAType[] primaryKeyTypes, ARecordType recordType, ARecordType metaType,
        NoMergePolicyFactory mergePolicyFactory, Map<String, String> mergePolicyProperties, int[] filterFields,
        int[] primaryKeyIndexes, List<Integer> primaryKeyIndicators,
        StorageComponentProvider storageComponentProvider) throws HyracksDataException, AlgebricksException {
    IPushRuntime emptyTupleOp = new EmptyTupleSourceRuntimeFactory().createPushRuntime(ctx)[0];
    JobSpecification spec = new JobSpecification();
    PrimaryIndexInfo primaryIndexInfo = new PrimaryIndexInfo(dataset, primaryKeyTypes, recordType, metaType,
            mergePolicyFactory, mergePolicyProperties, filterFields, primaryKeyIndexes, primaryKeyIndicators);
    IIndexDataflowHelperFactory indexDataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), primaryIndexInfo.getFileSplitProvider());
    BTreeSearchOperatorDescriptor searchOpDesc = new BTreeSearchOperatorDescriptor(spec, primaryIndexInfo.rDesc,
            null, null, true, true, indexDataflowHelperFactory, false, false, null,
            NoOpOperationCallbackFactory.INSTANCE, filterFields, filterFields, false);
    BTreeSearchOperatorNodePushable searchOp = searchOpDesc.createPushRuntime(ctx,
            primaryIndexInfo.getSearchRecordDescriptorProvider(),
            ctx.getTaskAttemptId().getTaskId().getPartition(), 1);
    // Wire the pipeline: empty-tuple source -> full B-tree scan -> the caller-supplied frame writer.
    emptyTupleOp.setOutputFrameWriter(0, searchOp,
            primaryIndexInfo.getSearchRecordDescriptorProvider().getInputRecordDescriptor(null, 0));
    searchOp.setOutputFrameWriter(0, countOp, primaryIndexInfo.rDesc);
    return emptyTupleOp;
}
@Before
public void createIndex() throws Exception {
    PrimaryIndexInfo primaryIndexInfo = StorageTestUtils.createPrimaryIndex(nc, PARTITION);
    IndexDataflowHelperFactory iHelperFactory =
            new IndexDataflowHelperFactory(nc.getStorageManager(), primaryIndexInfo.getFileSplitProvider());
    JobId jobId = nc.newJobId();
    ctx = nc.createTestContext(jobId, PARTITION, false);
    indexDataflowHelper = iHelperFactory.create(ctx.getJobletContext().getServiceContext(), PARTITION);
    indexDataflowHelper.open();
    lsmBtree = (TestLsmBtree) indexDataflowHelper.getIndexInstance();
    indexDataflowHelper.close();
    txnCtx = nc.getTransactionManager().beginTransaction(nc.getTxnJobId(ctx),
            new TransactionOptions(ITransactionManager.AtomicityLevel.ENTITY_LEVEL));
    insertOp = StorageTestUtils.getInsertPipeline(nc, ctx);
    indexPath = indexDataflowHelper.getResource().getPath();
}
@Before
public void createIndex() throws Exception {
    PrimaryIndexInfo primaryIndexInfo = StorageTestUtils.createPrimaryIndex(nc, PARTITION);
    IndexDataflowHelperFactory iHelperFactory =
            new IndexDataflowHelperFactory(nc.getStorageManager(), primaryIndexInfo.getFileSplitProvider());
    JobId jobId = nc.newJobId();
    ctx = nc.createTestContext(jobId, PARTITION, false);
    indexDataflowHelper = iHelperFactory.create(ctx.getJobletContext().getServiceContext(), PARTITION);
    indexDataflowHelper.open();
    lsmBtree = (TestLsmBtree) indexDataflowHelper.getIndexInstance();
    indexDataflowHelper.close();
    txnCtx = nc.getTransactionManager().beginTransaction(nc.getTxnJobId(ctx),
            new TransactionOptions(ITransactionManager.AtomicityLevel.ENTITY_LEVEL));
    insertOp = StorageTestUtils.getInsertPipeline(nc, ctx, null);
    JobId abortJobId = nc.newJobId();
    abortCtx = nc.createTestContext(abortJobId, PARTITION, false);
    abortTxnCtx = nc.getTransactionManager().beginTransaction(nc.getTxnJobId(abortCtx),
            new TransactionOptions(ITransactionManager.AtomicityLevel.ENTITY_LEVEL));
    // abortOp is initialized by each test separately
    tupleGenerator = StorageTestUtils.getTupleGenerator();
}