public ILSMComponentIdGeneratorFactory getComponentIdGeneratorFactory() {
    return new DatasetLSMComponentIdGeneratorFactory(getDatasetId());
}
public IDatasetInfoProvider getDatasetInfoProvider() {
    return new DatasetInfoProvider(getDatasetId());
}
/**
 * get the IndexOperationTrackerFactory for a particular index on the dataset
 *
 * @param index
 *            the index
 * @return an instance of {@link org.apache.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory}
 */
public ILSMOperationTrackerFactory getIndexOperationTrackerFactory(Index index) {
    return index.isPrimaryIndex() ? new PrimaryIndexOperationTrackerFactory(getDatasetId())
            : new SecondaryIndexOperationTrackerFactory(getDatasetId());
}
public static Pair<ILSMMergePolicyFactory, Map<String, String>> getMergePolicyFactory(Dataset dataset,
        MetadataTransactionContext mdTxnCtx) throws AlgebricksException {
    String policyName = dataset.getCompactionPolicy();
    CompactionPolicy compactionPolicy = MetadataManager.INSTANCE.getCompactionPolicy(mdTxnCtx,
            MetadataConstants.METADATA_DATAVERSE_NAME, policyName);
    String compactionPolicyFactoryClassName = compactionPolicy.getClassName();
    ILSMMergePolicyFactory mergePolicyFactory;
    Map<String, String> properties = dataset.getCompactionPolicyProperties();
    try {
        mergePolicyFactory = (ILSMMergePolicyFactory) Class.forName(compactionPolicyFactoryClassName).newInstance();
        if (mergePolicyFactory.getName().compareTo(CorrelatedPrefixMergePolicyFactory.NAME) == 0) {
            properties.put(CorrelatedPrefixMergePolicyFactory.KEY_DATASET_ID,
                    Integer.toString(dataset.getDatasetId()));
        }
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new AlgebricksException(e);
    }
    return new Pair<>(mergePolicyFactory, properties);
}
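// A minimal, standalone sketch of the reflection pattern used in getMergePolicyFactory above:
// a factory is located by its fully qualified class name and instantiated through a no-arg
// constructor. The names below (ReflectiveFactoryLoaderSketch, NamedFactory, PrefixFactory,
// load) are hypothetical and only assume the JDK; they are not part of the AsterixDB code base.
public final class ReflectiveFactoryLoaderSketch {

    /** Hypothetical factory contract: a no-arg constructor plus a symbolic name. */
    public interface NamedFactory {
        String getName();
    }

    /** Hypothetical concrete factory, analogous to a merge-policy factory. */
    public static final class PrefixFactory implements NamedFactory {
        @Override
        public String getName() {
            return "prefix";
        }
    }

    /** Loads a factory by class name; getDeclaredConstructor() avoids the deprecated Class.newInstance(). */
    public static NamedFactory load(String className) throws ReflectiveOperationException {
        return (NamedFactory) Class.forName(className).getDeclaredConstructor().newInstance();
    }

    public static void main(String[] args) throws ReflectiveOperationException {
        NamedFactory factory = load(PrefixFactory.class.getName());
        System.out.println(factory.getName()); // prints "prefix"
    }
}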
public static void flush(IDatasetLifecycleManager dsLifecycleMgr, TestLsmBtree lsmBtree, Dataset dataset,
        boolean async) throws Exception {
    waitForOperations(lsmBtree);
    dsLifecycleMgr.flushDataset(dataset.getDatasetId(), async);
}
protected LSMIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec,
        int[] fieldPermutation, IIndexDataflowHelperFactory dataflowHelperFactory, float fillFactor) {
    IndexDataflowHelperFactory primaryIndexDataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    LSMIndexBulkLoadOperatorDescriptor treeIndexBulkLoadOp = new LSMIndexBulkLoadOperatorDescriptor(spec,
            secondaryRecDesc, fieldPermutation, fillFactor, false, numElementsHint, false, dataflowHelperFactory,
            primaryIndexDataflowHelperFactory, BulkLoadUsage.CREATE_INDEX, dataset.getDatasetId());
    treeIndexBulkLoadOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, treeIndexBulkLoadOp,
            secondaryPartitionConstraint);
    return treeIndexBulkLoadOp;
}
        throws AlgebricksException {
    if (index.isPrimaryIndex()) {
        return op == IndexOperation.UPSERT
                ? new UpsertOperationCallbackFactory(getDatasetId(), primaryKeyFields,
                        componentProvider.getTransactionSubsystemProvider(), Operation.get(op),
                        index.resourceType())
                : op == IndexOperation.DELETE || op == IndexOperation.INSERT
                        ? new PrimaryIndexModificationOperationCallbackFactory(getDatasetId(), primaryKeyFields,
                                componentProvider.getTransactionSubsystemProvider(), Operation.get(op),
                                index.resourceType())
    } else {
        return op == IndexOperation.DELETE || op == IndexOperation.INSERT || op == IndexOperation.UPSERT
                ? new SecondaryIndexModificationOperationCallbackFactory(getDatasetId(), primaryKeyFields,
                        componentProvider.getTransactionSubsystemProvider(), Operation.get(op),
                        index.resourceType())
public static void flushPartition(IDatasetLifecycleManager dslLifecycleMgr, TestLsmBtree lsmBtree, Dataset dataset,
        boolean async) throws Exception {
    waitForOperations(lsmBtree);
    PrimaryIndexOperationTracker opTracker = (PrimaryIndexOperationTracker) lsmBtree.getOperationTracker();
    opTracker.setFlushOnExit(true);
    opTracker.flushIfNeeded();
    long maxWaitTime = TimeUnit.MINUTES.toNanos(1); // 1min
    // wait for the flush log record to be flushed, i.e., the flush is scheduled
    long before = System.nanoTime();
    while (opTracker.isFlushLogCreated()) {
        Thread.sleep(5); // NOSONAR: Test code with a timeout
        if (System.nanoTime() - before > maxWaitTime) {
            throw new IllegalStateException(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - before)
                    + "ms passed without scheduling the flush operation");
        }
    }
    if (!async) {
        DatasetInfo dsInfo = dslLifecycleMgr.getDatasetInfo(dataset.getDatasetId());
        dsInfo.waitForIO();
    }
}
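// A minimal sketch of the poll-until-condition-or-timeout idiom used in flushPartition above,
// written against the JDK only. The class and helper names (AwaitSketch, awaitCondition) and their
// parameters are illustrative, not part of the AsterixDB test utilities.
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

public final class AwaitSketch {

    /** Polls the condition every pollMillis until it holds, or throws once maxWait elapses. */
    public static void awaitCondition(BooleanSupplier condition, long maxWait, TimeUnit unit, long pollMillis)
            throws InterruptedException {
        long maxWaitNanos = unit.toNanos(maxWait);
        long start = System.nanoTime();
        while (!condition.getAsBoolean()) {
            Thread.sleep(pollMillis);
            if (System.nanoTime() - start > maxWaitNanos) {
                throw new IllegalStateException(TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)
                        + "ms passed without the condition holding");
            }
        }
    }

    public static void main(String[] args) throws InterruptedException {
        long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(50);
        // Example: poll every 5ms, for at most one minute, until a 50ms deadline has passed.
        awaitCondition(() -> System.nanoTime() >= deadline, 1, TimeUnit.MINUTES, 5);
        System.out.println("condition satisfied");
    }
}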
protected IOperatorDescriptor createPrimaryIndexScanDiskComponentsOp(JobSpecification spec,
        MetadataProvider metadataProvider, RecordDescriptor outRecDesc) {
    ITransactionSubsystemProvider txnSubsystemProvider = TransactionSubsystemProvider.INSTANCE;
    ISearchOperationCallbackFactory searchCallbackFactory = new PrimaryIndexInstantSearchOperationCallbackFactory(
            dataset.getDatasetId(), dataset.getPrimaryBloomFilterFields(), txnSubsystemProvider,
            IRecoveryManager.ResourceType.LSM_BTREE);
    IndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), primaryFileSplitProvider);
    LSMBTreeDiskComponentScanOperatorDescriptor primaryScanOp = new LSMBTreeDiskComponentScanOperatorDescriptor(
            spec, outRecDesc, indexHelperFactory, searchCallbackFactory);
    primaryScanOp.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primaryScanOp,
            primaryPartitionConstraint);
    return primaryScanOp;
}
final ITupleReference ref = rangeCursor.getTuple();
final Dataset ds = valueExtractor.getValue(txnId, ref);
int datasetId = Math.max(ds.getDatasetId(), DatasetIdFactory.generateAlternatingDatasetId(ds.getDatasetId()));
if (mostRecentDatasetId < datasetId) {
    mostRecentDatasetId = datasetId;
    @Override
    public StorageComponentsDatasource toDatasource(IOptimizationContext context, AbstractFunctionCallExpression f)
            throws AlgebricksException {
        String dataverseName = getString(f.getArguments(), 0);
        String datasetName = getString(f.getArguments(), 1);
        MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
        Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
        if (dataset == null) {
            throw new CompilationException(ErrorCode.UNKNOWN_DATASET_IN_DATAVERSE, f.getSourceLocation(),
                    datasetName, dataverseName);
        }
        return new StorageComponentsDatasource(context.getComputationNodeDomain(), dataset.getDatasetId());
    }
}
    @Override
    public DatasetResourcesDatasource toDatasource(IOptimizationContext context, AbstractFunctionCallExpression f)
            throws AlgebricksException {
        String dataverseName = getString(f.getArguments(), 0);
        String datasetName = getString(f.getArguments(), 1);
        MetadataProvider metadataProvider = (MetadataProvider) context.getMetadataProvider();
        Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
        if (dataset == null) {
            throw new CompilationException(ErrorCode.UNKNOWN_DATASET_IN_DATAVERSE, f.getSourceLocation(),
                    datasetName, dataverseName);
        }
        return new DatasetResourcesDatasource(context.getComputationNodeDomain(), dataset.getDatasetId());
    }
}
@After
public void testRecovery() throws Exception {
    // right now we've inserted 1000 records to the index, and each record is at least 12 bytes.
    // thus, the memory component size is at least 12KB.
    List<Pair<IOption, Object>> opts = new ArrayList<>();
    opts.add(Pair.of(Option.STORAGE_MEMORYCOMPONENT_GLOBALBUDGET, 20 * 1024 * 1024L));
    opts.add(Pair.of(Option.STORAGE_MEMORYCOMPONENT_PAGESIZE, 1 * 1024));
    // each memory component only gets 4 pages (we have 2 partitions, 2 memory components/partition)
    // and some reserved memory for metadata dataset
    opts.add(Pair.of(Option.STORAGE_MAX_ACTIVE_WRITABLE_DATASETS, 1024));
    nc.setOpts(opts);
    initializeNc(false);
    initializeTestCtx();
    readIndex();
    // wait for ongoing flushes triggered by recovery to finish
    DatasetInfo dsInfo = dsLifecycleMgr.getDatasetInfo(dataset.getDatasetId());
    dsInfo.waitForIO();
    checkComponentIds();
    // insert more records
    createInsertOps();
    insertRecords(PARTITION_0, StorageTestUtils.RECORDS_PER_COMPONENT, StorageTestUtils.RECORDS_PER_COMPONENT, true);
    dsInfo.waitForIO();
    checkComponentIds();
    dropIndex();
    // cleanup after each test case
    nc.deInit(true);
    nc.clearOpts();
}
    ITransactionSubsystemProvider txnSubsystemProvider = TransactionSubsystemProvider.INSTANCE;
    ISearchOperationCallbackFactory searchCallbackFactory = new PrimaryIndexInstantSearchOperationCallbackFactory(
            dataset.getDatasetId(), dataset.getPrimaryBloomFilterFields(), txnSubsystemProvider,
            IRecoveryManager.ResourceType.LSM_BTREE);
    IndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
public static Dataset createTransactionDataset(Dataset dataset) {
    ExternalDatasetDetails originalDsd = (ExternalDatasetDetails) dataset.getDatasetDetails();
    ExternalDatasetDetails dsd = new ExternalDatasetDetails(originalDsd.getAdapter(), originalDsd.getProperties(),
            originalDsd.getTimestamp(), TransactionState.BEGIN);
    return new Dataset(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getItemTypeDataverseName(),
            dataset.getItemTypeName(), dataset.getNodeGroupName(), dataset.getCompactionPolicy(),
            dataset.getCompactionPolicyProperties(), dsd, dataset.getHints(), DatasetType.EXTERNAL,
            dataset.getDatasetId(), dataset.getPendingOp());
}
public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, Dataset dataset)
        throws Exception {
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    JobSpecification spec = new JobSpecification(frameSize);
    RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
    AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1,
            new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
    TxnId txnId = metadataProvider.getTxnIdFactory().create();
    FlushDatasetOperatorDescriptor flushOperator =
            new FlushDatasetOperatorDescriptor(spec, txnId, dataset.getDatasetId());
    spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, dataset.getDatasetName());
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource,
            primaryPartitionConstraint);
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(txnId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    JobUtils.runJob(hcc, spec, true);
}
@Override
public IResourceFactory getResourceFactory(MetadataProvider mdProvider, Dataset dataset, Index index,
        ARecordType recordType, ARecordType metaType, ILSMMergePolicyFactory mergePolicyFactory,
        Map<String, String> mergePolicyProperties, ITypeTraits[] filterTypeTraits,
        IBinaryComparatorFactory[] filterCmpFactories) throws AlgebricksException {
    int[] filterFields = IndexUtil.getFilterFields(dataset, index, filterTypeTraits);
    int[] btreeFields = IndexUtil.getBtreeFieldsIfFiltered(dataset, index);
    IStorageComponentProvider storageComponentProvider = mdProvider.getStorageComponentProvider();
    ITypeTraits[] typeTraits = getTypeTraits(mdProvider, dataset, index, recordType, metaType);
    IBinaryComparatorFactory[] cmpFactories = getCmpFactories(mdProvider, dataset, index, recordType, metaType);
    int[] bloomFilterFields = getBloomFilterFields(dataset, index);
    double bloomFilterFalsePositiveRate = mdProvider.getStorageProperties().getBloomFilterFalsePositiveRate();
    ILSMOperationTrackerFactory opTrackerFactory = dataset.getIndexOperationTrackerFactory(index);
    if (opTrackerFactory instanceof PrimaryIndexOperationTrackerFactory) {
        opTrackerFactory = new TestPrimaryIndexOperationTrackerFactory(dataset.getDatasetId());
    }
    ILSMIOOperationCallbackFactory ioOpCallbackFactory = dataset.getIoOperationCallbackFactory(index);
    IStorageManager storageManager = storageComponentProvider.getStorageManager();
    IMetadataPageManagerFactory metadataPageManagerFactory =
            storageComponentProvider.getMetadataPageManagerFactory();
    ILSMIOOperationSchedulerProvider ioSchedulerProvider =
            storageComponentProvider.getIoOperationSchedulerProvider();
    AsterixVirtualBufferCacheProvider vbcProvider = new AsterixVirtualBufferCacheProvider(dataset.getDatasetId());
    return new TestLsmBtreeLocalResourceFactory(storageManager, typeTraits, cmpFactories, filterTypeTraits,
            filterCmpFactories, filterFields, opTrackerFactory, ioOpCallbackFactory, metadataPageManagerFactory,
            vbcProvider, ioSchedulerProvider, mergePolicyFactory, mergePolicyProperties, true, bloomFilterFields,
            bloomFilterFalsePositiveRate, index.isPrimaryIndex(), btreeFields);
}
        new LSMIndexBulkLoadOperatorNodePushable(secondaryIndexHelperFactory, primaryIndexHelperFactory, ctx, 0,
                fieldPermutation, 1.0F, false, numElementsHint, true, secondaryIndexInfo.rDesc,
                BulkLoadUsage.CREATE_INDEX, dataset.getDatasetId());
op.setOutputFrameWriter(0, new SinkRuntimeFactory().createPushRuntime(ctx)[0], null);
return Pair.of(secondaryIndexInfo, op);
        tupleAppender.write(insertOps[partition], true);
        dsLifecycleMgr.flushDataset(dataset.getDatasetId(), false);
        break;
    case INSERT_CLOSE:
    pkFieldsInCommitOp[i] = diff + i;
CommitRuntime commitOp = new CommitRuntime(ctx, getTxnJobId(ctx), dataset.getDatasetId(), pkFieldsInCommitOp, true,
        ctx.getTaskAttemptId().getTaskId().getPartition(), true);
insertOp.setOutputFrameWriter(0, commitOp, upsertOutRecDesc);