@Override
public int[] getPrimaryBloomFilterFields() {
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    return IntStream.range(0, numPrimaryKeys).toArray();
}
public static List<LogicalVariable> getPrimaryKeyVarsFromPrimaryUnnestMap(Dataset dataset,
        ILogicalOperator unnestMapOp) {
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    List<LogicalVariable> primaryKeyVars = new ArrayList<>();
    // In a left outer join case, a LEFT_OUTER_UNNEST_MAP operator is placed instead of
    // an UNNEST_MAP operator, so we cast to the common superclass.
    List<LogicalVariable> sourceVars = ((AbstractUnnestMapOperator) unnestMapOp).getVariables();
    // Assumes the primary keys are located at the beginning.
    for (int i = 0; i < numPrimaryKeys; i++) {
        primaryKeyVars.add(sourceVars.get(i));
    }
    return primaryKeyVars;
}
public static int[] createFilterFields(Dataset dataset) {
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return null;
    }
    List<String> filterField = getFilterField(dataset);
    if (filterField == null) {
        return null;
    }
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    int numKeys = partitioningKeys.size();
    // The filter sits right after the primary keys [0 .. numKeys-1] and the record [numKeys].
    return new int[] { numKeys + 1 };
}
public static int[] getBtreeFieldsIfFiltered(Dataset dataset, Index index) throws AlgebricksException {
    if (index.isPrimaryIndex()) {
        return DatasetUtil.createBTreeFieldsWhenThereisAFilter(dataset);
    }
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    int[] btreeFields = new int[numSecondaryKeys + numPrimaryKeys];
    for (int k = 0; k < btreeFields.length; k++) {
        btreeFields[k] = k;
    }
    return btreeFields;
}
private void fillPKVars(DataSourceScanOperator dataScan, List<LogicalVariable> pkVars) {
    pkVars.clear();
    DatasetDataSource datasetDataSource = (DatasetDataSource) dataScan.getDataSource();
    if (datasetDataSource.getDataset().getDatasetDetails() instanceof InternalDatasetDetails) {
        int numPKs = datasetDataSource.getDataset().getPrimaryKeys().size();
        for (int i = 0; i < numPKs; i++) {
            pkVars.add(dataScan.getVariables().get(i));
        }
    }
}
private static int[] getPrimaryKeyPermutationForUpsert(Dataset dataset) {
    // upsertIndicatorVar + prev record
    int f = 2;
    // add the previous meta second
    if (dataset.hasMetaPart()) {
        f++;
    }
    // add the previous filter third
    int numFilterFields = DatasetUtil.getFilterField(dataset) == null ? 0 : 1;
    if (numFilterFields > 0) {
        f++;
    }
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int[] pkIndexes = new int[numPrimaryKeys];
    for (int i = 0; i < pkIndexes.length; i++) {
        pkIndexes[i] = f;
        f++;
    }
    return pkIndexes;
}
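// A minimal, self-contained sketch of the offset arithmetic above, with plain booleans
// standing in for the Dataset accessors (hasMeta/hasFilter and the class name are
// hypothetical stand-ins, not part of the real API). It reproduces the layout the method
// assumes: the upsert indicator and previous record occupy slots 0-1, followed by the
// optional previous meta and previous filter, with the primary keys after that.
public final class UpsertPermutationSketch {
    static int[] upsertPkPermutation(int numPrimaryKeys, boolean hasMeta, boolean hasFilter) {
        int f = 2; // upsert indicator + previous record
        if (hasMeta) {
            f++; // previous meta
        }
        if (hasFilter) {
            f++; // previous filter
        }
        int[] pkIndexes = new int[numPrimaryKeys];
        for (int i = 0; i < numPrimaryKeys; i++) {
            pkIndexes[i] = f++;
        }
        return pkIndexes;
    }

    public static void main(String[] args) {
        // With meta and filter present, two primary keys land at slots 4 and 5.
        System.out.println(java.util.Arrays.toString(upsertPkPermutation(2, true, true))); // [4, 5]
    }
}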
private static IOperatorDescriptor createPrimaryIndexUpsertOp(JobSpecification spec,
        MetadataProvider metadataProvider, Dataset source, Dataset target) throws AlgebricksException {
    int numKeys = source.getPrimaryKeys().size();
    int numValues = source.hasMetaPart() ? 2 : 1;
    int[] fieldPermutation = IntStream.range(0, numKeys + numValues).toArray();
    Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> upsertOpAndConstraints =
            DatasetUtil.createPrimaryIndexUpsertOp(spec, metadataProvider, target,
                    source.getPrimaryRecordDescriptor(metadataProvider), fieldPermutation,
                    MissingWriterFactory.INSTANCE);
    IOperatorDescriptor upsertOp = upsertOpAndConstraints.first;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, upsertOp,
            upsertOpAndConstraints.second);
    return upsertOp;
}
public static int getPositionOfPartitioningKeyField(Dataset dataset, List<String> fieldExpr,
        boolean fieldFromMeta) {
    List<Integer> keySourceIndicator = null;
    IDatasetDetails datasetDetails = dataset.getDatasetDetails();
    if (datasetDetails.getDatasetType() == DatasetType.INTERNAL) {
        keySourceIndicator = ((InternalDatasetDetails) datasetDetails).getKeySourceIndicator();
    }
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    for (int i = 0; i < partitioningKeys.size(); i++) {
        List<String> partitioningKey = partitioningKeys.get(i);
        if (partitioningKey.equals(fieldExpr) && keySourceMatches(keySourceIndicator, i, fieldFromMeta)) {
            return i;
        }
    }
    return -1;
}
public static int[] createBTreeFieldsWhenThereisAFilter(Dataset dataset) {
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        return null;
    }
    List<String> filterField = getFilterField(dataset);
    if (filterField == null) {
        return null;
    }
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    int valueFields = dataset.hasMetaPart() ? 2 : 1;
    int[] btreeFields = new int[partitioningKeys.size() + valueFields];
    for (int i = 0; i < btreeFields.length; ++i) {
        btreeFields[i] = i;
    }
    return btreeFields;
}
public LoadableDataSource(Dataset targetDataset, IAType itemType, IAType metaItemType, String adapter,
        Map<String, String> properties) throws AlgebricksException, IOException {
    super(new DataSourceId("loadable_dv", "loadable_ds"), itemType, metaItemType, Type.LOADABLE, null);
    this.targetDataset = targetDataset;
    this.adapter = adapter;
    this.adapterProperties = properties;
    partitioningKeys = targetDataset.getPrimaryKeys();
    ARecordType recType = (ARecordType) itemType;
    isPKAutoGenerated = ((InternalDatasetDetails) targetDataset.getDatasetDetails()).isAutogenerated();
    if (isPKAutoGenerated) {
        // Since the key is auto-generated, we need to use another record type
        // (possibly nested) which has all fields except the PK.
        recType = getStrippedPKType(new LinkedList<>(partitioningKeys.get(0)), recType);
    }
    schemaTypes = new IAType[] { recType };
}
private static int[] secondaryFilterFields(Dataset dataset, Index index, ITypeTraits[] filterTypeTraits)
        throws CompilationException {
    if (filterTypeTraits == null) {
        return empty;
    }
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    switch (index.getIndexType()) {
        case BTREE:
            return new int[] { numPrimaryKeys + numSecondaryKeys };
        case RTREE:
        case LENGTH_PARTITIONED_NGRAM_INVIX:
        case LENGTH_PARTITIONED_WORD_INVIX:
        case SINGLE_PARTITION_NGRAM_INVIX:
        case SINGLE_PARTITION_WORD_INVIX:
            break;
        default:
            throw new CompilationException(ErrorCode.COMPILATION_UNKNOWN_INDEX_TYPE,
                    index.getIndexType().toString());
    }
    return empty;
}
/**
 * Gets the comparator factories for the primary key fields of this dataset.
 *
 * @param metadataProvider
 *            the metadata provider
 * @param recordType
 *            the record type of this dataset
 * @param metaType
 *            the meta record type of this dataset, if any
 * @return the comparator factories for the primary key fields of this dataset
 * @throws AlgebricksException
 */
public IBinaryComparatorFactory[] getPrimaryComparatorFactories(MetadataProvider metadataProvider,
        ARecordType recordType, ARecordType metaType) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    IBinaryComparatorFactoryProvider cmpFactoryProvider = storageComponentProvider.getComparatorFactoryProvider();
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    IBinaryComparatorFactory[] cmpFactories = new IBinaryComparatorFactory[numPrimaryKeys];
    List<Integer> indicators = null;
    if (hasMetaPart()) {
        indicators = ((InternalDatasetDetails) getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType = (indicators == null || indicators.get(i) == 0)
                ? recordType.getSubFieldType(partitioningKeys.get(i))
                : metaType.getSubFieldType(partitioningKeys.get(i));
        cmpFactories[i] = cmpFactoryProvider.getBinaryComparatorFactory(keyType, true);
    }
    return cmpFactories;
}
/**
 * Gets the hash function factories for the primary key fields of this dataset.
 *
 * @param metadataProvider
 *            the metadata provider
 * @return the hash function factories for the primary key fields of this dataset
 * @throws AlgebricksException
 */
public IBinaryHashFunctionFactory[] getPrimaryHashFunctionFactories(MetadataProvider metadataProvider)
        throws AlgebricksException {
    ARecordType recordType = (ARecordType) metadataProvider.findType(this);
    ARecordType metaType = (ARecordType) metadataProvider.findMetaType(this);
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    IBinaryHashFunctionFactory[] hashFuncFactories = new IBinaryHashFunctionFactory[numPrimaryKeys];
    List<Integer> indicators = null;
    if (hasMetaPart()) {
        indicators = ((InternalDatasetDetails) getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType = (indicators == null || indicators.get(i) == 0)
                ? recordType.getSubFieldType(partitioningKeys.get(i))
                : metaType.getSubFieldType(partitioningKeys.get(i));
        hashFuncFactories[i] = BinaryHashFunctionFactoryProvider.INSTANCE.getBinaryHashFunctionFactory(keyType);
    }
    return hashFuncFactories;
}
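// Both factory methods above resolve each key's type with the same key-source rule:
// indicator 0 (or no indicators at all) means the key lives in the record, indicator 1
// means it lives in the meta record. A minimal sketch of that shared rule, factored into
// a hypothetical helper (resolveKeyType is not part of the real class; it assumes the
// surrounding class's imports for IAType, ARecordType, and AlgebricksException):
static IAType resolveKeyType(List<Integer> indicators, int i, ARecordType recordType, ARecordType metaType,
        List<String> keyPath) throws AlgebricksException {
    // No meta part, or this key sourced from the record: look it up in the record type.
    if (indicators == null || indicators.get(i) == 0) {
        return recordType.getSubFieldType(keyPath);
    }
    // Otherwise the key is sourced from the meta record.
    return metaType.getSubFieldType(keyPath);
}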
/**
 * Creates output variables for the given unnest-map or left-outer-unnest-map operator
 * that does a secondary index lookup.
 * The order: SK, PK, [optional: the result of an instantTryLock on PK].
 */
public static void appendSecondaryIndexOutputVars(Dataset dataset, ARecordType recordType,
        ARecordType metaRecordType, Index index, IOptimizationContext context, List<LogicalVariable> dest,
        boolean requireResultOfInstantTryLock) throws AlgebricksException {
    int numPrimaryKeys;
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        numPrimaryKeys = IndexingConstants
                .getRIDSize(((ExternalDatasetDetails) dataset.getDatasetDetails()).getProperties());
    } else {
        numPrimaryKeys = dataset.getPrimaryKeys().size();
    }
    int numSecondaryKeys = KeyFieldTypeUtil.getNumSecondaryKeys(index, recordType, metaRecordType);
    // In case of an inverted-index search, secondary keys will not be generated.
    int numVars = isInvertedIndex(index) ? numPrimaryKeys : numPrimaryKeys + numSecondaryKeys;
    // If it's an index-only plan, add one more variable to hold the result of the
    // instantTryLock on the primary key, i.e., whether the lock can be granted.
    // If it is granted, we don't need to do a post-verification (select).
    // If it is not granted, we need to do a secondary index lookup, a primary index lookup, and a select.
    if (requireResultOfInstantTryLock) {
        numVars += 1;
    }
    for (int i = 0; i < numVars; i++) {
        dest.add(context.newVar());
    }
}
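// A minimal, self-contained sketch of the variable-count rule above (countOutputVars is a
// hypothetical helper, not part of the real class): inverted indexes emit only PKs, other
// index types emit SKs followed by PKs, and index-only plans append one more variable for
// the instantTryLock result.
static int countOutputVars(int numPrimaryKeys, int numSecondaryKeys, boolean invertedIndex,
        boolean requireResultOfInstantTryLock) {
    int numVars = invertedIndex ? numPrimaryKeys : numPrimaryKeys + numSecondaryKeys;
    if (requireResultOfInstantTryLock) {
        numVars++; // one extra variable carries the tryLock success flag
    }
    return numVars;
}
// E.g., a B-tree secondary index with 2 SKs and 1 PK in an index-only plan:
// countOutputVars(1, 2, false, true) == 4, laid out as SK, SK, PK, lockResult.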
        return FilesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS;
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    ITypeTraitProvider typeTraitProvider = metadataProvider.getStorageComponentProvider().getTypeTraitProvider();
public ITypeTraits[] getPrimaryTypeTraits(MetadataProvider metadataProvider, ARecordType recordType,
        ARecordType metaType) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    ITypeTraitProvider ttProvider = storageComponentProvider.getTypeTraitProvider();
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    ITypeTraits[] typeTraits = new ITypeTraits[numPrimaryKeys + 1 + (hasMetaPart() ? 1 : 0)];
    List<Integer> indicators = null;
    if (hasMetaPart()) {
        indicators = ((InternalDatasetDetails) getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType = datasetType == DatasetType.EXTERNAL ? IndexingConstants.getFieldType(i)
                : (indicators == null || indicators.get(i) == 0)
                        ? recordType.getSubFieldType(partitioningKeys.get(i))
                        : metaType.getSubFieldType(partitioningKeys.get(i));
        typeTraits[i] = ttProvider.getTypeTrait(keyType);
    }
    typeTraits[numPrimaryKeys] = ttProvider.getTypeTrait(recordType);
    if (hasMetaPart()) {
        typeTraits[numPrimaryKeys + 1] = ttProvider.getTypeTrait(metaType);
    }
    return typeTraits;
}
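// The primary-index tuple layout that getPrimaryTypeTraits (and setPrimaryRecDescAndComparators
// below) assumes, written out as a sketch. Slot positions only; the traits/serde code above
// fills each slot from the matching type:
//
//   index:   0 .. numPrimaryKeys-1   numPrimaryKeys   numPrimaryKeys+1 (only if meta part)
//   content: primary key fields      the record       the meta record
//
// A tiny illustration of the array sizing (primaryTupleWidth is a hypothetical helper):
static int primaryTupleWidth(int numPrimaryKeys, boolean hasMetaPart) {
    return numPrimaryKeys + 1 + (hasMetaPart ? 1 : 0); // PKs + record + optional meta
}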
private void setPrimaryRecDescAndComparators() throws AlgebricksException {
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    ISerializerDeserializer[] primaryRecFields =
            new ISerializerDeserializer[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
    primaryComparatorFactories = new IBinaryComparatorFactory[numPrimaryKeys];
    primaryBloomFilterKeyFields = new int[numPrimaryKeys];
    ISerializerDeserializerProvider serdeProvider = metadataProvider.getDataFormat().getSerdeProvider();
    List<Integer> indicators = null;
    if (dataset.hasMetaPart()) {
        indicators = ((InternalDatasetDetails) dataset.getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType = (indicators == null || indicators.get(i) == 0)
                ? itemType.getSubFieldType(partitioningKeys.get(i))
                : metaType.getSubFieldType(partitioningKeys.get(i));
        primaryRecFields[i] = serdeProvider.getSerializerDeserializer(keyType);
        primaryComparatorFactories[i] =
                BinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType, true);
        primaryTypeTraits[i] = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
        primaryBloomFilterKeyFields[i] = i;
    }
    primaryRecFields[numPrimaryKeys] = payloadSerde;
    primaryTypeTraits[numPrimaryKeys] = TypeTraitProvider.INSTANCE.getTypeTrait(itemType);
    if (dataset.hasMetaPart()) {
        primaryRecFields[numPrimaryKeys + 1] = payloadSerde;
        primaryTypeTraits[numPrimaryKeys + 1] = TypeTraitProvider.INSTANCE.getTypeTrait(metaType);
    }
    primaryRecDesc = new RecordDescriptor(primaryRecFields, primaryTypeTraits);
}
private static void populateDataToRebalanceTarget(Dataset source, Dataset target,
        MetadataProvider metadataProvider, IHyracksClientConnection hcc) throws Exception {
    JobSpecification spec = new JobSpecification();
    TxnId txnId = metadataProvider.getTxnIdFactory().create();
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(txnId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);

    // The pipeline starter.
    IOperatorDescriptor starter = DatasetUtil.createDummyKeyProviderOp(spec, source, metadataProvider);
    // Creates the primary index scan op.
    IOperatorDescriptor primaryScanOp = DatasetUtil.createPrimaryIndexScanOp(spec, metadataProvider, source);
    // Creates the primary index upsert op on the target dataset.
    IOperatorDescriptor upsertOp = createPrimaryIndexUpsertOp(spec, metadataProvider, source, target);
    // The final commit operator.
    IOperatorDescriptor commitOp = createUpsertCommitOp(spec, metadataProvider, target);

    // Connects empty-tuple-source and scan.
    spec.connect(new OneToOneConnectorDescriptor(spec), starter, 0, primaryScanOp, 0);
    // Connects scan and upsert, hash-partitioning tuples on the target's primary keys.
    int numKeys = target.getPrimaryKeys().size();
    int[] keys = IntStream.range(0, numKeys).toArray();
    IConnectorDescriptor connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keys,
                    target.getPrimaryHashFunctionFactories(metadataProvider)));
    spec.connect(connectorDescriptor, primaryScanOp, 0, upsertOp, 0);
    // Connects upsert and commit.
    spec.connect(new OneToOneConnectorDescriptor(spec), upsertOp, 0, commitOp, 0);

    // Executes the job.
    JobUtils.runJob(hcc, spec, true);
}
    secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    secondaryPartitionConstraint = secondarySplitsAndConstraint.second;
    numPrimaryKeys = dataset.getPrimaryKeys().size();
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        filterFieldName = DatasetUtil.getFilterField(dataset);
        return FilesIndexDescription.FILES_INDEX_COMP_FACTORIES;
    int numPrimaryKeys = dataset.getPrimaryKeys().size();
    int numSecondaryKeys = index.getKeyFieldNames().size();
    IBinaryComparatorFactoryProvider cmpFactoryProvider =