/**
 * Returns the filter field of the given dataset.
 * Filter fields are only defined on internal dataset details.
 */
public static List<String> getFilterField(Dataset dataset) {
    final InternalDatasetDetails details = (InternalDatasetDetails) dataset.getDatasetDetails();
    return details.getFilterField();
}
/**
 * Returns the RID comparator factories for an external dataset.
 * The factories are chosen based on the dataset's configured input format.
 */
public static IBinaryComparatorFactory[] getComparatorFactories(Dataset dataset) {
    ExternalDatasetDetails details = (ExternalDatasetDetails) dataset.getDatasetDetails();
    String inputFormat = details.getProperties().get(IndexingConstants.KEY_INPUT_FORMAT);
    return IndexingConstants.getComparatorFactories(inputFormat);
}
/**
 * Returns the number of RID (record id) fields for an external dataset,
 * as determined by its configured input format.
 */
public static int getRIDSize(Dataset dataset) {
    ExternalDatasetDetails details = (ExternalDatasetDetails) dataset.getDatasetDetails();
    String inputFormat = details.getProperties().get(IndexingConstants.KEY_INPUT_FORMAT);
    return IndexingConstants.getRIDSize(inputFormat);
}
/**
 * Returns the primary keys of this dataset: the input-format-derived RID keys
 * for external datasets, or the declared partitioning key otherwise.
 */
@Override
public List<List<String>> getPrimaryKeys() {
    if (getDatasetType() != DatasetType.EXTERNAL) {
        return ((InternalDatasetDetails) getDatasetDetails()).getPartitioningKey();
    }
    return IndexingConstants.getRIDKeys(((ExternalDatasetDetails) getDatasetDetails()).getProperties());
}
/**
 * Collects the primary-key variables produced by the given scan into {@code pkVars}.
 * The output list is cleared first; it stays empty for datasets whose details are
 * not {@link InternalDatasetDetails} (e.g. external datasets).
 *
 * Fix: the original called {@code pkVars.clear()} twice; the redundant second
 * call has been removed.
 */
private void fillPKVars(DataSourceScanOperator dataScan, List<LogicalVariable> pkVars) {
    pkVars.clear();
    DatasetDataSource datasetDataSource = (DatasetDataSource) dataScan.getDataSource();
    if (datasetDataSource.getDataset().getDatasetDetails() instanceof InternalDatasetDetails) {
        // By convention the first N scan variables correspond to the N primary-key fields.
        int numPKs = datasetDataSource.getDataset().getPrimaryKeys().size();
        for (int i = 0; i < numPKs; i++) {
            pkVars.add(dataScan.getVariables().get(i));
        }
    }
}
/**
 * Appends to {@code target} the type of each RID (record id) primary-key field
 * of the given external dataset, in positional order.
 */
private static void appendExternalRecPrimaryKeys(Dataset dataset, List<Object> target)
        throws AlgebricksException {
    ExternalDatasetDetails details = (ExternalDatasetDetails) dataset.getDatasetDetails();
    int ridCount = IndexingConstants.getRIDSize(details.getProperties());
    for (int pos = 0; pos < ridCount; pos++) {
        target.add(IndexingConstants.getFieldType(pos));
    }
}
/**
 * Finds the position of {@code fieldExpr} among the dataset's partitioning keys,
 * taking the key-source (record vs. meta) into account.
 *
 * @return the zero-based key position, or -1 if the field is not a partitioning key
 */
public static int getPositionOfPartitioningKeyField(Dataset dataset, List<String> fieldExpr,
        boolean fieldFromMeta) {
    IDatasetDetails details = dataset.getDatasetDetails();
    // Key-source indicators only exist for internal datasets.
    List<Integer> keySourceIndicator = details.getDatasetType() == DatasetType.INTERNAL
            ? ((InternalDatasetDetails) details).getKeySourceIndicator() : null;
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    for (int pos = 0; pos < partitioningKeys.size(); pos++) {
        if (partitioningKeys.get(pos).equals(fieldExpr)
                && keySourceMatches(keySourceIndicator, pos, fieldFromMeta)) {
            return pos;
        }
    }
    return -1;
}
/**
 * Returns the types of the primary-key (partitioning-key) fields.
 *
 * @param dataset the dataset to consider
 * @param recordType the main record type
 * @param metaRecordType the auxiliary meta record type
 * @return a list of IATypes, one per primary-key field, or {@code null} for
 *         non-internal (e.g. external) datasets
 * @throws AlgebricksException
 */
public static List<IAType> getPartitoningKeyTypes(Dataset dataset, ARecordType recordType,
        ARecordType metaRecordType) throws AlgebricksException {
    if (dataset.getDatasetType() != DatasetType.INTERNAL) {
        return null;
    }
    return getPartitioningKeyTypes((InternalDatasetDetails) dataset.getDatasetDetails(), recordType,
            metaRecordType);
}
/**
 * Creates a data source describing the target of a LOAD statement.
 * When the target dataset's primary key is auto-generated, the exposed schema
 * strips the PK fields, since incoming records do not carry them.
 */
public LoadableDataSource(Dataset targetDataset, IAType itemType, IAType metaItemType, String adapter,
        Map<String, String> properties) throws AlgebricksException, IOException {
    super(new DataSourceId("loadable_dv", "loadable_ds"), itemType, metaItemType, Type.LOADABLE, null);
    this.targetDataset = targetDataset;
    this.adapter = adapter;
    this.adapterProperties = properties;
    partitioningKeys = targetDataset.getPrimaryKeys();
    isPKAutoGenerated = ((InternalDatasetDetails) targetDataset.getDatasetDetails()).isAutogenerated();
    ARecordType loadRecType = (ARecordType) itemType;
    if (isPKAutoGenerated) {
        // The key is generated at load time, so use a (possibly nested) record
        // type containing every field except the PK.
        loadRecType = getStrippedPKType(new LinkedList<>(partitioningKeys.get(0)), loadRecType);
    }
    schemaTypes = new IAType[] { loadRecType };
}
/**
 * Serializes the dataset's details record and adds it to the record builder at
 * the field index matching the dataset type (internal vs. external).
 */
protected void writeDatasetDetailsRecordType(IARecordBuilder recordBuilder, Dataset dataset,
        DataOutput dataOutput) throws HyracksDataException {
    // NOTE(review): the details are written to fieldValue's own output stream,
    // not to the dataOutput parameter — confirm the parameter is intentionally unused.
    dataset.getDatasetDetails().writeDatasetDetailsRecordType(fieldValue.getDataOutput());
    if (dataset.getDatasetType() == DatasetType.INTERNAL) {
        recordBuilder.addField(MetadataRecordTypes.DATASET_ARECORD_INTERNALDETAILS_FIELD_INDEX, fieldValue);
    } else if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        recordBuilder.addField(MetadataRecordTypes.DATASET_ARECORD_EXTERNALDETAILS_FIELD_INDEX, fieldValue);
    }
}
/**
 * Computes the physical properties delivered by this scan by modeling it as an
 * external dataset data source (external datasets carry no meta records).
 */
@Override
public void computeDeliveredProperties(ILogicalOperator op, IOptimizationContext context)
        throws AlgebricksException {
    DataSource source = new DatasetDataSource(datasetId, dataset, recordType,
            null /*external dataset doesn't have meta records.*/, Type.EXTERNAL_DATASET,
            dataset.getDatasetDetails(), context.getComputationNodeDomain());
    AbstractScanOperator scanOp = (AbstractScanOperator) op;
    deliveredProperties = source.getPropertiesProvider().computePropertiesVector(scanOp.getVariables());
}
/**
 * Builds the type traits of the primary index tuple.
 * Slot layout: [PK fields..., record, (meta record when the dataset has a meta part)].
 * External datasets take their key types from the indexing constants; internal
 * datasets resolve each key against the record or meta type per its key-source indicator.
 */
public ITypeTraits[] getPrimaryTypeTraits(MetadataProvider metadataProvider, ARecordType recordType,
        ARecordType metaType) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    ITypeTraitProvider ttProvider = storageComponentProvider.getTypeTraitProvider();
    List<List<String>> partitioningKeys = getPrimaryKeys();
    int numPrimaryKeys = partitioningKeys.size();
    ITypeTraits[] typeTraits = new ITypeTraits[numPrimaryKeys + 1 + (hasMetaPart() ? 1 : 0)];
    // Indicators (record=0 vs meta) only exist when the dataset has a meta part.
    List<Integer> indicators =
            hasMetaPart() ? ((InternalDatasetDetails) getDatasetDetails()).getKeySourceIndicator() : null;
    for (int i = 0; i < numPrimaryKeys; i++) {
        IAType keyType;
        if (datasetType == DatasetType.EXTERNAL) {
            keyType = IndexingConstants.getFieldType(i);
        } else if (indicators == null || indicators.get(i) == 0) {
            keyType = recordType.getSubFieldType(partitioningKeys.get(i));
        } else {
            keyType = metaType.getSubFieldType(partitioningKeys.get(i));
        }
        typeTraits[i] = ttProvider.getTypeTrait(keyType);
    }
    typeTraits[numPrimaryKeys] = ttProvider.getTypeTrait(recordType);
    if (hasMetaPart()) {
        typeTraits[numPrimaryKeys + 1] = ttProvider.getTypeTrait(metaType);
    }
    return typeTraits;
}
/**
 * Constructs the primary-index metadata object for an internal dataset.
 * The primary index shares the dataset's name and is keyed on the partitioning key.
 */
public static Index getPrimaryIndex(Dataset dataset) {
    InternalDatasetDetails details = (InternalDatasetDetails) dataset.getDatasetDetails();
    return new Index(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName(),
            DatasetConfig.IndexType.BTREE, details.getPartitioningKey(), details.getKeySourceIndicator(),
            details.getPrimaryKeyType(), false, false, true, dataset.getPendingOp());
}
/**
 * Initializes the primary-index descriptors for the dataset: the record
 * descriptor, comparator factories, type traits and bloom-filter key fields.
 * Tuple slot layout: [PK fields..., record payload, (meta payload when present)].
 */
private void setPrimaryRecDescAndComparators() throws AlgebricksException {
    List<List<String>> partitioningKeys = dataset.getPrimaryKeys();
    // One slot per PK field, one for the record, plus one for the meta record if any.
    ISerializerDeserializer[] primaryRecFields =
            new ISerializerDeserializer[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
    ITypeTraits[] primaryTypeTraits = new ITypeTraits[numPrimaryKeys + 1 + (dataset.hasMetaPart() ? 1 : 0)];
    primaryComparatorFactories = new IBinaryComparatorFactory[numPrimaryKeys];
    primaryBloomFilterKeyFields = new int[numPrimaryKeys];
    ISerializerDeserializerProvider serdeProvider = metadataProvider.getDataFormat().getSerdeProvider();
    // Key-source indicators (record=0 vs meta) only exist for datasets with a meta part.
    List<Integer> indicators = null;
    if (dataset.hasMetaPart()) {
        indicators = ((InternalDatasetDetails) dataset.getDatasetDetails()).getKeySourceIndicator();
    }
    for (int i = 0; i < numPrimaryKeys; i++) {
        // Resolve the key field's type against the record or the meta type.
        IAType keyType = (indicators == null || indicators.get(i) == 0)
                ? itemType.getSubFieldType(partitioningKeys.get(i))
                : metaType.getSubFieldType(partitioningKeys.get(i));
        primaryRecFields[i] = serdeProvider.getSerializerDeserializer(keyType);
        // Ascending-order comparators; every PK field participates in the bloom filter.
        primaryComparatorFactories[i] =
                BinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType, true);
        primaryTypeTraits[i] = TypeTraitProvider.INSTANCE.getTypeTrait(keyType);
        primaryBloomFilterKeyFields[i] = i;
    }
    primaryRecFields[numPrimaryKeys] = payloadSerde;
    primaryTypeTraits[numPrimaryKeys] = TypeTraitProvider.INSTANCE.getTypeTrait(itemType);
    if (dataset.hasMetaPart()) {
        // NOTE(review): the meta slot reuses payloadSerde (the record serde) while its
        // type trait comes from metaType — confirm this reuse is intentional.
        primaryRecFields[numPrimaryKeys + 1] = payloadSerde;
        primaryTypeTraits[numPrimaryKeys + 1] = TypeTraitProvider.INSTANCE.getTypeTrait(metaType);
    }
    primaryRecDesc = new RecordDescriptor(primaryRecFields, primaryTypeTraits);
}
/**
 * Creates the scan operator that reads and indexes records stored in HDFS.
 *
 * @param metadataProvider provides the application/service context
 * @param jobSpec the job the operator belongs to
 * @param itemType the record type of the external data
 * @param dataset the external dataset being indexed
 * @param files the external files to index
 * @param indexerDesc record descriptor produced by the indexer
 * @param sourceLoc source location for error reporting
 * @return the scan operator paired with its partition constraint
 * @throws AlgebricksException
 * @throws HyracksDataException
 */
private static Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> getIndexingOperator(
        MetadataProvider metadataProvider, JobSpecification jobSpec, IAType itemType, Dataset dataset,
        List<ExternalFile> files, RecordDescriptor indexerDesc, SourceLocation sourceLoc)
        throws HyracksDataException, AlgebricksException {
    ExternalDatasetDetails details = (ExternalDatasetDetails) dataset.getDatasetDetails();
    IAdapterFactory adapterFactory = AdapterFactoryProvider.getIndexingAdapterFactory(
            metadataProvider.getApplicationContext().getServiceContext(), details.getAdapter(),
            details.getProperties(), (ARecordType) itemType, files, true, null);
    ExternalScanOperatorDescriptor scanOp =
            new ExternalScanOperatorDescriptor(jobSpec, indexerDesc, adapterFactory);
    scanOp.setSourceLocation(sourceLoc);
    return new Pair<>(scanOp, adapterFactory.getPartitionConstraint());
}
/**
 * Clones an external dataset with its transaction state reset to BEGIN;
 * all other metadata is copied from the original dataset unchanged.
 */
public static Dataset createTransactionDataset(Dataset dataset) {
    ExternalDatasetDetails current = (ExternalDatasetDetails) dataset.getDatasetDetails();
    ExternalDatasetDetails beginDetails = new ExternalDatasetDetails(current.getAdapter(),
            current.getProperties(), current.getTimestamp(), TransactionState.BEGIN);
    return new Dataset(dataset.getDataverseName(), dataset.getDatasetName(),
            dataset.getItemTypeDataverseName(), dataset.getItemTypeName(), dataset.getNodeGroupName(),
            dataset.getCompactionPolicy(), dataset.getCompactionPolicyProperties(), beginDetails,
            dataset.getHints(), DatasetType.EXTERNAL, dataset.getDatasetId(), dataset.getPendingOp());
}
/**
 * Resolves a {@link DataSourceId} to a {@link DatasetDataSource} using metadata.
 *
 * @param clusterStateManager used to resolve the dataset's node domain
 * @param mdTxnCtx the active metadata transaction
 * @param aqlId the data source id (dataverse + dataset name)
 * @return the resolved data source
 * @throws AlgebricksException if no dataset with the given id exists
 *
 * Fix: compare the DatasetType enum with {@code ==} instead of {@code equals()} —
 * idiomatic for enums and immune to an accidental null receiver.
 */
public static DataSource lookupSourceInMetadata(IClusterStateManager clusterStateManager,
        MetadataTransactionContext mdTxnCtx, DataSourceId aqlId) throws AlgebricksException {
    Dataset dataset = findDataset(mdTxnCtx, aqlId.getDataverseName(), aqlId.getDatasourceName());
    if (dataset == null) {
        throw new AlgebricksException("Datasource with id " + aqlId + " was not found.");
    }
    IAType itemType = findType(mdTxnCtx, dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType =
            findType(mdTxnCtx, dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    INodeDomain domain = findNodeDomain(clusterStateManager, mdTxnCtx, dataset.getNodeGroupName());
    byte datasourceType = dataset.getDatasetType() == DatasetType.EXTERNAL ? DataSource.Type.EXTERNAL_DATASET
            : DataSource.Type.INTERNAL_DATASET;
    return new DatasetDataSource(aqlId, dataset, itemType, metaItemType, datasourceType,
            dataset.getDatasetDetails(), domain);
}
}
/**
 * Looks up the named dataset and validates that it may serve as a write target,
 * then wraps it as an internal {@link DatasetDataSource}.
 * Fails if the dataset is unknown or is an external dataset.
 */
private DatasetDataSource validateDatasetInfo(MetadataProvider metadataProvider, String dataverseName,
        String datasetName, SourceLocation sourceLoc) throws AlgebricksException {
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new CompilationException(ErrorCode.UNKNOWN_DATASET_IN_DATAVERSE, sourceLoc, datasetName,
                dataverseName);
    }
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // External datasets are read-only from the query compiler's perspective.
        throw new CompilationException(ErrorCode.COMPILATION_ERROR, sourceLoc,
                "Cannot write output to an external dataset.");
    }
    IAType itemType = metadataProvider.findType(dataset.getItemTypeDataverseName(), dataset.getItemTypeName());
    IAType metaItemType =
            metadataProvider.findType(dataset.getMetaItemTypeDataverseName(), dataset.getMetaItemTypeName());
    INodeDomain domain = metadataProvider.findNodeDomain(dataset.getNodeGroupName());
    return new DatasetDataSource(new DataSourceId(dataverseName, datasetName), dataset, itemType,
            metaItemType, DataSource.Type.INTERNAL_DATASET, dataset.getDatasetDetails(), domain);
}
@Override public void addDataset(TxnId txnId, Dataset dataset) throws AlgebricksException, RemoteException { try { // Insert into the 'dataset' dataset. DatasetTupleTranslator tupleReaderWriter = tupleTranslatorProvider.getDatasetTupleTranslator(true); ITupleReference datasetTuple = tupleReaderWriter.getTupleFromMetadataEntity(dataset); insertTupleIntoIndex(txnId, MetadataPrimaryIndexes.DATASET_DATASET, datasetTuple); if (dataset.getDatasetType() == DatasetType.INTERNAL) { // Add the primary index for the dataset. InternalDatasetDetails id = (InternalDatasetDetails) dataset.getDatasetDetails(); Index primaryIndex = new Index(dataset.getDataverseName(), dataset.getDatasetName(), dataset.getDatasetName(), IndexType.BTREE, id.getPrimaryKey(), id.getKeySourceIndicator(), id.getPrimaryKeyType(), false, false, true, dataset.getPendingOp()); addIndex(txnId, primaryIndex); } } catch (HyracksDataException e) { if (e.getComponent().equals(ErrorCode.HYRACKS) && e.getErrorCode() == ErrorCode.DUPLICATE_KEY) { throw new AlgebricksException("A dataset with this name " + dataset.getDatasetName() + " already exists in dataverse '" + dataset.getDataverseName() + "'.", e); } else { throw new AlgebricksException(e); } } }
/**
 * Adds a derived internal dataset ("ds_" + postfix) based on {@code source},
 * then either aborts or commits the metadata transaction per {@code abort}.
 * Metadata locks are released in all cases.
 */
private void addDataset(ICcApplicationContext appCtx, Dataset source, int datasetPostfix, boolean abort)
        throws Exception {
    Dataset dataset = new Dataset(source.getDataverseName(), "ds_" + datasetPostfix,
            source.getDataverseName(), source.getDatasetType().name(), source.getNodeGroupName(),
            NoMergePolicyFactory.NAME, null, source.getDatasetDetails(), source.getHints(),
            DatasetConfig.DatasetType.INTERNAL, datasetPostfix, 0);
    MetadataProvider metadataProvider = new MetadataProvider(appCtx, null);
    final MetadataTransactionContext writeTxn = MetadataManager.INSTANCE.beginTransaction();
    metadataProvider.setMetadataTxnContext(writeTxn);
    try {
        MetadataManager.INSTANCE.addDataset(writeTxn, dataset);
        if (abort) {
            MetadataManager.INSTANCE.abortTransaction(writeTxn);
        } else {
            MetadataManager.INSTANCE.commitTransaction(writeTxn);
        }
    } finally {
        // Always release metadata locks, even when the transaction fails.
        metadataProvider.getLocks().unlock();
    }
}
}