/**
 * Returns the RID size for an external dataset, as reported by
 * {@link IndexingConstants#getRIDSize} for the value of the dataset's
 * {@link IndexingConstants#KEY_INPUT_FORMAT} property.
 *
 * @param dataset
 *            the dataset; its details are cast to {@link ExternalDatasetDetails}
 * @return the RID size associated with the dataset's input format
 */
public static int getRIDSize(Dataset dataset) {
    final ExternalDatasetDetails externalDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    final String inputFormat = externalDetails.getProperties().get(IndexingConstants.KEY_INPUT_FORMAT);
    return IndexingConstants.getRIDSize(inputFormat);
}
/**
 * Returns the binary comparator factories for an external dataset, as reported by
 * {@link IndexingConstants#getComparatorFactories} for the value of the dataset's
 * {@link IndexingConstants#KEY_INPUT_FORMAT} property.
 *
 * @param dataset
 *            the dataset; its details are cast to {@link ExternalDatasetDetails}
 * @return the comparator factories associated with the dataset's input format
 */
public static IBinaryComparatorFactory[] getComparatorFactories(Dataset dataset) {
    final ExternalDatasetDetails externalDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    final String inputFormat = externalDetails.getProperties().get(IndexingConstants.KEY_INPUT_FORMAT);
    return IndexingConstants.getComparatorFactories(inputFormat);
}
/**
 * Appends to {@code target} one entry per RID field of the given external dataset.
 * The number of entries is {@link IndexingConstants#getRIDSize} of the dataset's properties,
 * and entry {@code k} is {@code IndexingConstants.getFieldType(k)}.
 *
 * @param dataset
 *            the dataset; its details are cast to {@link ExternalDatasetDetails}
 * @param target
 *            the list that receives the field types, in field order
 * @throws AlgebricksException
 *             if one of the {@link IndexingConstants} lookups fails
 */
private static void appendExternalRecPrimaryKeys(Dataset dataset, List<Object> target)
        throws AlgebricksException {
    final ExternalDatasetDetails externalDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    final int ridFieldCount = IndexingConstants.getRIDSize(externalDetails.getProperties());
    for (int fieldIdx = 0; fieldIdx < ridFieldCount; fieldIdx++) {
        target.add(IndexingConstants.getFieldType(fieldIdx));
    }
}
public static List<ExternalFile> getSnapshotFromExternalFileSystem(Dataset dataset) throws AlgebricksException { ArrayList<ExternalFile> files = new ArrayList<>(); ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails(); try { // Create the file system object FileSystem fs = getFileSystemObject(datasetDetails.getProperties()); // Get paths of dataset String path = datasetDetails.getProperties().get(ExternalDataConstants.KEY_PATH); String[] paths = path.split(","); // Add fileStatuses to files for (String aPath : paths) { FileStatus[] fileStatuses = fs.listStatus(new Path(aPath)); for (int i = 0; i < fileStatuses.length; i++) { int nextFileNumber = files.size(); handleFile(dataset, files, fs, fileStatuses[i], nextFileNumber); } } // Close file system fs.close(); if (files.isEmpty()) { throw new AlgebricksException("File Snapshot retrieved from external file system is empty"); } return files; } catch (Exception e) { LOGGER.warn("Exception while trying to get snapshot from external system", e); throw new AlgebricksException("Unable to get list of HDFS files " + e); } }
/**
 * Returns the primary key field paths of this dataset.
 * For a dataset of type {@link DatasetType#EXTERNAL} these are the RID keys that
 * {@link IndexingConstants#getRIDKeys} derives from the dataset's properties; for any other
 * type the details are cast to {@link InternalDatasetDetails} and its partitioning key is returned.
 */
@Override
public List<List<String>> getPrimaryKeys() {
    if (getDatasetType() != DatasetType.EXTERNAL) {
        return ((InternalDatasetDetails) getDatasetDetails()).getPartitioningKey();
    }
    final ExternalDatasetDetails externalDetails = (ExternalDatasetDetails) getDatasetDetails();
    return IndexingConstants.getRIDKeys(externalDetails.getProperties());
}
/** * Creates output variables for the given unnest-map or left-outer-unnestmap operator * that does a secondary index lookup. * The order: SK, PK, [Optional: the result of a instantTryLock on PK] */ public static void appendSecondaryIndexOutputVars(Dataset dataset, ARecordType recordType, ARecordType metaRecordType, Index index, IOptimizationContext context, List<LogicalVariable> dest, boolean requireResultOfInstantTryLock) throws AlgebricksException { int numPrimaryKeys; if (dataset.getDatasetType() == DatasetType.EXTERNAL) { numPrimaryKeys = IndexingConstants .getRIDSize(((ExternalDatasetDetails) dataset.getDatasetDetails()).getProperties()); } else { numPrimaryKeys = dataset.getPrimaryKeys().size(); } int numSecondaryKeys = KeyFieldTypeUtil.getNumSecondaryKeys(index, recordType, metaRecordType); // In case of an inverted-index search, secondary keys will not be generated. int numVars = isInvertedIndex(index) ? numPrimaryKeys : numPrimaryKeys + numSecondaryKeys; // If it's an index-only plan, add one more variable to put the result of instantTryLock on PK - // whether this lock can be granted on a primary key. // If it is granted, then we don't need to do a post verification (select). // If it is not granted, then we need to do a secondary index lookup, do a primary index lookup, and select. if (requireResultOfInstantTryLock) { numVars += 1; } for (int i = 0; i < numVars; i++) { dest.add(context.newVar()); } }
if (dataset.getDatasetType() == DatasetType.EXTERNAL) { numPrimaryKeys = IndexingConstants .getRIDSize(((ExternalDatasetDetails) dataset.getDatasetDetails()).getProperties()); } else { numPrimaryKeys = dataset.getPrimaryKeys().size();
/**
 * Creates the external scan operator that uses an indexing adapter over the given files,
 * together with the partition constraint reported by that adapter's factory.
 *
 * @param metadataProvider
 *            supplies the application context whose service context is given to the adapter factory
 * @param jobSpec
 *            the job specification the scan operator is created in
 * @param itemType
 *            the dataset's item type; cast to {@link ARecordType}
 * @param dataset
 *            the dataset; its details are cast to {@link ExternalDatasetDetails}
 * @param files
 *            the external files handed to the indexing adapter factory
 * @param indexerDesc
 *            the record descriptor of the scan operator's output
 * @param sourceLoc
 *            the source location recorded on the scan operator
 * @return a pair of the scan operator and the adapter factory's partition constraint
 * @throws HyracksDataException
 *             declared by this method; propagated from the calls it makes
 * @throws AlgebricksException
 *             declared by this method; propagated from the calls it makes
 */
private static Pair<ExternalScanOperatorDescriptor, AlgebricksPartitionConstraint> getIndexingOperator(
        MetadataProvider metadataProvider, JobSpecification jobSpec, IAType itemType, Dataset dataset,
        List<ExternalFile> files, RecordDescriptor indexerDesc, SourceLocation sourceLoc)
        throws HyracksDataException, AlgebricksException {
    final ExternalDatasetDetails details = (ExternalDatasetDetails) dataset.getDatasetDetails();
    final Map<String, String> adapterConfig = details.getProperties();

    // NOTE(review): the trailing `true, null` arguments are passed through unchanged; their
    // meaning is defined by AdapterFactoryProvider.getIndexingAdapterFactory — confirm there.
    final IAdapterFactory indexingAdapterFactory = AdapterFactoryProvider.getIndexingAdapterFactory(
            metadataProvider.getApplicationContext().getServiceContext(), details.getAdapter(), adapterConfig,
            (ARecordType) itemType, files, true, null);

    final ExternalScanOperatorDescriptor scanner =
            new ExternalScanOperatorDescriptor(jobSpec, indexerDesc, indexingAdapterFactory);
    scanner.setSourceLocation(sourceLoc);

    final AlgebricksPartitionConstraint constraint = indexingAdapterFactory.getPartitionConstraint();
    return new Pair<>(scanner, constraint);
}
/**
 * Builds a copy of an external dataset whose details carry {@link TransactionState#BEGIN}.
 * The adapter, properties and timestamp are taken from the original details; every other
 * attribute is copied from {@code dataset}, and the dataset type is set to
 * {@link DatasetType#EXTERNAL}.
 *
 * @param dataset
 *            the dataset to copy; its details are cast to {@link ExternalDatasetDetails}
 * @return a new {@link Dataset} instance with the transaction state set to {@code BEGIN}
 */
public static Dataset createTransactionDataset(Dataset dataset) {
    final ExternalDatasetDetails sourceDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
    final ExternalDatasetDetails transactionDetails = new ExternalDatasetDetails(
            sourceDetails.getAdapter(),
            sourceDetails.getProperties(),
            sourceDetails.getTimestamp(),
            TransactionState.BEGIN);
    return new Dataset(
            dataset.getDataverseName(),
            dataset.getDatasetName(),
            dataset.getItemTypeDataverseName(),
            dataset.getItemTypeName(),
            dataset.getNodeGroupName(),
            dataset.getCompactionPolicy(),
            dataset.getCompactionPolicyProperties(),
            transactionDetails,
            dataset.getHints(),
            DatasetType.EXTERNAL,
            dataset.getDatasetId(),
            dataset.getPendingOp());
}
ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails(); LookupAdapterFactory<?> adapterFactory = AdapterFactoryProvider.getLookupAdapterFactory( getApplicationContext().getServiceContext(), datasetDetails.getProperties(), itemType, ridIndexes, retainInput, retainMissing, context.getMissingWriterFactory()); String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
edd.getAdapter(), edd.getProperties(), (ARecordType) itemType, null); return metadataProvider.buildExternalDatasetDataScannerRuntime(jobSpec, itemType, adapterFactory); case INTERNAL: