/**
 * Builds the search query for the splits of the given dataset at its current split version.
 *
 * @param datasetConfig the dataset config; its read definition and split version must be non-null
 * @return the query produced by {@code getSplitsQuery(id, splitVersion)} for this dataset
 * @throws NullPointerException if the read definition or its split version is null
 */
public static SearchQuery getSplitsQuery(DatasetConfig datasetConfig) {
  // Fail fast when the dataset carries no read definition at all.
  Preconditions.checkNotNull(datasetConfig.getReadDefinition());

  final long currentVersion =
      Preconditions.checkNotNull(datasetConfig.getReadDefinition().getSplitVersion());

  return getSplitsQuery(datasetConfig.getId(), currentVersion);
}
// Derive the id for the new split from the dataset's current split version, stamp that same
// version onto the split itself, and register the (id, split) pair in the builder.
final DatasetSplitId splitId = DatasetSplitId.of(dataset, newSplit, dataset.getReadDefinition().getSplitVersion()); newSplit.setSplitVersion(dataset.getReadDefinition().getSplitVersion()); builder.put(splitId, newSplit);
/**
 * Returns the split-id range covering the dataset's current split version.
 *
 * <p>The split version is read from the dataset's read definition and unboxed, so a missing
 * read definition or split version results in a {@link NullPointerException}.
 *
 * @param datasetConfig the dataset config
 * @return a range which would contain all split ids for this dataset and its current split version
 */
public static Range<DatasetSplitId> getCurrentSplitRange(DatasetConfig datasetConfig) {
  final long currentVersion = datasetConfig.getReadDefinition().getSplitVersion();

  return getSplitRange(datasetConfig.getId(), currentVersion);
}
/**
 * Creates a splits pointer for the given dataset at its current split version.
 *
 * <p>The number of splits is looked up through the namespace service, using the splits query
 * built for the dataset by {@code DatasetSplitId.getSplitsQuery}.
 *
 * @param namespaceService service used to count the splits and handed to the new pointer
 * @param datasetConfig the dataset config; its id and read definition must be non-null
 * @return a {@code DatasetSplitsPointer} for the dataset id, split version and split count
 * @throws NullPointerException if the dataset id, read definition or split version is null
 */
public static SplitsPointer of(NamespaceService namespaceService, DatasetConfig datasetConfig) {
  final EntityId id = Preconditions.checkNotNull(datasetConfig.getId());
  final ReadDefinition definition = Preconditions.checkNotNull(datasetConfig.getReadDefinition());
  final long version = definition.getSplitVersion();

  final int count = namespaceService.getSplitCount(
      new FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig)));

  return new DatasetSplitsPointer(namespaceService, id, version, count);
}
/** * UNSAFE! Use {@code DatasetSplitId#getSplitRange(EntityId, long)} instead */ public static FindByRange<DatasetSplitId> unsafeGetSplitsRange(DatasetConfig config) { final long splitVersion = config.getReadDefinition().getSplitVersion(); final long nextSplitVersion = splitVersion + 1; final String datasetId = config.getId().getId(); // Unsafe way of constructing dataset split id!!! final DatasetSplitId start = new DatasetSplitId(SPLIT_ID_JOINER.join(datasetId, splitVersion, ""), datasetId, splitVersion, ""); final DatasetSplitId end = new DatasetSplitId(SPLIT_ID_JOINER.join(datasetId, nextSplitVersion, ""), datasetId, splitVersion, ""); return new FindByRange<DatasetSplitId>() .setStart(start, true) .setEnd(end, false); } /**
private void fixSplits(final KVStore<DatasetSplitId, DatasetSplit> splitsStore, DatasetConfig config) { final long version = config.getReadDefinition().getSplitVersion(); // Get old splits final FindByRange<DatasetSplitId> query = DatasetSplitId.unsafeGetSplitsRange(config); for (Entry<DatasetSplitId, DatasetSplit> entry : splitsStore.find(query)) { final DatasetSplitId oldId = entry.getKey(); final DatasetSplit split = entry.getValue(); // Generate new Id and compare with old id final DatasetSplitId newId = DatasetSplitId.of(config, split, version); if (oldId.equals(newId)) { continue; } // Delete the previous entry and add a new one splitsStore.delete(oldId); splitsStore.put(newId, split.setVersion(null)); } }
final DatasetConfig dataset = container.getDataset(); final ReadDefinition readDefinition = dataset.getReadDefinition(); if (readDefinition == null || readDefinition.getSplitVersion() == null) { continue;
Preconditions.checkNotNull(dataset.getReadDefinition()); if (dataset.getReadDefinition() != null && dataset.getReadDefinition().getSplitVersion() != null && !compareSplits(dataset, splits, splitsStore.find(DatasetSplitId.getSplitsRange(dataset)))) { addOrUpdateDataset(datasetPath, dataset, attributes); existingDatasetConfig.getReadDefinition().getSplitVersion() != null && existingDatasetConfig.getReadDefinition().getSplitVersion() > nextSplitVersion) { deleteSplits(splitIds); break;
@Override public void upgrade(UpgradeContext context) throws Exception { final KVStoreProvider storeProvider = context.getKVStoreProvider(); final KVStore<byte[], NameSpaceContainer> namespace = storeProvider.getStore(NamespaceServiceImpl.NamespaceStoreCreator.class); final KVStore<DatasetSplitId, DatasetSplit> splitsStore = storeProvider.getStore(NamespaceServiceImpl.DatasetSplitCreator.class); int fixedSplitIds = 0; // namespace#find() returns entries ordered by depth, so sources will // be processed before folders, which will be processed before datasets for(Map.Entry<byte[], NameSpaceContainer> entry: namespace.find()) { final NameSpaceContainer container = entry.getValue(); if (container.getType() != NameSpaceContainer.Type.DATASET) { continue; } DatasetConfig config = entry.getValue().getDataset(); if (config.getType() == DatasetType.VIRTUAL_DATASET) { continue; } if (config.getReadDefinition() == null || config.getReadDefinition().getSplitVersion() == null) { continue; } if (!DatasetSplitId.mayRequireNewDatasetId(config)) { // Datasets which do not contain reserved characters are fine continue; } fixSplits(splitsStore, config); } System.out.printf(" Updated %d dataset splits with new ids.%n", fixedSplitIds); }
/**
 * Creates a split id for the given dataset and split key, using the dataset's current split
 * version.
 *
 * <p>Delegates to {@code DatasetSplitId.ofUnsafe} with the dataset id, the split version read
 * from the dataset's read definition, and the key.
 *
 * @param config the dataset config; its read definition must be non-null
 * @param key the split key
 * @return the split id returned by {@code ofUnsafe}
 */
public static DatasetSplitId of(DatasetConfig config, String key) {
  final Long currentVersion = config.getReadDefinition().getSplitVersion();

  return DatasetSplitId.ofUnsafe(config.getId(), currentVersion, key);
}
// Split-version assertions. Every compareSplits call, run against the splits found through the
// dataset's current splits query, is expected to return false. After the first, third, fourth
// and fifth checks the dataset's split version must be greater than lastSplitVersion; after the
// second check it must still equal the previously read version.
// NOTE(review): the statements that change the splits between these checks are not visible
// here -- confirm against the full test.
assertTrue(!NamespaceServiceImpl.compareSplits(datasetConfig, splits, ns.findSplits(new IndexedStore.FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig))))); Long newSplitVersion = datasetConfig.getReadDefinition().getSplitVersion(); assertTrue(newSplitVersion > lastSplitVersion); lastSplitVersion = newSplitVersion; assertTrue(!NamespaceServiceImpl.compareSplits(datasetConfig, splits, ns.findSplits(new IndexedStore.FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig))))); assertEquals(newSplitVersion, datasetConfig.getReadDefinition().getSplitVersion()); assertTrue(!NamespaceServiceImpl.compareSplits(datasetConfig, splits, ns.findSplits(new IndexedStore.FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig))))); newSplitVersion = datasetConfig.getReadDefinition().getSplitVersion(); assertTrue(newSplitVersion > lastSplitVersion); lastSplitVersion = newSplitVersion; assertTrue(!NamespaceServiceImpl.compareSplits(datasetConfig, splits, ns.findSplits(new IndexedStore.FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig))))); newSplitVersion = datasetConfig.getReadDefinition().getSplitVersion(); assertTrue(newSplitVersion > lastSplitVersion); lastSplitVersion = newSplitVersion; assertTrue(!NamespaceServiceImpl.compareSplits(datasetConfig, splits, ns.findSplits(new IndexedStore.FindByCondition().setCondition(DatasetSplitId.getSplitsQuery(datasetConfig))))); newSplitVersion = datasetConfig.getReadDefinition().getSplitVersion(); assertTrue(newSplitVersion > lastSplitVersion);