/**
 * Builds a {@link DatasetConfig} describing a physical dataset backed by the given file
 * format settings.
 *
 * <p>Full path, name, tag and creation time are taken from {@code fileConfig}. The owner
 * recorded on {@code fileConfig} wins when present; otherwise the {@code owner} argument is
 * used. (Previously the argument was set and then unconditionally overwritten, so it never
 * had any effect.)
 *
 * @param fileConfig file format settings; also stored as the physical dataset's format settings
 * @param datasetType type to record on the resulting config
 * @param owner owner to use when {@code fileConfig} does not carry one
 * @param id entity id to record on the resulting config
 * @return a newly created dataset config
 */
public static DatasetConfig toDatasetConfig(FileConfig fileConfig, DatasetType datasetType, String owner, EntityId id) {
  final DatasetConfig datasetConfig = new DatasetConfig();
  datasetConfig.setFullPathList(fileConfig.getFullPathList());
  datasetConfig.setName(fileConfig.getName());

  // Start from the caller-supplied owner and let the file config override it only when it
  // actually has an owner, so the argument is no longer silently discarded.
  datasetConfig.setOwner(owner);
  if (fileConfig.getOwner() != null) {
    datasetConfig.setOwner(fileConfig.getOwner());
  }

  datasetConfig.setTag(fileConfig.getTag());
  datasetConfig.setType(datasetType);
  datasetConfig.setCreatedAt(fileConfig.getCtime());
  datasetConfig.setId(id);
  datasetConfig.setPhysicalDataset(
      new com.dremio.service.namespace.dataset.proto.PhysicalDataset().setFormatSettings(fileConfig));
  return datasetConfig;
}
/** * Carry over few properties from old dataset config to new one * @param oldConfig old dataset config from namespace * @param newConfig new dataset config thats about to be saved in namespace */ public static void copyFromOldConfig(DatasetConfig oldConfig, DatasetConfig newConfig) { if(oldConfig == null) { return; } newConfig.setId(oldConfig.getId()); newConfig.setTag(oldConfig.getTag()); newConfig.setCreatedAt(oldConfig.getCreatedAt()); newConfig.setType(oldConfig.getType()); newConfig.setFullPathList(oldConfig.getFullPathList()); newConfig.setOwner(oldConfig.getOwner()); // make sure to copy the acceleration settings from old to new config // newConfig may contain upgrade fileFormat physical settings if (oldConfig.getPhysicalDataset() != null) { if (newConfig.getPhysicalDataset() == null) { newConfig.setPhysicalDataset(new PhysicalDataset()); } } }
private void saveInHomeSpace(NamespaceService namespaceService, SourceTableDefinition accessor, DatasetConfig nsConfig) { Preconditions.checkNotNull(nsConfig); final NamespaceKey key = new NamespaceKey(nsConfig.getFullPathList()); try{ // use key from namespace config DatasetConfig srcConfig = accessor.getDataset(); if (nsConfig.getId() == null) { nsConfig.setId(srcConfig.getId()); } // Merge namespace config with config obtained from underlying filesystem used to store user uploaded files. // Set schema, read definition and state from source accessor nsConfig.setRecordSchema(srcConfig.getRecordSchema()); nsConfig.setSchemaVersion(srcConfig.getSchemaVersion()); nsConfig.setReadDefinition(srcConfig.getReadDefinition()); // get splits from source List<DatasetSplit> splits = accessor.getSplits(); namespaceService.addOrUpdateDataset(key, nsConfig, splits); }catch(Exception ex){ logger.warn("Failure while retrieving and saving dataset {}.", key, ex); } }
/**
 * Creates dataset details from a dataset config plus externally computed counters.
 *
 * @param datasetConfig config supplying the full path, owner and creation time
 * @param jobCount value stored as the job count
 * @param descendants value stored as the descendant count
 * @param parentDatasetContainer container stored as the parent
 */
public DatasetDetails(
    DatasetConfig datasetConfig,
    int jobCount,
    int descendants,
    DatasetContainer parentDatasetContainer) {
  // Values read from the dataset config.
  this.displayFullPath = datasetConfig.getFullPathList();
  this.owner = datasetConfig.getOwner();
  this.createdAt = datasetConfig.getCreatedAt();

  // Values passed straight through from the caller.
  this.jobCount = jobCount;
  this.descendants = descendants;
  this.parentDatasetContainer = parentDatasetContainer;
}
/**
 * Creates a resource tree entry for a dataset. The resource type is derived from the
 * dataset's type via {@code getResourceType}; the url is left {@code null}.
 *
 * @param datasetConfig dataset supplying the type, name and full path
 * @throws UnsupportedEncodingException declared by this constructor's signature
 */
public ResourceTreeEntity(DatasetConfig datasetConfig) throws UnsupportedEncodingException {
  // TODO File system folder datasets can further be explored.
  this.type = getResourceType(datasetConfig.getType());
  this.fullPath = datasetConfig.getFullPathList();
  this.name = datasetConfig.getName();
  // Dataset entries carry no url.
  this.url = null;
}
/**
 * Returns the dataset config for this table.
 *
 * <p>Reuses {@code oldDataset} when one is available (updating it in place); otherwise
 * creates a new physical dataset config with a random id at this table's key. In both cases
 * the name, read definition, owner, physical dataset, record schema and schema version are
 * (re)set before returning.
 *
 * @return the populated dataset config
 * @throws Exception declared by the overridden method
 */
@Override
public DatasetConfig getDataset() throws Exception {
  DatasetConfig dataset = oldDataset;
  if (dataset == null) {
    dataset = new DatasetConfig()
        .setFullPathList(key.getPathComponents())
        .setId(new EntityId(UUID.randomUUID().toString()))
        .setType(DatasetType.PHYSICAL_DATASET);
  }

  dataset.setName(key.getName());
  // Fixed scan statistics: a record count of 100 and the OTHER scan cost factor.
  dataset.setReadDefinition(
      new ReadDefinition()
          .setScanStats(
              new ScanStats()
                  .setRecordCount(100L)
                  .setScanFactor(ScanCostFactor.OTHER.getFactor())));
  dataset.setOwner(SystemUser.SYSTEM_USERNAME);
  dataset.setPhysicalDataset(new PhysicalDataset());
  dataset.setRecordSchema(getSchema().toByteString());
  return dataset.setSchemaVersion(DatasetHelper.CURRENT_VERSION);
}
/**
 * Converts a UI-level virtual dataset into a {@link VirtualDatasetVersion}.
 *
 * <p>The embedded dataset config is always typed {@code VIRTUAL_DATASET}; its id is set only
 * when the UI object has one.
 *
 * @param virtualDatasetUI UI representation to convert
 * @return a new version object wrapping a newly built dataset config
 */
public static VirtualDatasetVersion toVirtualDatasetVersion(VirtualDatasetUI virtualDatasetUI) {
  final VirtualDatasetVersion version = new VirtualDatasetVersion();
  final DatasetConfig config = new DatasetConfig();
  final VirtualDataset virtualDataset = toVirtualDataset(virtualDatasetUI);

  // Version-level properties.
  version.setLastTransform(virtualDatasetUI.getLastTransform());
  version.setState(virtualDatasetUI.getState());
  version.setPreviousVersion(virtualDatasetUI.getPreviousVersion());
  version.setNamed(virtualDatasetUI.getIsNamed());
  version.setDerivation(virtualDatasetUI.getDerivation());

  // Dataset-level properties.
  config
      .setName(virtualDatasetUI.getName())
      .setOwner(virtualDatasetUI.getOwner())
      .setType(DatasetType.VIRTUAL_DATASET);
  config.setCreatedAt(virtualDatasetUI.getCreatedAt());
  config
      .setFullPathList(virtualDatasetUI.getFullPathList())
      .setTag(virtualDatasetUI.getSavedTag());
  config.setVirtualDataset(virtualDataset);
  config.setRecordSchema(virtualDatasetUI.getRecordSchema());

  final boolean hasId = virtualDatasetUI.getId() != null;
  if (hasId) {
    config.setId(new EntityId(virtualDatasetUI.getId()));
  }

  version.setDataset(config);
  return version;
}
/**
 * Adds (or updates) a virtual dataset with a newly generated version at the given path.
 *
 * @param ns namespace service to store the dataset in
 * @param name full dataset path, parsed with {@code PathUtils.parseFullPath}
 * @throws Exception declared by this method's signature
 */
public static void addDS(NamespaceService ns, String name) throws Exception {
  final NamespaceKey key = new NamespaceKey(PathUtils.parseFullPath(name));

  final VirtualDataset virtualDataset = new VirtualDataset();
  virtualDataset.setVersion(DatasetVersion.newVersion());

  final DatasetConfig config = new DatasetConfig();
  config.setType(DatasetType.VIRTUAL_DATASET);
  config.setVirtualDataset(virtualDataset);
  config
      .setFullPathList(key.getPathComponents())
      .setName(key.getName());

  ns.addOrUpdateDataset(key, config);
}
dataset = oldConfig; }else { dataset = new DatasetConfig() .setPhysicalDataset(new PhysicalDataset()) .setId(new EntityId().setId(UUID.randomUUID().toString())) .setFullPathList(name.getPathComponents()) .setType(DatasetType.PHYSICAL_DATASET) .setName(typeName); dataset.setReadDefinition(new ReadDefinition()); final SchemaMerger merger = new SchemaMerger(new NamespaceKey(dataset.getFullPathList()).toString()); final BatchSchema schema; if(oldConfig == null || DatasetHelper.getSchemaBytes(oldConfig) == null){ mergeResult = merger.merge(mapping, sampledSchema); dataset.setRecordSchema(ByteString.copyFrom(mergeResult.getSchema().toByteString().toByteArray())); .setExtendedProperty(ByteString.copyFrom(tableAttributes.toByteArray())) .setSplitVersion(0L); dataset.setReadDefinition(readDefinition);
.setFullPathList(datasetPath.getPathComponents()) .setType(DatasetType.PHYSICAL_DATASET) .setName(tableName) .setOwner(user) .setPhysicalDataset(new PhysicalDataset()) .setRecordSchema(batchSchema.toByteString()) .setReadDefinition(new ReadDefinition() .setPartitionColumnsList(partitionColumns) .setSortColumnsList(FluentIterable.from(table.getSd().getSortCols()) HiveReaderProtoUtil.encodePropertiesAsDictionary(tableExtended); datasetConfig.getReadDefinition().setExtendedProperty(ByteString.copyFrom(tableExtended.build().toByteArray()));
/**
 * Saves a minimal dataset entry for the given accessor: a random id, the current time as
 * creation time, the name, full path and type from the accessor, and the current schema
 * version. No schema, read definition or splits are stored.
 *
 * @param accessor table definition supplying the key and dataset type
 * @throws NamespaceException declared by this method's signature
 */
void shallowSave(SourceTableDefinition accessor) throws NamespaceException {
  final NamespaceKey key = accessor.getName();

  final DatasetConfig shallow = new DatasetConfig()
      .setId(new EntityId().setId(UUID.randomUUID().toString()));
  shallow.setCreatedAt(System.currentTimeMillis());
  shallow
      .setName(key.getName())
      .setFullPathList(key.getPathComponents())
      .setType(accessor.getType())
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION);

  systemUserNamespaceService.addOrUpdateDataset(key, shallow);
}
/**
 * Builds a dataset config for {@code path}, passes it through {@code transformer}, and writes
 * the result directly into the namespace store.
 *
 * <p>When the store already holds a dataset at this key, its id and tag are reused; otherwise
 * a random id is generated and the tag is {@code null}. The owner is always {@code "dremio"}.
 *
 * @param path full path of the dataset; the last element is used as its name
 * @param type dataset type to record
 * @param transformer applied to the base config; its return value is what gets stored
 * @return the config returned by {@code transformer}
 * @throws NamespaceException declared by this method's signature
 */
private DatasetConfig saveDataset(List<String> path, DatasetType type, Function<DatasetConfig, DatasetConfig> transformer) throws NamespaceException {
  final NamespaceKey key = new NamespaceKey(path);
  final byte[] binaryKey = NamespaceServiceImpl.getKey(key);
  final Optional<DatasetConfig> oldDataset =
      Optional.ofNullable(namespaceStore.get(binaryKey)).map(NameSpaceContainer::getDataset);

  // orElseGet: only generate a random UUID when there is no existing id to reuse
  // (orElse evaluated the fallback on every call).
  final EntityId id = oldDataset
      .map(DatasetConfig::getId)
      .orElseGet(() -> new EntityId().setId(UUID.randomUUID().toString()));

  final DatasetConfig datasetConfig = transformer.apply(new DatasetConfig()
      .setId(id)
      .setName(path.get(path.size() - 1))
      .setFullPathList(path)
      .setType(type)
      .setTag(oldDataset.map(DatasetConfig::getTag).orElse(null))
      .setOwner("dremio"));

  final NameSpaceContainer container = new NameSpaceContainer()
      .setType(NameSpaceContainer.Type.DATASET)
      .setFullPathList(path)
      .setDataset(datasetConfig);
  namespaceStore.put(binaryKey, container);

  return datasetConfig;
}
private DatasetConfig addDataset(IndexedStore<byte[], NameSpaceContainer> namespace, IndexedStore<DatasetSplitId, DatasetSplit> splitsStore, String id, List<String> path, int splits) { DatasetConfig ds = new DatasetConfig() .setId(new EntityId(id)) .setName(last(path)) .setFullPathList(path) .setType(DatasetType.PHYSICAL_DATASET) .setReadDefinition(new ReadDefinition().setSplitVersion(42L)); namespace.put( NamespaceServiceImpl.getKey(new NamespaceKey(path)), new NameSpaceContainer().setType(NameSpaceContainer.Type.DATASET).setFullPathList(path).setDataset(ds)); for(int i = 0; i < splits; i++) { final String splitKey = Integer.toString(i); DatasetSplit split = new DatasetSplit() .setSplitVersion(42L) .setSplitKey(splitKey); // Generate an older dataset split id DatasetSplitId splitId = UnsafeDatasetSplitIdHelper.of(ds, splitKey); splitsStore.put(splitId, split); } return ds; }
/**
 * Creates a physical dataset entry at the given path through
 * {@code tryCreatePhysicalDataset}, using an empty {@code PhysicalDataset}.
 *
 * @param path dataset path; its name becomes the dataset name
 * @param type dataset type to record
 * @throws Exception declared by this method's signature
 */
public void addPhysicalDataset(final DatasetPath path, final DatasetType type) throws Exception {
  final NamespaceKey key = path.toNamespaceKey();
  final DatasetConfig config = new DatasetConfig()
      .setName(key.getName())
      .setType(type)
      .setPhysicalDataset(new PhysicalDataset());
  getNamespaceService().tryCreatePhysicalDataset(key, config);
}
/**
 * Deleting a catalog item that resolves to a dataset must delete that dataset from the
 * namespace exactly once, using the dataset's path and tag.
 */
@Test
public void testDeleteDataset() throws Exception {
  final Dataset dataset = new Dataset(
      "dataset-id",
      Dataset.DatasetType.VIRTUAL_DATASET,
      Arrays.asList("source", "path"),
      null,
      0L,
      "1",
      null,
      "sql",
      null,
      null,
      null);

  // Namespace entry that the id lookup resolves to.
  final DatasetConfig config = new DatasetConfig()
      .setId(new EntityId(dataset.getId()))
      .setType(VIRTUAL_DATASET)
      .setFullPathList(dataset.getPath())
      .setTag(dataset.getTag());
  final NameSpaceContainer container = new NameSpaceContainer()
      .setType(NameSpaceContainer.Type.DATASET)
      .setDataset(config);
  when(namespaceService.getEntityById(dataset.getId())).thenReturn(container);

  // Catalog lookup returns a table backed by the same config.
  final DremioTable table = mock(DremioTable.class);
  when(table.getDatasetConfig()).thenReturn(config);
  when(catalog.getTable(any(String.class))).thenReturn(table);

  catalogServiceHelper.deleteCatalogItem(dataset.getId(), "1");

  verify(namespaceService, times(1))
      .deleteDataset(new NamespaceKey(dataset.getPath()), config.getTag());
}
/**
 * Looking up a catalog entity by the id of a virtual dataset must return a {@code Dataset}
 * carrying that same id.
 */
@Test
public void testGetDatasetCatalogEntityById() throws Exception {
  DatasetConfig datasetConfig = new DatasetConfig();
  datasetConfig.setId(new EntityId("dataset-id"));
  datasetConfig.setFullPathList(Collections.singletonList("path"));
  datasetConfig.setType(VIRTUAL_DATASET);

  VirtualDataset virtualDataset = new VirtualDataset();
  virtualDataset.setSql("");
  datasetConfig.setVirtualDataset(virtualDataset);

  // Namespace entry that the id lookup resolves to.
  NameSpaceContainer namespaceContainer = new NameSpaceContainer();
  namespaceContainer.setType(NameSpaceContainer.Type.DATASET);
  namespaceContainer.setDataset(datasetConfig);
  when(namespaceService.getEntityById(datasetConfig.getId().getId())).thenReturn(namespaceContainer);

  // No stored reflection settings for any key.
  ReflectionSettings reflectionSettings = mock(ReflectionSettings.class);
  when(reflectionSettings.getStoredReflectionSettings(any(NamespaceKey.class))).thenReturn(Optional.<AccelerationSettings>absent());
  when(reflectionServiceHelper.getReflectionSettings()).thenReturn(reflectionSettings);

  // Catalog lookup returns a table backed by the same config.
  DremioTable dremioTable = mock(DremioTable.class);
  when(dremioTable.getDatasetConfig()).thenReturn(datasetConfig);
  when(catalog.getTable(any(String.class))).thenReturn(dremioTable);

  Optional<CatalogEntity> entity = catalogServiceHelper.getCatalogEntityById(datasetConfig.getId().getId());

  assertTrue(entity.isPresent());

  CatalogEntity catalogEntity = entity.get();
  assertTrue(catalogEntity instanceof Dataset);

  Dataset dataset = (Dataset) catalogEntity;
  // Expected value first, actual second, so a failure message reports them correctly.
  assertEquals(datasetConfig.getId().getId(), dataset.getId());
}
/**
 * A dataset id containing {@code %} must appear as {@code %25} in the split ids built from
 * the dataset config.
 */
@Test
public void testIdWithPercentageFromConfig() throws Exception {
  final EntityId id = new EntityId().setId("ds1%test");
  final ReadDefinition readDefinition = new ReadDefinition().setSplitVersion(0L);
  final DatasetConfig config = new DatasetConfig()
      .setId(id)
      .setReadDefinition(readDefinition);

  final DatasetSplitId first = DatasetSplitId.of(config, new DatasetSplit().setSplitKey("s1"), 0L);
  final DatasetSplitId second = DatasetSplitId.of(config, new DatasetSplit().setSplitKey("s2"), 0L);
  final DatasetSplitId third = DatasetSplitId.of(config, new DatasetSplit().setSplitKey("s3"), 0L);

  assertEquals("ds1%25test_0_s1", first.getSplitId());
  assertEquals("ds1%25test_0_s2", second.getSplitId());
  assertEquals("ds1%25test_0_s3", third.getSplitId());
}
/**
 * Creates a builder for the given Hive table.
 *
 * <p>When {@code oldConfig} is supplied it is reused as the dataset config and its read
 * definition is cleared in place; otherwise a new config with an empty physical dataset and
 * a random id is created.
 *
 * @param oldConfig previously known config for this dataset, or {@code null}
 */
private DatasetBuilder(HiveClient client, String user, NamespaceKey datasetPath, boolean ignoreAuthzErrors, StatsEstimationParameters statsParams, HiveConf hiveConf, String dbName, String tableName, Table table, DatasetConfig oldConfig){
  if (oldConfig != null) {
    datasetConfig = oldConfig;
    // We're rewriting the read definition. Delete the old one.
    oldConfig.setReadDefinition(null);
  } else {
    datasetConfig = new DatasetConfig()
        .setPhysicalDataset(new PhysicalDataset())
        .setId(new EntityId().setId(UUID.randomUUID().toString()));
  }

  // Connection and access context.
  this.client = client;
  this.user = user;
  this.hiveConf = hiveConf;
  this.ignoreAuthzErrors = ignoreAuthzErrors;
  this.statsParams = statsParams;

  // Table identity.
  this.datasetPath = datasetPath;
  this.dbName = dbName;
  this.tableName = tableName;
  this.table = table;
}
/**
 * Populates {@code datasetPointer} on first use; later calls return immediately.
 *
 * <p>If the current dataset config already has a read definition, splits are referenced
 * through the namespace. Otherwise the dataset and its splits are fetched from the accessor,
 * saved to the namespace under the existing id and tag, and the fetched config replaces the
 * current one.
 *
 * @throws RuntimeException wrapping any exception raised while fetching or saving
 */
private void loadIfNecessary() {
  if (datasetPointer != null) {
    return;
  }

  SplitsPointer splitsPointer;
  if (datasetConfig.getReadDefinition() == null) {
    try {
      final DatasetConfig fetchedConfig = datasetAccessor.getDataset();
      // Preserve identity and tag so the save updates the existing namespace entry.
      fetchedConfig.setId(datasetConfig.getId());
      fetchedConfig.setTag(datasetConfig.getTag());

      final List<DatasetSplit> fetchedSplits = datasetAccessor.getSplits();
      ns.addOrUpdateDataset(getName(), fetchedConfig, fetchedSplits);

      // Swap in the new config only after the save succeeded.
      datasetConfig = fetchedConfig;
      splitsPointer = MaterializedSplitsPointer.of(fetchedSplits, fetchedSplits.size());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  } else {
    splitsPointer = DatasetSplitsPointer.of(ns, datasetConfig);
  }

  datasetPointer = new TableMetadataImpl(pluginId, datasetConfig, user, splitsPointer);
}
/**
 * Converts a namespace dataset config into a {@link PhysicalDatasetConfig}.
 *
 * <p>The config's physical dataset is passed through {@code checkNotNull} first. The config's
 * id is dereferenced without a null check.
 *
 * @param datasetConfig dataset config to convert
 * @return a new physical dataset config carrying the format settings, path, type, name, tag
 *     and id of {@code datasetConfig}
 */
public static PhysicalDatasetConfig toPhysicalDatasetConfig(DatasetConfig datasetConfig) {
  checkNotNull(datasetConfig.getPhysicalDataset());
  final com.dremio.service.namespace.dataset.proto.PhysicalDataset source = datasetConfig.getPhysicalDataset();

  final PhysicalDatasetConfig result = new PhysicalDatasetConfig();
  // Format settings come from the nested physical dataset.
  result.setFormatSettings(source.getFormatSettings());
  // Everything else comes from the dataset config itself.
  result.setId(datasetConfig.getId().getId());
  result.setName(datasetConfig.getName());
  result.setFullPathList(datasetConfig.getFullPathList());
  result.setType(datasetConfig.getType());
  result.setTag(datasetConfig.getTag());
  return result;
}