@Override
public DatasetConfig getDataset() throws Exception {
  if (datasetConfig != null) {
    return datasetConfig;
  }

  datasetConfig = buildDataset();
  readDefinition = buildMetadata();
  datasetConfig.setReadDefinition(readDefinition);
  if (datasetConfig.getId() == null) {
    datasetConfig.setId(new EntityId(UUID.randomUUID().toString()));
  }
  return datasetConfig;
}
private DatasetBuilder(HiveClient client, String user, NamespaceKey datasetPath, boolean ignoreAuthzErrors,
    StatsEstimationParameters statsParams, HiveConf hiveConf, String dbName, String tableName, Table table,
    DatasetConfig oldConfig) {
  if (oldConfig == null) {
    datasetConfig = new DatasetConfig()
        .setPhysicalDataset(new PhysicalDataset())
        .setId(new EntityId().setId(UUID.randomUUID().toString()));
  } else {
    datasetConfig = oldConfig;
    // We're rewriting the read definition. Delete the old one.
    oldConfig.setReadDefinition(null);
  }
  this.client = client;
  this.user = user;
  this.datasetPath = datasetPath;
  this.hiveConf = hiveConf;
  this.table = table;
  this.dbName = dbName;
  this.tableName = tableName;
  this.ignoreAuthzErrors = ignoreAuthzErrors;
  this.statsParams = statsParams;
}
@Override
public void upgrade(UpgradeContext context) throws Exception {
  final NamespaceService namespaceService = new NamespaceServiceImpl(context.getKVStoreProvider());
  try {
    for (SourceConfig source : namespaceService.getSources()) {
      if (!"HIVE".equalsIgnoreCase(ConnectionReader.toType(source))) {
        continue;
      }
      System.out.printf(" Handling Hive source %s%n", source.getName());

      for (NamespaceKey datasetPath : namespaceService.getAllDatasets(new NamespaceKey(source.getName()))) {
        final DatasetConfig datasetConfig = namespaceService.getDataset(datasetPath);
        if (datasetConfig.getReadDefinition() == null
            || datasetConfig.getReadDefinition().getExtendedProperty() == null) {
          continue;
        }
        System.out.printf(" Clearing read definition of table %s%n", datasetPath.getSchemaPath());
        datasetConfig.setReadDefinition(null);
        namespaceService.addOrUpdateDataset(datasetPath, datasetConfig);
      }
    }
  } catch (NamespaceException e) {
    throw new RuntimeException("Hive121BasedInputSplits failed", e);
  }
}
NamespaceService ns = mock(NamespaceService.class);
when(ns.getDataset(any()))
    .thenReturn(new DatasetConfig().setReadDefinition(new ReadDefinition()));
new DatasetConfig()
    .setTag("0")
    .setReadDefinition(new ReadDefinition())
);
private void saveInHomeSpace(NamespaceService namespaceService, SourceTableDefinition accessor,
    DatasetConfig nsConfig) {
  Preconditions.checkNotNull(nsConfig);
  // use key from namespace config
  final NamespaceKey key = new NamespaceKey(nsConfig.getFullPathList());
  try {
    DatasetConfig srcConfig = accessor.getDataset();
    if (nsConfig.getId() == null) {
      nsConfig.setId(srcConfig.getId());
    }
    // Merge namespace config with config obtained from underlying filesystem used to store
    // user uploaded files. Set schema, read definition and state from source accessor.
    nsConfig.setRecordSchema(srcConfig.getRecordSchema());
    nsConfig.setSchemaVersion(srcConfig.getSchemaVersion());
    nsConfig.setReadDefinition(srcConfig.getReadDefinition());
    // get splits from source
    List<DatasetSplit> splits = accessor.getSplits();
    namespaceService.addOrUpdateDataset(key, nsConfig, splits);
  } catch (Exception ex) {
    logger.warn("Failure while retrieving and saving dataset {}.", key, ex);
  }
}
public void savePhysicalDataset(List<String> path, DatasetType type, long splitVersion, int count)
    throws NamespaceException {
  final ReadDefinition readDefinition = new ReadDefinition()
      .setSplitVersion(splitVersion);
  final DatasetConfig datasetConfig =
      saveDataset(path, type, config -> config.setReadDefinition(readDefinition));
  generateSplits(splitVersion, count)
      .forEach(split -> splitsStore.put(DatasetSplitId.of(datasetConfig, split, splitVersion), split));
}
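// `generateSplits` is not shown in this excerpt. A minimal sketch consistent with the call site
// above might look like the following; the name, return type, and split-key scheme are
// assumptions inferred from how DatasetSplit is populated elsewhere in this section, not the
// actual implementation.
private static List<DatasetSplit> generateSplits(long splitVersion, int count) {
  final List<DatasetSplit> splits = new ArrayList<>();
  for (int i = 0; i < count; i++) {
    // One split per index, stamped with the same version used for the read definition.
    splits.add(new DatasetSplit()
        .setSplitVersion(splitVersion)
        .setSplitKey(Integer.toString(i)));
  }
  return splits;
}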
@Override
public DatasetConfig getDataset() throws Exception {
  final DatasetConfig dataset;
  if (oldDataset == null) {
    dataset = new DatasetConfig()
        .setFullPathList(key.getPathComponents())
        .setId(new EntityId(UUID.randomUUID().toString()))
        .setType(DatasetType.PHYSICAL_DATASET);
  } else {
    dataset = oldDataset;
  }

  return dataset
      .setName(key.getName())
      .setReadDefinition(new ReadDefinition()
          .setScanStats(new ScanStats()
              .setRecordCount(100L)
              .setScanFactor(ScanCostFactor.OTHER.getFactor())))
      .setOwner(SystemUser.SYSTEM_USERNAME)
      .setPhysicalDataset(new PhysicalDataset())
      .setRecordSchema(getSchema().toByteString())
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION);
}
@Override
public DatasetConfig getDataset() {
  final DatasetConfig dataset;
  if (oldDataset == null) {
    dataset = new DatasetConfig()
        .setFullPathList(key.getPathComponents())
        .setId(new EntityId(UUID.randomUUID().toString()))
        .setType(DatasetType.PHYSICAL_DATASET);
  } else {
    dataset = oldDataset;
  }

  return dataset
      .setName(key.getName())
      .setReadDefinition(new ReadDefinition()
          .setScanStats(new ScanStats()
              .setRecordCount(100L)
              .setScanFactor(ScanCostFactor.OTHER.getFactor())))
      .setOwner(SystemUser.SYSTEM_USERNAME)
      .setPhysicalDataset(new PhysicalDataset())
      .setRecordSchema(getSchema().toByteString())
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION);
}
private static SourceTableDefinition newDataset(final String dsPath) {
  final List<String> path = SqlUtils.parseSchemaPath(dsPath);

  SourceTableDefinition ret = mock(SourceTableDefinition.class);
  NamespaceKey datasetName = new NamespaceKey(path);
  when(ret.getName()).thenReturn(datasetName);

  BatchSchema schema = BatchSchema.newBuilder()
      .addField(new Field("string", FieldType.nullable(ArrowType.Utf8.INSTANCE), null))
      .build();
  DatasetConfig dsConfig = new DatasetConfig()
      .setName(Util.last(path))
      .setFullPathList(path)
      .setType(DatasetType.PHYSICAL_DATASET_SOURCE_FILE)
      .setPhysicalDataset(new PhysicalDataset()
          .setFormatSettings(null))
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION)
      .setRecordSchema(schema.toByteString())
      .setReadDefinition(new ReadDefinition());
  try {
    when(ret.getDataset()).thenReturn(dsConfig);
  } catch (Exception ignored) {
  }
  when(ret.getType()).thenReturn(DatasetType.PHYSICAL_DATASET_SOURCE_FILE);
  when(ret.isSaveable()).thenReturn(true);
  return ret;
}
);

datasetConfig.setReadDefinition(readDefinition);
.setPhysicalDataset(new PhysicalDataset())
.setRecordSchema(batchSchema.toByteString())
.setReadDefinition(new ReadDefinition()
    .setPartitionColumnsList(partitionColumns)
    .setSortColumnsList(FluentIterable.from(table.getSd().getSortCols())
@Test
public void checkReadSignatureWithUnchangedState() throws Exception {
  final SourceConfig inspectorConfig = new SourceConfig()
      .setType(INSPECTOR)
      .setName(INSPECTOR)
      .setMetadataPolicy(CatalogService.DEFAULT_METADATA_POLICY)
      .setConfig(new Inspector().toBytesString());

  // create one; lock required
  final ManagedStoragePlugin plugin;
  try (AutoCloseable ignored = plugins.writeLock()) {
    plugin = plugins.create(inspectorConfig);
  }
  CheckedFuture<SourceState, Exception> state = plugin.startAsync();
  state.get();
  plugin.initiateMetadataRefresh();
  plugin.refresh(CatalogService.UpdateType.FULL, CatalogService.DEFAULT_METADATA_POLICY);

  readDefinition.setReadSignature(BYTESTRING_UNCHANGED_WITH_DATASET);
  datasetConfig.setReadDefinition(readDefinition);

  // This should return a non-null SourceTableDefinition that differs from the constant
  // one we have defined before.
  SourceTableDefinition tbl = plugin.getTable(null, datasetConfig, false);
  assertNotNull(tbl);
  assertNotEquals(MOCK_TABLE_DEFINITION, tbl);

  // This signature should return the mock definition.
  readDefinition.setReadSignature(BYTESTRING_UNCHANGED_WITHOUT_DATASET);
  datasetConfig.setReadDefinition(readDefinition);
  tbl = plugin.getTable(null, datasetConfig, false);
  assertEquals(MOCK_TABLE_DEFINITION, tbl);

  plugins.deleteSource(inspectorConfig);
}
private DatasetConfig addDataset(IndexedStore<byte[], NameSpaceContainer> namespace,
    IndexedStore<DatasetSplitId, DatasetSplit> splitsStore, String id, List<String> path, int splits) {
  DatasetConfig ds = new DatasetConfig()
      .setId(new EntityId(id))
      .setName(last(path))
      .setFullPathList(path)
      .setType(DatasetType.PHYSICAL_DATASET)
      .setReadDefinition(new ReadDefinition().setSplitVersion(42L));
  namespace.put(
      NamespaceServiceImpl.getKey(new NamespaceKey(path)),
      new NameSpaceContainer().setType(NameSpaceContainer.Type.DATASET).setFullPathList(path).setDataset(ds));

  for (int i = 0; i < splits; i++) {
    final String splitKey = Integer.toString(i);
    DatasetSplit split = new DatasetSplit()
        .setSplitVersion(42L)
        .setSplitKey(splitKey);
    // Generate an older dataset split id
    DatasetSplitId splitId = UnsafeDatasetSplitIdHelper.of(ds, splitKey);
    splitsStore.put(splitId, split);
  }
  return ds;
}
@Test
public void testIdFromConfig() throws Exception {
  DatasetConfig datasetConfig = new DatasetConfig()
      .setId(new EntityId().setId("ds1"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));

  DatasetSplitId split1 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s1"), 0L);
  DatasetSplitId split2 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s2"), 0L);
  DatasetSplitId split3 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s3"), 0L);

  assertEquals("ds1_0_s1", split1.getSplitId());
  assertEquals("ds1_0_s2", split2.getSplitId());
  assertEquals("ds1_0_s3", split3.getSplitId());
}
@Test
public void testIdWithUnderscoreFromConfig() throws Exception {
  DatasetConfig datasetConfig = new DatasetConfig()
      .setId(new EntityId().setId("ds1_test"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));

  DatasetSplitId split1 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s1"), 0L);
  DatasetSplitId split2 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s2"), 0L);
  DatasetSplitId split3 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s3"), 0L);

  assertEquals("ds1%5Ftest_0_s1", split1.getSplitId());
  assertEquals("ds1%5Ftest_0_s2", split2.getSplitId());
  assertEquals("ds1%5Ftest_0_s3", split3.getSplitId());
}
@Test
public void testIdWithPercentageFromConfig() throws Exception {
  DatasetConfig datasetConfig = new DatasetConfig()
      .setId(new EntityId().setId("ds1%test"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));

  DatasetSplitId split1 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s1"), 0L);
  DatasetSplitId split2 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s2"), 0L);
  DatasetSplitId split3 = DatasetSplitId.of(datasetConfig, new DatasetSplit().setSplitKey("s3"), 0L);

  assertEquals("ds1%25test_0_s1", split1.getSplitId());
  assertEquals("ds1%25test_0_s2", split2.getSplitId());
  assertEquals("ds1%25test_0_s3", split3.getSplitId());
}
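// The three tests above pin down the split-id scheme: the dataset id is escaped so that '_'
// and '%' cannot collide with the '_' used as a field separator, then joined with the split
// version and split key. The helper below is a minimal sketch inferred from the expected
// strings in these tests; it is an illustration, not the actual DatasetSplitId implementation.
private static String buildSplitId(String datasetId, long splitVersion, String splitKey) {
  // Escape '%' first so the '%5F' sequences introduced for '_' are not double-escaped.
  final String escaped = datasetId.replace("%", "%25").replace("_", "%5F");
  return escaped + "_" + splitVersion + "_" + splitKey;
}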
@Test
public void checkReadSignatureWithDeletedState() throws Exception {
  final NamespaceKey sourceKey = new NamespaceKey(INSPECTOR);
  final SourceConfig inspectorConfig = new SourceConfig()
      .setType(INSPECTOR)
      .setName(INSPECTOR)
      .setMetadataPolicy(CatalogService.DEFAULT_METADATA_POLICY)
      .setConfig(new Inspector().toBytesString());

  // create one; lock required
  final ManagedStoragePlugin plugin;
  try (AutoCloseable ignored = plugins.writeLock()) {
    plugin = plugins.create(inspectorConfig);
  }
  CheckedFuture<SourceState, Exception> state = plugin.startAsync();
  state.get();
  plugin.initiateMetadataRefresh();
  plugin.refresh(CatalogService.UpdateType.FULL, CatalogService.DEFAULT_METADATA_POLICY);

  readDefinition.setReadSignature(BYTESTRING_DELETED);
  datasetConfig.setReadDefinition(readDefinition);

  // A deleted read signature means the underlying table is gone, so getTable should return null.
  assertNull(plugin.getTable(null, datasetConfig, false));

  plugins.deleteSource(inspectorConfig);
}
datasetConfig.setReadDefinition(readDefinition);
datasetConfig.setAccelerationId("accl"); datasetConfig.setOwner("dremio"); datasetConfig.setReadDefinition(readDefinition);