@Override
public List<Entry<String, ByteString>> getSharedData() {
  return Collections.singletonList(
      new SimpleEntry<>(
          HIVE_ATTRIBUTE_KEY,
          ByteString.copyFrom(dataset.getReadDefinition().getExtendedProperty().asReadOnlyByteBuffer())));
}
@Override
public Double getRowCount() {
  return (double) dataset.getReadDefinition().getScanStats().getRecordCount();
}
public ElasticTableXattr getExtendedAttributes() {
  if (extendedAttributes == null) {
    try {
      extendedAttributes = ElasticTableXattr.parseFrom(
          tableMetadata.getReadDefinition().getExtendedProperty().toByteArray());
    } catch (InvalidProtocolBufferException e) {
      throw Throwables.propagate(e);
    }
  }
  return extendedAttributes;
}
private HiveTableXattr getExtended() {
  if (extended == null) {
    try {
      extended = HiveTableXattr.parseFrom(
          getTableMetadata().getReadDefinition().getExtendedProperty().toByteArray());
    } catch (InvalidProtocolBufferException e) {
      throw Throwables.propagate(e);
    }
  }
  return extended;
}
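// The two accessors above share the same lazy parse-and-cache pattern for a
// protobuf-encoded extended property. A minimal, self-contained sketch of
// that pattern follows; com.google.protobuf.Struct stands in for
// HiveTableXattr / ElasticTableXattr, and LazyXattrHolder is a hypothetical
// name, not a Dremio class.
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Struct;

class LazyXattrHolder {
  private final byte[] raw; // serialized extended-property bytes
  private Struct parsed;    // cached message, decoded at most once

  LazyXattrHolder(byte[] raw) {
    this.raw = raw;
  }

  Struct get() {
    if (parsed == null) {
      try {
        parsed = Struct.parseFrom(raw); // decode only on first access
      } catch (InvalidProtocolBufferException e) {
        // Mirrors the Throwables.propagate(e) calls above: rethrow unchecked.
        throw new RuntimeException(e);
      }
    }
    return parsed;
  }
}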
@Override
public boolean matches(RelOptRuleCall call) {
  // Fire only for unfiltered scans whose Hive input format is ORC.
  final HiveScanDrel scan = call.rel(1);
  if (scan.getFilter() != null) {
    return false;
  }
  try {
    final HiveTableXattr tableXattr = HiveTableXattr.parseFrom(
        scan.getTableMetadata().getReadDefinition().getExtendedProperty().toByteArray());
    final Optional<String> inputFormat = HiveReaderProtoUtil.getTableInputFormat(tableXattr);
    return inputFormat.isPresent()
        && inputFormat.get().equals(OrcInputFormat.class.getCanonicalName());
  } catch (InvalidProtocolBufferException e) {
    logger.warn("Failure while attempting to deserialize hive table attributes.", e);
  }
  return false;
}
@Override
public boolean matches(RelOptRuleCall call) {
  final ScanRelBase scan = call.rel(1);
  if (scan.getPluginId().getType().equals(pluginType)) {
    try {
      if (scan.getTableMetadata().getSplitRatio() == 1.0d) {
        final List<String> partitionColumns =
            scan.getTableMetadata().getReadDefinition().getPartitionColumnsList();
        return partitionColumns != null && !partitionColumns.isEmpty();
      }
    } catch (NamespaceException e) {
      logger.warn("Unable to calculate split.", e);
      return false;
    }
  }
  return false;
}
@Override
public boolean matches(RelOptRuleCall call) {
  final ScanRelBase scan = call.rel(2);
  if (scan.getPluginId().getType().equals(pluginType)) {
    try {
      if (scan.getTableMetadata().getSplitRatio() == 1.0d) {
        final List<String> partitionColumns =
            scan.getTableMetadata().getReadDefinition().getPartitionColumnsList();
        return partitionColumns != null && !partitionColumns.isEmpty();
      }
    } catch (NamespaceException e) {
      logger.warn("Unable to calculate split.", e);
      return false;
    }
  }
  return false;
}
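// The matches() overrides above are cheap guards: call.rel(n) fetches the
// n-th operand of the matched subtree (hence rel(1) vs rel(2) for rules that
// match patterns of different depth), and returning false early keeps the
// planner from ever scheduling onMatch(). A minimal sketch of that guard
// pattern against plain Calcite types; ExampleGuardRule is a hypothetical
// name, and Dremio's real rules match its own ScanRelBase/HiveScanDrel nodes.
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.TableScan;

class ExampleGuardRule extends RelOptRule {
  ExampleGuardRule() {
    // Filter over TableScan: the Filter is operand 0 and the scan operand 1,
    // which is why matches() below calls call.rel(1).
    super(operand(Filter.class, operand(TableScan.class, none())), "ExampleGuardRule");
  }

  @Override
  public boolean matches(RelOptRuleCall call) {
    final TableScan scan = call.rel(1);
    // Keep this check cheap; expensive work belongs in onMatch().
    return scan.getTable() != null;
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    // The actual rewrite would go here.
  }
}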
@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  List<DatasetSplit> splitWork = FluentIterable.from(work)
      .transform(new Function<SplitWork, DatasetSplit>() {
        @Override
        public DatasetSplit apply(SplitWork input) {
          return input.getSplit();
        }
      })
      .toList();
  return new ElasticsearchSubScan(
      getUserName(),
      getDataset().getStoragePluginId(),
      spec,
      splitWork,
      getColumns(),
      Iterables.getOnlyElement(getReferencedTables()),
      getSchema(),
      getDataset().getReadDefinition().getExtendedProperty());
}
@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  final BatchSchema schema = cachedRelDataType == null
      ? getDataset().getSchema()
      : BatchSchema.fromCalciteRowType(cachedRelDataType);

  // Create an abridged version of the splits to save network bytes.
  List<DatasetSplit> splits = work.stream()
      .map(workSplit -> ProtostuffUtil.copy(workSplit.getSplit())
          .setExtendedProperty(convertToScanXAttr(workSplit.getSplit().getExtendedProperty())))
      .collect(Collectors.toList());

  return new ParquetSubScan(
      dataset.getFormatSettings(),
      splits,
      getUserName(),
      schema,
      getDataset().getName().getPathComponents(),
      filter == null ? null : filter.getConditions(),
      dataset.getStoragePluginId(),
      columns,
      dataset.getReadDefinition().getPartitionColumnsList(),
      globalDictionaryEncodedColumns,
      dataset.getReadDefinition().getExtendedProperty());
}
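// The inline comment above explains why the splits are copied first: the
// extended property is replaced with a smaller scan-time attribute before
// the splits cross the wire. A minimal sketch of that abridging idea in
// plain protobuf; Struct stands in for the split's extended property, and
// ScanXattrUtil, toScanXattr, and the "inputSplit" field name are all
// hypothetical, not Dremio's actual convertToScanXAttr.
import com.google.protobuf.Struct;
import com.google.protobuf.Value;

class ScanXattrUtil {
  static Struct toScanXattr(Struct full) {
    // Copy over only the field the executor needs at scan time and drop the
    // rest, so the serialized split is smaller on the network.
    Struct.Builder abridged = Struct.newBuilder();
    Value needed = full.getFieldsMap().get("inputSplit"); // hypothetical field
    if (needed != null) {
      abridged.putFields("inputSplit", needed);
    }
    return abridged.build();
  }
}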
@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  final List<DatasetSplit> splits = new ArrayList<>(work.size());
  final BatchSchema schema = getDataset().getSchema();
  for (SplitWork split : work) {
    splits.add(split.getSplit());
  }
  return new EasySubScan(
      getDataset().getFormatSettings(),
      splits,
      getUserName(),
      schema,
      getDataset().getName().getPathComponents(),
      dataset.getStoragePluginId(),
      columns,
      getDataset().getReadDefinition().getPartitionColumnsList(),
      getDataset().getReadDefinition().getExtendedProperty());
}
@Override
public SubScan getSpecificScan(List<SplitWork> work) throws ExecutionSetupException {
  List<DatasetSplit> splits = new ArrayList<>(work.size());
  BatchSchema schema = getDataset().getSchema();
  for (SplitWork split : work) {
    splits.add(split.getSplit());
  }
  // Impersonate the query user only when the storage plugin supports it;
  // otherwise fall back to the process user.
  boolean storageImpersonationEnabled = dataset.getStoragePluginId()
      .getCapabilities()
      .getCapability(SourceCapabilities.STORAGE_IMPERSONATION);
  String userName = storageImpersonationEnabled ? getUserName() : ImpersonationUtil.getProcessUserName();
  final ReadDefinition readDefinition = dataset.getReadDefinition();
  return new HiveSubScan(
      splits,
      userName,
      schema,
      dataset.getName().getPathComponents(),
      filter,
      dataset.getStoragePluginId(),
      columns,
      readDefinition.getPartitionColumnsList());
}
// Assign each partition column a stable ordinal index.
final Map<String, Integer> partitionColumnsToIdMap = Maps.newHashMap();
int index = 0;
for (String column : scanRel.getTableMetadata().getReadDefinition().getPartitionColumnsList()) {
  partitionColumnsToIdMap.put(column, index++);
}
private PrelWithDictionaryInfo visitParquetScanPrel(ParquetScanPrel parquetScanPrel, Void value)
    throws RuntimeException {
  final ReadDefinition readDefinition = parquetScanPrel.getTableMetadata().getReadDefinition();
  // ... (remainder of the visitor elided)
}
double workCost = getCostAdjustmentFactor()
    * (rowCount * fieldCount * getTableMetadata().getReadDefinition().getScanStats().getScanFactor())
    * DremioCost.SCAN_CPU_COST_MULTIPLIER;
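// For intuition, the expression above is
//   workCost = adjustment * (rows * fields * scanFactor) * cpuMultiplier,
// so cost grows linearly in both row and column count. A numeric sketch with
// hypothetical inputs (the real values come from the table's scan stats and
// DremioCost constants):
double adjustment = 1.0;      // getCostAdjustmentFactor() (assumed)
double rows = 1_000_000d;     // estimated row count (assumed)
double fields = 10d;          // projected column count (assumed)
double scanFactor = 1.0;      // ScanStats.getScanFactor() (assumed)
double cpuMultiplier = 4.0;   // DremioCost.SCAN_CPU_COST_MULTIPLIER (assumed value)
double workCost = adjustment * (rows * fields * scanFactor) * cpuMultiplier; // = 4.0E7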