/**
 * Creates a reader that wraps {@code inner} and coerces its output batches to
 * {@code targetSchema}: for every scalar target field a cast expression is built over
 * the correspondingly named input field; union/complex fields are passed through
 * unchanged (see inline comments).
 *
 * @param context      operator context supplying the allocator
 * @param columns      projected columns, forwarded to the superclass
 * @param inner        the underlying reader whose output is coerced
 * @param targetSchema the schema the outgoing batches must conform to
 */
public CoercionReader(OperatorContext context, List<SchemaPath> columns, RecordReader inner, BatchSchema targetSchema) {
  super(context, columns);
  // incoming vectors are owned by the sample mutator; the inner reader writes into them
  this.mutator = new SampleMutator(context.getAllocator());
  this.incoming = mutator.getContainer();
  this.inner = inner;
  this.outgoing = new VectorContainer(context.getAllocator());
  this.targetSchema = targetSchema;
  this.exprs = new ArrayList<>(targetSchema.getFieldCount());
  for (Field field : targetSchema.getFields()) {
    final FieldReference inputRef = FieldReference.getWithQuotedRef(field.getName());
    final CompleteType targetType = CompleteType.fromField(field);
    if (targetType.isUnion() || targetType.isComplex()) {
      // we are assuming that map and list fields won't need coercion but inner reader may rely on sampling
      // a handful of rows to figure out the schema and if the list/map is empty in those rows, the schema will be
      // incomplete
      exprs.add(new NamedExpression(inputRef, inputRef));
      // one way to fix this issue is to add the target field in the incoming container and rely on
      // schema learning to handle any changes we hit when reading from the underlying reader
      mutator.addField(field, TypeHelper.getValueVectorClass(field));
    } else {
      // scalar field: project the input through a cast to the target major type
      final MajorType majorType = MajorTypeHelper.getMajorTypeForField(field);
      LogicalExpression cast = FunctionCallFactory.createCast(majorType, inputRef);
      exprs.add(new NamedExpression(cast, inputRef));
    }
    //TODO check that the expression type is a subset of the targetSchema type
  }
}
/**
 * Allocates memory for this reader's own (outgoing) vectors via the superclass,
 * then delegates allocation of the pre-coercion vectors to the wrapped reader.
 *
 * @param vectorMap output vectors keyed by field name
 * @throws OutOfMemoryException if either allocation fails
 */
@Override
public void allocate(Map<String, ValueVector> vectorMap) throws OutOfMemoryException {
  super.allocate(vectorMap);
  // the inner reader writes into the sample mutator's vectors, not into vectorMap
  inner.allocate(mutator.getFieldVectorMap());
}
/**
 * Reads the next batch from the inner reader, rebuilds the coercion projection if the
 * incoming schema changed, and projects the records into the outgoing container.
 *
 * @return the number of records read from the inner reader
 */
@Override
public int next() {
  int recordCount = inner.next();
  if (mutator.isSchemaChanged()) {
    // regenerate the projector/expressions for the new incoming schema
    newSchema();
  }
  incoming.setAllCount(recordCount);
  if (DEBUG_PRINT) {
    // debug-only diagnostics; writes to stdout rather than the logger by design of this flag
    FragmentHandle h = context.getFragmentHandle();
    outgoing.buildSchema();
    String op = String.format("CoercionReader:%d:%d:%d --> (%d), %s", h.getMajorFragmentId(), h.getMinorFragmentId(), context.getStats().getOperatorId(), recordCount, outgoing.getSchema());
    System.out.println(op);
    BatchPrinter.printBatch(mutator.getContainer());
  }
  // NOTE(review): projector appears to be set by newSchema(); confirm it is non-null before the first data batch
  if (projector != null) {
    projector.projectRecords(recordCount);
    for (final ValueVector v : allocationVectors) {
      v.setValueCount(recordCount);
    }
  }
  return recordCount;
}
// NOTE(review): fragment — the enclosing method and the opening of this try-with-resources
// statement are outside this view; only comments added here.
readDefinition, pluginConfig);
    final SampleMutator mutator = new SampleMutator(sampleAllocator)
) {
  // sample one batch so the mutator can learn the schema from real data
  reader.setup(mutator);
  reader.next();
  // materialize and return the learned schema (no selection vector)
  mutator.getContainer().buildSchema(SelectionVectorMode.NONE);
  return mutator.getContainer().getSchema();
// NOTE(review): fragment — the `try (` opening this resource list is outside this view.
BufferAllocator sampleAllocator = context.getAllocator().newChildAllocator("sample-alloc", 0, Long.MAX_VALUE);
OperatorContextImpl operatorContext = new OperatorContextImpl(context.getConfig(), sampleAllocator, context.getOptionManager(), 1000);
SampleMutator mutator = new SampleMutator(sampleAllocator)
) {
  final Optional<FileStatus> firstFileO = selection.getFirstFile();
  // allocate a 100-row sample batch; presumably the schema here comes from
  // pre-declared fields rather than read data — TODO confirm against the unseen code above
  mutator.allocate(100);
  mutator.getContainer().buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return mutator.getContainer().getSchema();
@Test public void testFileNotFound() { FileSplit split = mock(FileSplit.class); when(split.getPath()).thenReturn(new Path("/notExist/notExitFile")); TextParsingSettings settings = mock(TextParsingSettings.class); when(settings.isHeaderExtractionEnabled()).thenReturn(true); SchemaPath column = mock(SchemaPath.class); List<SchemaPath> columns = new ArrayList<>(1); columns.add(column); SabotContext context = mock(SabotContext.class); BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); when(context.getAllocator()).thenReturn(allocator); Path path = new Path("/notExist"); try (BufferAllocator sampleAllocator = context.getAllocator().newChildAllocator("sample-alloc", 0, Long.MAX_VALUE); OperatorContextImpl operatorContext = new OperatorContextImpl(context.getConfig(), sampleAllocator, context.getOptionManager(), 1000); FileSystemWrapper dfs = FileSystemWrapper.get(path, new Configuration()); SampleMutator mutator = new SampleMutator(sampleAllocator); CompliantTextRecordReader reader = new CompliantTextRecordReader(split, dfs, operatorContext, settings, columns); ){ reader.setup(mutator); } catch (Exception e) { // java.io.FileNotFoundException is expected, but memory leak is not expected. assertTrue(e.getCause() instanceof FileNotFoundException); } allocator.close(); } }
// NOTE(review): fragment — the `try (` opening this resource list and the brace closing
// the for-loop below are outside this view; only comments added here.
BufferAllocator sampleAllocator = context.getAllocator().newChildAllocator("sample-alloc", 0, Long.MAX_VALUE);
OperatorContextImpl operatorContext = new OperatorContextImpl(context.getConfig(), sampleAllocator, context.getOptionManager(), 1000);
SampleMutator mutator = new SampleMutator(sampleAllocator)
) {
  final ImplicitFilesystemColumnFinder explorer = new ImplicitFilesystemColumnFinder(context.getOptionManager(), dfs, GroupScan.ALL_COLUMNS);
  reader.setup(mutator);
  // collect the vectors created during setup, keyed by field name
  Map<String, ValueVector> fieldVectorMap = new HashMap<>();
  for (VectorWrapper<?> vw : mutator.getContainer()) {
    fieldVectorMap.put(vw.getField().getName(), vw.getValueVector());
  // NOTE(review): the for-loop's closing brace is not visible in this chunk
  mutator.getContainer().buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return mutator.getContainer().getSchema();
/**
 * Samples an HBase table to derive its batch schema. Any previously persisted schema
 * from {@code oldConfig} is materialized first so schema learning merges with it, then
 * the row key and one struct field per column family are pre-declared before reading a
 * small sample batch.
 *
 * @param descriptor HBase table descriptor supplying the column families
 * @param oldConfig  previously stored dataset config whose schema (if any) seeds sampling
 * @return the schema learned from the sample batch
 * @throws Exception if sampling fails; setup errors are rethrown as UserException
 */
private BatchSchema getSampledSchema(HTableDescriptor descriptor, DatasetConfig oldConfig) throws Exception {
  BatchSchema oldSchema = null;
  ByteString bytes = oldConfig != null ? DatasetHelper.getSchemaBytes(oldConfig) : null;
  if (bytes != null) {
    oldSchema = BatchSchema.deserialize(bytes);
  }
  // full-table scan spec with no filters/projections — only a sample is read
  final HBaseSubScanSpec spec = new HBaseSubScanSpec(getNamespace(), getTableName(), null, null, null);
  try (
      BufferAllocator allocator = context.getAllocator().newChildAllocator("hbase-sample", 0, Long.MAX_VALUE);
      SampleMutator mutator = new SampleMutator(allocator);
      HBaseRecordReader reader = new HBaseRecordReader(connect.getConnection(), spec, GroupScan.ALL_COLUMNS, null, true);
  ) {
    // keep the sample small: one 100-row batch is enough to learn the schema
    reader.setNumRowsPerBatch(100);
    if (oldSchema != null) {
      oldSchema.materializeVectors(GroupScan.ALL_COLUMNS, mutator);
    }
    // add row key.
    mutator.addField(CompleteType.VARBINARY.toField(HBaseRecordReader.ROW_KEY), ValueVector.class);
    // add all column families.
    for (HColumnDescriptor col : descriptor.getFamilies()) {
      mutator.addField(CompleteType.struct().toField(col.getNameAsString()), ValueVector.class);
    }
    reader.setup(mutator);
    reader.next();
    mutator.getContainer().buildSchema(SelectionVectorMode.NONE);
    return mutator.getContainer().getSchema();
  } catch (ExecutionSetupException e) {
    // preserve the cause so the setup failure is diagnosable from the user error
    throw UserException.dataReadError(e).message("Unable to sample schema for table %s.", key).build(logger);
  }
}
// NOTE(review): fragment — the enclosing method begins outside this view; looks like a
// debug-print path of CoercionReader (h is presumably a FragmentHandle — verify).
mutator.isSchemaChanged();
String op = String.format("CoercionReader:%d:%d:%d, %s --> %s", h.getMajorFragmentId(), h.getMinorFragmentId(), context.getStats().getOperatorId(), incoming.getSchema(), outgoing.getSchema());
System.out.println(op);
// NOTE(review): hard-coded count of 2 — presumably a fixed-size debug sample; confirm
mutator.getContainer().setAllCount(2);
BatchPrinter.printBatch(mutator.getContainer());