/**
 * Constructs an exception indicating the given column could not be resolved.
 *
 * @param descriptor descriptor of the column that was not found; retained so callers
 *                   can inspect which column triggered the failure
 */
public UnknownColumnException(ColumnDescriptor descriptor) {
  // Plain concatenation is null-safe ("null" in the message); the explicit
  // descriptor.toString() would have thrown NPE on a null descriptor instead
  // of producing a readable error.
  super("Column not found: " + descriptor);
  this.descriptor = descriptor;
}
/**
 * Constructs an exception indicating the given column could not be resolved.
 *
 * @param descriptor descriptor of the column that was not found; retained so callers
 *                   can inspect which column triggered the failure
 */
public UnknownColumnException(ColumnDescriptor descriptor) { super("Column not found: " + descriptor.toString()); this.descriptor = descriptor; }
/**
 * Records timing and byte-count statistics for one page operation and emits a
 * CSV-style trace line.
 *
 * @param pageHeader header of the page being processed; its {@code type} field selects
 *                   dictionary-page vs. data-page counters
 * @param op         short label for the operation being traced (e.g. a read or decompress step)
 * @param start      stream position at which the operation started (trace output only)
 * @param time       elapsed time of the operation (units set by the caller — TODO confirm nanos)
 * @param bytesin    bytes consumed by the operation
 * @param bytesout   bytes produced by the operation
 */
private void updateStats(PageHeader pageHeader, String op, long start, long time, long bytesin, long bytesout) {
  String pageType = "Data Page";
  if (pageHeader.type == PageType.DICTIONARY_PAGE) {
    pageType = "Dictionary Page";
  }
  // pageType is already a String; the redundant toString() call was dropped to
  // match the sibling updateStats overload in this file.
  logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", op, pageType,
      this.parentColumnReader.parentReader.hadoopPath,
      this.parentColumnReader.columnDescriptor.toString(), start, bytesin, bytesout, time);
  if (pageHeader.type != PageType.DICTIONARY_PAGE) {
    // bytesin == bytesout is used as the signal that no decompression took
    // place, i.e. this was a plain page load.
    if (bytesin == bytesout) {
      this.stats.timePageLoads += time;
      this.stats.numPageLoads++;
      this.stats.totalPageReadBytes += bytesin;
    } else {
      this.stats.timePagesDecompressed += time;
      this.stats.numPagesDecompressed++;
      // NOTE(review): accumulates the compressed size (bytesin), mirroring the
      // atomic-counter overload — confirm that is the intended metric.
      this.stats.totalDecompressedBytes += bytesin;
    }
  } else {
    if (bytesin == bytesout) {
      this.stats.timeDictPageLoads += time;
      this.stats.numDictPageLoads++;
      this.stats.totalDictPageReadBytes += bytesin;
    } else {
      this.stats.timeDictPagesDecompressed += time;
      this.stats.numDictPagesDecompressed++;
      this.stats.totalDictDecompressedBytes += bytesin;
    }
  }
}
/**
 * Records timing and byte-count statistics for one page operation (thread-safe
 * atomic-counter variant) and emits a CSV-style trace line.
 *
 * @param pageHeader header of the page being processed; its {@code type} field selects
 *                   dictionary-page vs. data-page counters
 * @param op         short label for the operation being traced
 * @param start      stream position at which the operation started (trace output only)
 * @param time       elapsed time of the operation
 * @param bytesin    bytes consumed by the operation
 * @param bytesout   bytes produced by the operation
 */
protected void updateStats(PageHeader pageHeader, String op, long start, long time, long bytesin, long bytesout) {
  final boolean dictionaryPage = pageHeader.type == PageType.DICTIONARY_PAGE;
  final String pageType = dictionaryPage ? "Dictionary Page" : "Data Page";
  logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", op, pageType,
      this.parentColumnReader.parentReader.hadoopPath,
      this.parentColumnReader.columnDescriptor.toString(), start, bytesin, bytesout, time);
  // Equal in/out byte counts mean the page was read as-is; differing counts
  // mean a decompression step ran.
  final boolean decompressed = bytesin != bytesout;
  if (dictionaryPage) {
    if (decompressed) {
      this.stats.timeDictPagesDecompressed.addAndGet(time);
      this.stats.numDictPagesDecompressed.incrementAndGet();
      this.stats.totalDictDecompressedBytes.addAndGet(bytesin);
    } else {
      this.stats.timeDictPageLoads.addAndGet(time);
      this.stats.numDictPageLoads.incrementAndGet();
      this.stats.totalDictPageReadBytes.addAndGet(bytesin);
    }
  } else {
    if (decompressed) {
      this.stats.timeDataPagesDecompressed.addAndGet(time);
      this.stats.numDataPagesDecompressed.incrementAndGet();
      this.stats.totalDataDecompressedBytes.addAndGet(bytesin);
    } else {
      this.stats.timeDataPageLoads.addAndGet(time);
      this.stats.numDataPageLoads.incrementAndGet();
      this.stats.totalDataPageReadBytes.addAndGet(bytesin);
    }
  }
}
/**
 * Debug helper: prints every entry of a column's local (page-level) dictionary
 * to stdout, one {@code id: value} line per entry.
 *
 * @param columnDescriptor column whose primitive type selects the decode method
 * @param localDictionary  the decoded dictionary page for that column
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // Parquet's Dictionary.getMaxId() returns the largest valid id (inclusive);
  // the previous '<' comparison silently skipped the last dictionary entry.
  for (int i = 0; i <= localDictionary.getMaxId(); ++i) {
    switch (columnDescriptor.getType()) {
      case INT32:
        System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
        break;
      case INT64:
        System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
        break;
      case INT96:
      case BINARY:
      case FIXED_LEN_BYTE_ARRAY:
        // NOTE(review): new String(byte[]) uses the platform default charset;
        // for portable output this should specify UTF-8 explicitly.
        System.out.println(format("%d: %s", i, new String(localDictionary.decodeToBinary(i).getBytesUnsafe())));
        break;
      case FLOAT:
        System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
        break;
      case DOUBLE:
        System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
        break;
      case BOOLEAN:
        System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
        break;
      default:
        break;
    }
  }
}
}
/**
 * Builds a global dictionary for a parquet table for BINARY or FIXED_LEN_BYTE_ARRAY column types.
 * It will remove existing dictionaries if present and create new ones.
 *
 * @param fs filesystem
 * @param tableDir root directory for given table that has parquet files
 * @param bufferAllocator memory allocator
 * @return GlobalDictionariesInfo that has dictionary version, root path and columns along with path to dictionary files.
 * @throws IOException if listing the table directory, reading local dictionaries, or
 *         writing the merged dictionary files fails
 */
public static GlobalDictionariesInfo createGlobalDictionaries(FileSystem fs, Path tableDir, BufferAllocator bufferAllocator) throws IOException {
  // Collect the per-file (local) dictionaries from every parquet file under tableDir.
  final FileStatus[] statuses = fs.listStatus(tableDir, PARQUET_FILES_FILTER);
  final Map<ColumnDescriptor, Path> globalDictionaries = Maps.newHashMap();
  final Map<ColumnDescriptor, List<Dictionary>> allDictionaries = readLocalDictionaries(fs, statuses, bufferAllocator);
  // Bump the version so readers keep using the previous dictionaries until the
  // new ones are published below.
  final long dictionaryVersion = getDictionaryVersion(fs, tableDir) + 1;
  // Write into a temp directory first, then promote atomically at the end.
  final Path tmpDictionaryRootDir = createTempRootDir(fs, tableDir, dictionaryVersion);
  logger.debug("Building global dictionaries for columns {} with version {}", allDictionaries.keySet(), dictionaryVersion);
  // Sort all local dictionaries and write it to file with an index if needed
  for (Map.Entry<ColumnDescriptor, List<Dictionary>> entry : allDictionaries.entrySet()) {
    final ColumnDescriptor columnDescriptor = entry.getKey();
    final Path dictionaryFile = dictionaryFilePath(tmpDictionaryRootDir, columnDescriptor);
    logger.debug("Creating a new global dictionary for {} with version {}", columnDescriptor.toString(), dictionaryVersion);
    // null existing-dictionary argument: building from scratch, no prior
    // global dictionary is merged in.
    createDictionaryFile(fs, dictionaryFile, columnDescriptor, entry.getValue(), null, bufferAllocator);
    globalDictionaries.put(columnDescriptor, dictionaryFile);
  }
  // Promote the temp directory to its final versioned location.
  final Path finalDictionaryRootDir = createDictionaryVersionedRootPath(fs, tableDir, dictionaryVersion, tmpDictionaryRootDir);
  return new GlobalDictionariesInfo(globalDictionaries, finalDictionaryRootDir, dictionaryVersion);
}
/**
 * Command-line entry point: builds global dictionaries for the parquet table rooted
 * at {@code args[0]} and prints each resulting dictionary batch to stdout.
 *
 * @param args single argument — path to the parquet table directory
 */
public static void main(String[] args) {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
  // when the table path is missing.
  if (args.length != 1) {
    System.err.println("Usage: GlobalDictionaryBuilder <parquet table directory>");
    return;
  }
  try (final BufferAllocator bufferAllocator = new RootAllocator(VM.getMaxDirectMemory())) {
    final Path tableDir = new Path(args[0]);
    final FileSystem fs = tableDir.getFileSystem(new Configuration());
    if (fs.exists(tableDir) && fs.isDirectory(tableDir)) {
      final Map<ColumnDescriptor, Path> dictionaryEncodedColumns =
          createGlobalDictionaries(fs, tableDir, bufferAllocator).getColumnsToDictionaryFiles();
      final long version = getDictionaryVersion(fs, tableDir);
      final Path dictionaryRootDir = getDictionaryVersionedRootPath(fs, tableDir, version);
      for (ColumnDescriptor columnDescriptor : dictionaryEncodedColumns.keySet()) {
        final VectorContainer data = readDictionary(fs, dictionaryRootDir, columnDescriptor, bufferAllocator);
        // Fixed unbalanced bracket in the message: the opening "[" was never closed.
        System.out.println("Dictionary for column [" + columnDescriptor.toString() + "] size " + data.getRecordCount());
        BatchPrinter.printBatch(data);
        data.clear();
      }
    }
  } catch (IOException ioe) {
    logger.error("Failed ", ioe);
  }
}
/**
 * Reads values from successive pages until the requested record count is reached
 * or the column has no more pages, then publishes the count to the value vector.
 *
 * @param recordsToReadInThisPass target number of records for this pass; non-positive
 *                                values skip reading entirely
 * @throws IOException if reading a page fails
 */
public void processPages(long recordsToReadInThisPass) throws IOException {
  reset();
  if (recordsToReadInThisPass > 0) {
    // Always attempt at least one read, then stop as soon as either the target
    // is met or no further page is available (checked in that order, matching
    // the original short-circuit evaluation).
    while (true) {
      determineSize(recordsToReadInThisPass);
      if (valuesReadInCurrentPass >= recordsToReadInThisPass) {
        break;
      }
      if (!pageReader.hasPage()) {
        break;
      }
    }
  }
  logger.trace("Column Reader: {} - Values read in this pass: {} - ",
      this.getColumnDescriptor().toString(), valuesReadInCurrentPass);
  valueVec.getMutator().setValueCount(valuesReadInCurrentPass);
}
// NOTE(review): this span is an interior fragment of a larger method (its
// definition is outside this view); it appears to contain the bodies of two
// alternating branches — create-from-scratch vs. merge-with-existing. Confirm
// against the enclosing method before relying on these notes.
// Branch 1: no prior global dictionary — build a fresh one (null existing-dictionary arg).
logger.debug("Creating a new global dictionary for {} with version {}", columnDescriptor.toString(), nextDictionaryVersion);
createDictionaryFile(fs, newDictionaryFile, columnDescriptor, entry.getValue(), null, bufferAllocator);
globalDictionaries.put(columnDescriptor, newDictionaryFile);
// Branch 2: a prior dictionary exists — merge its contents (vectorContainer) with
// the new local dictionaries.
logger.debug("Updating global dictionary for {} with version {}", columnDescriptor.toString(), nextDictionaryVersion);
createDictionaryFile(fs, newDictionaryFile, columnDescriptor, entry.getValue(), vectorContainer, bufferAllocator);
globalDictionaries.put(columnDescriptor, newDictionaryFile);
/**
 * Get the page header and the pageData (uncompressed) for the next page.
 *
 * Reads page headers from the current stream position, consuming (and loading) any
 * dictionary pages encountered, until a non-dictionary page header is found; then
 * reads and decompresses that page's data into {@code pageData}.
 *
 * @throws IOException if reading the header or page bytes fails
 */
protected void nextInternal() throws IOException{
  Stopwatch timer = Stopwatch.createUnstarted();
  // next, we need to decompress the bytes
  // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one
  // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary
  do {
    long start=dataReader.getPos();
    timer.start();
    pageHeader = Util.readPageHeader(dataReader);
    long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
    // Header size is derived from the stream-position delta around the read.
    long pageHeaderBytes=dataReader.getPos()-start;
    // bytesin == bytesout here deliberately records this as a plain load (no decompression).
    this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
    logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","",
      this.parentColumnReader.parentReader.hadoopPath,
      this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead);
    timer.reset();
    if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
      // Consume the dictionary page in place; presumably readDictionaryPage advances
      // the reader past the page data so the loop makes progress — TODO confirm.
      readDictionaryPage(pageHeader, parentColumnReader);
    }
  } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);
  // First non-dictionary header reached: read (and decompress if needed) its data.
  int compressedSize = pageHeader.getCompressed_page_size();
  int uncompressedSize = pageHeader.getUncompressed_page_size();
  pageData = readPage(pageHeader, compressedSize, uncompressedSize);
}
// NOTE(review): interior fragment of a page-reading loop (the enclosing method's
// definition is outside this view); it mirrors part of nextInternal above.
// Emit the trace line for the header read, then reset the stopwatch for the next timing.
logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","",
  this.parentColumnReader.parentReader.hadoopPath,
  this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead);
timer.reset();
// Dictionary pages are handled separately from data pages.
if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {