/**
 * Creates a scanner over the range described by {@code fragment}.
 *
 * <p>{@code startOffset} is taken from {@code fragment.getStartKey()} and {@code endOffset}
 * is that value plus {@code fragment.getEndKey()}; {@code start} is reset to zero.
 *
 * @param conf     configuration handed to the superclass; its {@code io.file.buffer.size}
 *                 entry is overwritten with 4096 here
 * @param schema   schema handed to the superclass
 * @param meta     table metadata handed to the superclass
 * @param fragment fragment supplying the start key and the end key
 * @throws IOException declared by this constructor; propagated from the superclass constructor
 */
public RCFileScanner(Configuration conf,
                     final Schema schema,
                     final TableMeta meta,
                     final FileFragment fragment) throws IOException {
  super(conf, schema, meta, fragment);

  // TODO remove: this mutates the caller-supplied configuration.
  conf.setInt("io.file.buffer.size", 4096);

  this.start = 0;
  this.startOffset = fragment.getStartKey();
  this.endOffset = this.startOffset + fragment.getEndKey();
}
/**
 * Refills {@code buffer} from {@code channel}, keeping any unread bytes at the front.
 *
 * <p>Marks {@code eof} and returns {@code false} when {@code numBytesRead} has already
 * reached {@code fragment.getEndKey()} or when the channel reports end-of-stream.
 * Otherwise the buffer is flipped for reading and, when fewer than {@code bufferSize}
 * bytes were left before this read, its limit is lowered so that it does not extend past
 * {@code fragment.getEndKey()}.
 *
 * @return {@code true} if the buffer was refilled, {@code false} if no more data is available
 * @throws IOException if reading from the channel fails
 */
private boolean fillBuffer() throws IOException {
  if (numBytesRead >= fragment.getEndKey()) {
    eof = true;
    return false;
  }

  // Unread bytes are moved to the front of the buffer by compact().
  int currentDataSize = buffer.remaining();
  buffer.compact();

  int bytesRead = channel.read(buffer);
  if (bytesRead == -1) {
    // NOTE(review): the buffer is left un-flipped on this path, as in the original code.
    eof = true;
    return false;
  }

  // After flip() the limit marks the end of the bytes that are actually present.
  buffer.flip();
  long realRemaining = fragment.getEndKey() - numBytesRead;
  numBytesRead += bytesRead;

  if (realRemaining < bufferSize) {
    int newLimit = currentDataSize + (int) realRemaining;
    if (newLimit > bufferSize) {
      newLimit = bufferSize;
    }
    // A read may deliver fewer bytes than remain before the end key; never raise the
    // limit above what flip() set, otherwise stale bytes would become readable.
    if (newLimit > buffer.limit()) {
      newLimit = buffer.limit();
    }
    buffer.limit(newLimit);
  }
  return true;
}
@Override public float getProgress() { try { if(!more) { return 1.0f; } long filePos = getPosition(); if (startOffset == filePos) { return 0.0f; } else { //if scanner read the header, filePos moved to zero return Math.min(1.0f, (float)(Math.max(filePos - startOffset, 0)) / (float)(fragment.getEndKey())); } } catch (IOException e) { LOG.error(e.getMessage(), e); return 0.0f; } }
/**
 * Builds an identifier of the form {@code <file name>:<start key>:<end key>_<millis>}.
 *
 * <p>The trailing component is {@link System#currentTimeMillis()}, so calls made at
 * different times may return different values.
 *
 * @return the identifier string
 */
public String getId() {
  final StringBuilder id = new StringBuilder();
  id.append(fragment.getPath().getName());
  id.append(":").append(fragment.getStartKey());
  id.append(":").append(fragment.getEndKey());
  id.append("_").append(System.currentTimeMillis());
  return id.toString();
}
/**
 * Renders this fragment as a JSON-like snippet containing its id ({@code tableName}),
 * path, start key and end key (labelled {@code length}).
 *
 * <p>The path value is wrapped in a pair of double quotes; previously only the closing
 * quote was emitted, which produced unbalanced quoting.
 *
 * @return the textual form of this fragment
 */
@Override
public String toString() {
  return "\"fragment\": {\"id\": \"" + tableName
      + "\", \"path\": \"" + getPath()
      + "\", \"start\": " + this.getStartKey()
      + ",\"length\": " + getEndKey() + "}";
}
/**
 * Updates {@code tableStats} with the current row count and read position, then reports
 * scan progress as a fraction.
 *
 * @return {@code 1.0f} when {@code eof} is set or there is no channel; {@code 0.0f} when the
 *         channel position is zero or cannot be read; otherwise the channel position divided
 *         by {@code fragment.getEndKey()}, capped at {@code 1.0f}
 */
@Override
public float getProgress() {
  try {
    tableStats.setNumRows(recordCount);

    final boolean hasChannel = channel != null;
    long filePos = 0;
    if (hasChannel) {
      filePos = channel.position();
      tableStats.setReadBytes(filePos);
    }

    if (eof || !hasChannel) {
      // Finished (or never opened): report the fragment's end key as the bytes read.
      tableStats.setReadBytes(fragment.getEndKey());
      return 1.0f;
    }

    if (filePos == 0) {
      return 0.0f;
    }
    return Math.min(1.0f, ((float) filePos / fragment.getEndKey().floatValue()));
  } catch (IOException e) {
    LOG.error(e.getMessage(), e);
    return 0.0f;
  }
}
} // closes an enclosing scope that was opened before this excerpt
public RowFileScanner(Configuration conf, final Schema schema, final TableMeta meta, final FileFragment fragment) throws IOException { super(conf, schema, meta, fragment); SYNC_INTERVAL = conf.getInt(ConfVars.ROWFILE_SYNC_INTERVAL.varname, ConfVars.ROWFILE_SYNC_INTERVAL.defaultIntVal) * SYNC_SIZE; nullFlags = new BitArray(schema.size()); tupleHeaderSize = nullFlags.bytesLength() + (2 * Short.SIZE / 8); this.start = fragment.getStartKey(); this.end = this.start + fragment.getEndKey(); }
/**
 * Creates a scanner over {@code fragment} and prepares the key/column holders and the
 * byte sequence used for null values.
 *
 * <p>The null text is read from the {@code NULL} table option (default
 * {@code NullDatum.DEFAULT_TEXT}) and Java-unescaped; when it is empty the bytes from
 * {@code NullDatum.get().asTextBytes()} are used instead.
 *
 * @param conf     configuration handed to the superclass
 * @param schema   schema handed to the superclass
 * @param meta     table metadata handed to the superclass
 * @param fragment fragment supplying the start key and the end key
 * @throws IOException declared by this constructor; propagated from the superclass constructor
 */
public RCFileScanner(final Configuration conf,
                     final Schema schema,
                     final TableMeta meta,
                     final FileFragment fragment) throws IOException {
  // Note the superclass argument order: (conf, meta, schema, fragment).
  super(conf, meta, schema, fragment);

  this.start = fragment.getStartKey();
  this.end = start + fragment.getEndKey();

  key = new LongWritable();
  column = new BytesRefArrayWritable();

  final String configuredNull = this.meta.getOption(NULL, NullDatum.DEFAULT_TEXT);
  final String nullCharacters = StringEscapeUtils.unescapeJava(configuredNull);
  // NOTE(review): getBytes() without a charset uses the platform default — confirm intended.
  nullChars = StringUtils.isEmpty(nullCharacters)
      ? NullDatum.get().asTextBytes()
      : nullCharacters.getBytes();
}
fileLimit = fragment.getStartKey() + fragment.getEndKey(); // fileLimit is less than or equal to fileSize tableStats.setNumBytes(fragment.getEndKey()); + ", fragment size :" + fragment.getEndKey() + ", fileLimit: " + fileLimit); if (fragment.getEndKey() < 64 * StorageUnit.KB) { bufferSize = fragment.getEndKey().intValue(); } else { bufferSize = 64 * StorageUnit.KB;
@Override protected boolean initFirstScan(int maxBytesPerSchedule) throws IOException { synchronized(this) { eof = false; first = true; if(sin == null) { FSDataInputStream fin = fs.open(fragment.getPath(), 128 * 1024); sin = new ScheduledInputStream(fragment.getPath(), fin, fragment.getStartKey(), fragment.getEndKey(), fs.getLength(fragment.getPath())); startOffset = fragment.getStartKey(); length = fragment.getEndKey(); if (startOffset > 0) { startOffset--; // prev line feed } } } return true; }
/**
 * Prepares this scanner for its first scan.
 *
 * <p>Marks {@code first}, stores {@code maxBytesPerSchedule}, and — if no scheduled stream
 * exists yet — opens the fragment's file, creates the {@code RCFile.Reader}, and
 * instantiates the serializer/deserializer. The serde class name comes from the file
 * metadata entry keyed by {@code SERDE} when that entry is present and non-empty;
 * otherwise from the {@code SERDE} table option, defaulting to
 * {@code BinarySerializerDeserializer}.
 *
 * @param maxBytesPerSchedule value stored in {@code this.maxBytesPerSchedule}
 * @return always {@code true}
 * @throws IOException if opening the file fails, or wrapping any failure while resolving
 *                     or instantiating the serde class
 */
@Override
protected boolean initFirstScan(int maxBytesPerSchedule) throws IOException {
  synchronized (lock) {
    first = true;
    this.maxBytesPerSchedule = maxBytesPerSchedule;

    if (sin != null) {
      // Already initialised by an earlier call.
      return true;
    }

    sin = new ScheduledInputStream(
        fragment.getPath(),
        fs.open(fragment.getPath()),
        fragment.getStartKey(),
        fragment.getEndKey(),
        fs.getLength(fragment.getPath()));
    this.in = new RCFile.Reader(fragment.getPath(), sin, fs, fs.getConf());

    Text text = this.in.getMetadata().get(new Text(SERDE));
    try {
      final boolean declaredInFile = text != null && !text.toString().isEmpty();
      final String serdeClass = declaredInFile
          ? text.toString()
          : this.meta.getOption(SERDE, BinarySerializerDeserializer.class.getName());
      serde = (SerializerDeserializer) Class.forName(serdeClass).newInstance();
    } catch (Exception e) {
      LOG.error(e.getMessage(), e);
      throw new IOException(e);
    }
  }
  return true;
}
/**
 * Records final statistics and releases the scanner's resources.
 *
 * <p>When {@code tableStats} is present, its read-byte count is set to
 * {@code fragment.getEndKey()} and its row count to {@code recordCount}. The buffer is then
 * handed to {@code StorageUtil.closeBuffer} and the channel and {@code fis} to
 * {@code IOUtils.cleanup}.
 *
 * @throws IOException declared by the overridden method
 */
@Override
public void close() throws IOException {
  final boolean hasStats = tableStats != null;
  if (hasStats) {
    tableStats.setReadBytes(fragment.getEndKey());
    tableStats.setNumRows(recordCount);
  }

  StorageUtil.closeBuffer(buffer);
  IOUtils.cleanup(LOG, channel, fis);
}
/**
 * Creates a scanner over a list of fragments.
 *
 * <p>Every fragment's {@code getEndKey()} is added to the total byte count, but only
 * fragments whose {@code getEndKey()} is greater than zero are kept for scanning, in input
 * order. After {@code reset()}, the projectable/selectable flags are copied from the
 * current scanner if there is one, and fresh table statistics are created with one column
 * statistic per schema column.
 *
 * @param conf            configuration stored on this scanner
 * @param schema          schema stored on this scanner; its columns seed the column statistics
 * @param meta            table metadata stored on this scanner
 * @param rawFragmentList fragments to consider
 * @param target          target schema stored on this scanner
 * @throws IOException declared by this constructor
 */
public MergeScanner(Configuration conf, Schema schema, TableMeta meta,
                    List<FileFragment> rawFragmentList, Schema target) throws IOException {
  this.conf = conf;
  this.schema = schema;
  this.meta = meta;
  this.target = target;
  this.fragments = new ArrayList<FileFragment>();

  // It should keep the input order. Otherwise, it causes wrong result of sort queries.
  long totalBytes = 0;
  for (FileFragment candidate : rawFragmentList) {
    totalBytes += candidate.getEndKey();
    if (!(candidate.getEndKey() > 0)) {
      continue;
    }
    fragments.add(candidate);
  }

  this.reset();

  if (currentScanner != null) {
    this.projectable = currentScanner.isProjectable();
    this.selectable = currentScanner.isSelectable();
  }

  tableStats = new TableStats();
  tableStats.setNumBytes(totalBytes);
  tableStats.setNumBlocks(fragments.size());

  for (Column column : schema.getColumns()) {
    tableStats.addColumnStat(new ColumnStats(column));
  }
}
/**
 * Initialises (or re-initialises) this scanner.
 *
 * <p>Clears the {@code closed} flag, sets the first-scheduled flag, and — only if the
 * scanner has not been initialised before — requests a file scan from {@code smContext}.
 * Progress is reset to zero and a new {@code TableStats} is created: byte count and block
 * count come from the fragment when one is set, and one column statistic is added per
 * schema column when a schema is set.
 *
 * @throws IOException declared by this method
 */
public void init() throws IOException {
  closed.set(false);
  firstSchdeuled.set(true);

  if (!inited) {
    smContext.requestFileScan(this);
  }
  inited = true;
  progress = 0.0f;

  tableStats = new TableStats();
  if (fragment != null) {
    tableStats.setNumBytes(fragment.getEndKey());
    tableStats.setNumBlocks(1);
  }

  if (schema != null) {
    for (Column column : schema.getColumns()) {
      tableStats.addColumnStat(new ColumnStats(column));
    }
  }
}
/**
 * Marks this scanner as initialised and seeds its statistics.
 *
 * <p>Progress is reset to zero. When a fragment is set, the existing {@code tableStats}
 * receives the fragment's {@code getEndKey()} as byte count and a block count of one; when
 * a schema is set, one column statistic is added per schema column.
 *
 * @throws IOException declared by this method
 */
public void init() throws IOException {
  inited = true;
  progress = 0.0f;

  if (fragment != null) {
    tableStats.setNumBytes(fragment.getEndKey());
    tableStats.setNumBlocks(1);
  }

  if (schema != null) {
    for (Column column : schema.getColumns()) {
      tableStats.addColumnStat(new ColumnStats(column));
    }
  }
}
// end is start plus fragment.getEndKey().
// NOTE(review): the addition suggests getEndKey() holds a length rather than an absolute offset — confirm.
this.end = start + fragment.getEndKey();
// end is startOffset plus fragment.getEndKey().
// NOTE(review): the addition suggests getEndKey() holds a length rather than an absolute offset — confirm.
end = startOffset + fragment.getEndKey();
/**
 * Creates the scanner for {@code fragment}.
 *
 * <p>A {@code FileFragment} whose {@code getEndKey()} is zero gets a {@code NullScanner}
 * with the target columns applied. Otherwise the scanner class registered for the table's
 * store type is instantiated; the target columns are applied only when the scanner is
 * projectable, and a {@code FileScannerV2} additionally receives the storage manager
 * context.
 *
 * @param meta     table metadata; its store type selects the scanner class
 * @param schema   schema passed to the scanner
 * @param fragment fragment to scan
 * @param target   target schema whose columns are set on the scanner
 * @return the scanner for the fragment
 * @throws IOException if no scanner class is known for the store type
 */
@Override
public Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment, Schema target)
    throws IOException {
  final boolean emptyFileFragment =
      fragment instanceof FileFragment && ((FileFragment) fragment).getEndKey() == 0;
  if (emptyFileFragment) {
    Scanner nullScanner = new NullScanner(conf, schema, meta, (FileFragment) fragment);
    nullScanner.setTarget(target.toArray());
    return nullScanner;
  }

  Class<? extends Scanner> scannerClass = getScannerClass(meta.getStoreType());
  if (scannerClass == null) {
    throw new IOException("Unknown Storage Type: " + meta.getStoreType());
  }

  Scanner created = newScannerInstance(scannerClass, conf, schema, meta, fragment);
  if (created.isProjectable()) {
    created.setTarget(target.toArray());
  }
  if (created instanceof FileScannerV2) {
    ((FileScannerV2) created).setStorageManagerContext(context);
  }
  return created;
}
/**
 * Creates the scanner for {@code fragment}.
 *
 * <p>A {@code FileFragment} whose {@code getEndKey()} is zero gets a {@code NullScanner}
 * with the target columns applied. Otherwise the scanner class registered for the table's
 * store type is instantiated, and the target columns are applied only when the scanner is
 * projectable.
 *
 * @param meta     table metadata; its store type selects the scanner class
 * @param schema   schema passed to the scanner
 * @param fragment fragment to scan
 * @param target   target schema whose columns are set on the scanner
 * @return the scanner for the fragment
 * @throws IOException if no scanner class is known for the store type
 */
@Override
public Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment, Schema target)
    throws IOException {
  if (fragment instanceof FileFragment) {
    FileFragment fileFragment = (FileFragment) fragment;
    if (fileFragment.getEndKey() == 0) {
      Scanner scanner = new NullScanner(conf, schema, meta, fileFragment);
      scanner.setTarget(target.toArray());
      return scanner;
    }
  }

  Class<? extends Scanner> scannerClass = getScannerClass(meta.getStoreType());
  // Fail with a descriptive error instead of passing a null class to newScannerInstance.
  if (scannerClass == null) {
    throw new IOException("Unknown Storage Type: " + meta.getStoreType());
  }

  Scanner scanner = newScannerInstance(scannerClass, conf, schema, meta, fragment);
  if (scanner.isProjectable()) {
    scanner.setTarget(target.toArray());
  }
  return scanner;
}
} // closes an enclosing scope that was opened before this excerpt