/**
 * Creates a reader that forwards its three arguments to the superclass
 * constructor, then starts with a fresh {@link RecordIdentifier} and
 * {@code isNull} set to {@code true}.
 *
 * @param vrbReader    vectorized reader handed to the superclass
 * @param vrbCtx       row-batch context handed to the superclass
 * @param includedCols column list handed to the superclass
 */
public OrcOiBatchToRowReader(
    RecordReader<NullWritable, VectorizedRowBatch> vrbReader,
    VectorizedRowBatchCtx vrbCtx,
    List<Integer> includedCols) {
  super(vrbReader, vrbCtx, includedCols);
  // Initial state of this reader's own fields.
  this.isNull = true;
  this.recordIdentifier = new RecordIdentifier();
}
/**
 * Compares this object with a {@link RecordIdentifier}.
 *
 * @param other identifier to compare against; may be {@code null}
 * @return {@code -1} when {@code other} is {@code null}; otherwise the result
 *         of comparing against {@code otherKey} after it has been loaded with
 *         {@code other}'s write id, bucket property and row id
 */
private int compareTo(RecordIdentifier other) {
  if (other != null) {
    // Load the identifier's three fields into otherKey, then delegate to the
    // compareTo overload that accepts otherKey's type.
    otherKey.set(other.getWriteId(), other.getBucketProperty(), other.getRowId());
    return compareTo(otherKey);
  }
  return -1;
}

@Override
/**
 * Checks that {@code compareTo} orders identifiers consistently when they
 * differ in the third, the first, or the second constructor/setValues argument.
 */
@Test
public void TestOrdering() throws Exception {
  RecordIdentifier left = new RecordIdentifier(100, 200, 1200);
  RecordIdentifier right = new RecordIdentifier();

  // Same first two values, smaller row id on the right.
  right.setValues(100L, 200, 1000L);
  assertTrue(right.compareTo(left) < 0);
  assertTrue(left.compareTo(right) > 0);

  // After copying, both sides compare as equal.
  left.set(right);
  assertTrue(right.compareTo(left) == 0);

  // Raising only the row id makes the right side greater.
  right.setRowId(2000);
  assertTrue(right.compareTo(left) > 0);

  // Differ only in the first value.
  left.setValues(1, 2, 3);
  right.setValues(100, 2, 3);
  assertTrue(left.compareTo(right) < 0);
  assertTrue(right.compareTo(left) > 0);

  // Differ only in the second value.
  left.setValues(1, 2, 3);
  right.setValues(1, 100, 3);
  assertTrue(left.compareTo(right) < 0);
  assertTrue(right.compareTo(left) > 0);
}
/**
 * Renders this identifier as
 * {@code RecordIdentifier(<writeId>, <bucketToString(bucketId)>,<rowId>)}.
 * Note that there is no space after the second comma.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("RecordIdentifier(");
  text.append(writeId).append(", ");
  text.append(bucketToString(bucketId)).append(",");
  text.append(getRowId()).append(")");
  return text.toString();
}

public static String bucketToString(int bucketId) {
@Override public int compareTo(RecordIdentifier other) { if (other.getClass() != RecordIdentifier.class) { //WTF? assumes that other instanceof OrcRawRecordMerger.ReaderKey??? return -other.compareTo(this); } return compareToInternal(other); }
/**
 * Checks whether {@code recordIdentifier} follows the previously accepted one.
 *
 * <p>The identifier is rejected (and a debug message logged) when its write id
 * is lower than the last accepted write id, or when the write id is unchanged
 * and its row id is not greater than the last accepted row id. An accepted
 * identifier becomes the new reference point.
 *
 * @param recordIdentifier identifier to check
 * @return {@code true} if accepted, {@code false} if out of sequence
 */
boolean isInSequence(RecordIdentifier recordIdentifier) {
  final long writeId = recordIdentifier.getWriteId();
  final long rowId = recordIdentifier.getRowId();

  // Nothing to compare against until a first identifier has been accepted.
  if (lastWriteId != null) {
    if (writeId < lastWriteId) {
      LOG.debug("Non-sequential write ID. Expected >{}, recordIdentifier={}", lastWriteId, recordIdentifier);
      return false;
    }
    if (writeId == lastWriteId && lastRowId != null && rowId <= lastRowId) {
      LOG.debug("Non-sequential row ID. Expected >{}, recordIdentifier={}", lastRowId, recordIdentifier);
      return false;
    }
  }

  lastWriteId = writeId;
  lastRowId = rowId;
  return true;
}
// Build a Record from a new RecordIdentifier carrying the current identifier's
// write id, bucket property and row id, paired with value's string form; then
// print it and append it to records.
// NOTE(review): the fresh RecordIdentifier presumably guards against
// recordIdentifier being reused by the reader - confirm.
Record record = new Record(new RecordIdentifier(recordIdentifier.getWriteId(), recordIdentifier.getBucketProperty(), recordIdentifier.getRowId()), value.toString()); System.out.println(record); records.add(record);
/**
 * Decodes the metadata value stored under
 * {@code OrcRecordUpdater.ACID_KEY_INDEX_NAME} into one identifier per entry.
 *
 * <p>The value is decoded with {@code utf8Decoder} and split on {@code ';'};
 * each non-empty entry is split on {@code ','} and its first three parts are
 * parsed as long, int, long and passed to {@code setValues}. Empty entries
 * leave a {@code null} slot in the result.
 *
 * @param reader reader whose metadata is consulted
 * @return array with one slot per {@code ';'}-separated entry
 * @throws IllegalArgumentException if the metadata bytes cannot be decoded
 */
static RecordIdentifier[] parseKeyIndex(Reader reader) {
  String[] stripes;
  try {
    // Work on a duplicate of the metadata buffer.
    ByteBuffer encoded =
        reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME).duplicate();
    stripes = utf8Decoder.decode(encoded).toString().split(";");
  } catch (CharacterCodingException e) {
    throw new IllegalArgumentException("Bad string encoding for " +
        OrcRecordUpdater.ACID_KEY_INDEX_NAME, e);
  }

  RecordIdentifier[] keys = new RecordIdentifier[stripes.length];
  int slot = 0;
  for (String stripe : stripes) {
    if (!stripe.isEmpty()) {
      String[] fields = stripe.split(",");
      RecordIdentifier key = new RecordIdentifier();
      key.setValues(Long.parseLong(fields[0]),
          Integer.parseInt(fields[1]),
          Long.parseLong(fields[2]));
      keys[slot] = key;
    }
    slot++;
  }
  return keys;
}
// First branch: shift the existing minKey's row id forward by rowIdOffset.
// Else branch: build newMinKey at row id (rowIdOffset - 1), shift maxKey's row
// id by rowIdOffset, and build newMaxKey at
// (rowIdOffset + reader.getNumberOfRows() - 1), all with writeId/bucketProperty.
// NOTE(review): the enclosing condition is outside this view - confirm which
// case each branch is meant to handle.
minKey.setRowId(minKey.getRowId() + rowIdOffset); } else { newMinKey = new RecordIdentifier(writeId, bucketProperty,rowIdOffset - 1); maxKey.setRowId(maxKey.getRowId() + rowIdOffset); newMaxKey = new RecordIdentifier(writeId, bucketProperty, rowIdOffset + reader.getNumberOfRows() - 1);
// Build min/max keys from the synthetic write id and bucket property. The max
// key uses (maxRowId - 1), or 0 when maxRowId is not positive. The bucket
// properties and row ids of both keys are then passed to setSARG together with
// keyIntervalTmp and deleteEventReaderOptions.
RecordIdentifier minKey = new RecordIdentifier(syntheticProps.getSyntheticWriteId(), syntheticProps.getBucketProperty(), minRowId); RecordIdentifier maxKey = new RecordIdentifier(syntheticProps.getSyntheticWriteId(), syntheticProps.getBucketProperty(), maxRowId > 0? maxRowId - 1: 0); setSARG(keyIntervalTmp, deleteEventReaderOptions, minKey.getBucketProperty(), maxKey.getBucketProperty(), minKey.getRowId(), maxKey.getRowId());
// An identifier built from (1, 2, 3) is expected to compare as exactly 1
// against left (and left as exactly -1 against it), and the two must not be
// equal in either direction.
RecordIdentifier ri = new RecordIdentifier(1, 2, 3); assertEquals(1, ri.compareTo(left)); assertEquals(-1, left.compareTo(ri)); assertEquals(false, ri.equals(left)); assertEquals(false, left.equals(ri));
/**
 * Copies the transaction id, bucket id and row id of {@code ri} into
 * {@code struct}, each at the slot given by the matching {@code Field} ordinal.
 *
 * @param ri identifier to copy from; when {@code null}, every slot of
 *           {@code struct} is set to {@code null}
 * @param struct destination array; must be of size {@code Field.values().length}
 *               (only checked when assertions are enabled)
 */
public static void toArray(RecordIdentifier ri, Object[] struct) {
  assert struct != null && struct.length == Field.values().length;
  if (ri != null) {
    struct[Field.transactionId.ordinal()] = ri.getTransactionId();
    struct[Field.bucketId.ordinal()] = ri.getBucketId();
    struct[Field.rowId.ordinal()] = ri.getRowId();
  } else {
    Arrays.fill(struct, null);
  }
}
}
/**
 * Copies the state of {@code other} into this key: first the part handled by
 * the superclass, then {@code currentWriteId} and {@code isDeleteEvent}.
 *
 * @param other source key; must be a {@code ReaderKey}, otherwise the cast
 *              fails with {@link ClassCastException} after the superclass
 *              part has already been copied
 */
@Override
public void set(RecordIdentifier other) {
  super.set(other);
  final ReaderKey source = (ReaderKey) other;
  currentWriteId = source.currentWriteId;
  isDeleteEvent = source.isDeleteEvent;
}
/**
 * Restores {@code partVal} and {@code recId} from {@code dataInput}.
 *
 * <p>Each field is preceded by a boolean presence flag. When the flag is
 * {@code true} the field is replaced by a freshly deserialized instance; when
 * it is {@code false} the field is reset to {@code null}, so that an instance
 * reused across several reads does not keep a value from an earlier record.
 *
 * @param dataInput stream to read from
 * @throws IOException if reading from the stream fails
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
  if (dataInput.readBoolean()) {
    partVal = new Text();
    partVal.readFields(dataInput);
  } else {
    // Absent in the stream: drop any value left over from a previous read.
    partVal = null;
  }

  if (dataInput.readBoolean()) {
    recId = new RecordIdentifier();
    recId.readFields(dataInput);
  } else {
    recId = null;
  }
}

@Override
/**
 * Returns a new {@link RecordIdentifier} populated from the current
 * {@code recordIdentifier} via {@code set}, so the caller does not receive
 * the internal instance. The internal identifier is logged at debug level.
 *
 * @return a separate identifier instance populated from the current one
 */
public RecordIdentifier getRecordIdentifier() {
  final RecordIdentifier snapshot = new RecordIdentifier();
  snapshot.set(recordIdentifier);
  LOG.debug("Fetched recordIdentifier={}", recordIdentifier);
  return snapshot;
}
@Test public void testHashEquals() throws Exception { long origTxn = ThreadLocalRandom.current().nextLong(1, 10000000000L); int bucketId = ThreadLocalRandom.current().nextInt(1, 512); long rowId = ThreadLocalRandom.current().nextLong(1, 10000000000L); long currTxn = origTxn + ThreadLocalRandom.current().nextLong(0, 10000000000L); RecordIdentifier left = new RecordIdentifier(origTxn, bucketId, rowId); RecordIdentifier right = new RecordIdentifier(origTxn, bucketId, rowId); OrcRawRecordMerger.ReaderKey rkLeft = new OrcRawRecordMerger.ReaderKey(origTxn, bucketId, rowId, currTxn); OrcRawRecordMerger.ReaderKey rkRight = new OrcRawRecordMerger.ReaderKey(origTxn, bucketId, rowId, currTxn); assertEquals("RecordIdentifier.equals", left, right); assertEquals("RecordIdentifier.hashCode", left.hashCode(), right.hashCode()); assertEquals("ReaderKey", rkLeft, rkLeft); assertEquals("ReaderKey.hashCode", rkLeft.hashCode(), rkRight.hashCode()); //debatable if this is correct, but that's how it's implemented assertNotEquals("RecordIdentifier <> ReaderKey", left, rkRight); } }
// Serialize an identifier built from (maxTxn - 1, bucket, i) to out, followed
// by a fixed line of text terminated by a newline.
RecordIdentifier ri = new RecordIdentifier(maxTxn - 1, bucket, i); ri.write(out); out.writeBytes("mary had a little lamb its fleece was white as snow\n");
/**
 * Renders this object as
 * {@code {originalTxn: <transactionId>, bucket: <bucketId>, row: <rowId>}}.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("{originalTxn: ");
  text.append(transactionId);
  text.append(", bucket: ").append(bucketId);
  text.append(", row: ").append(getRowId());
  text.append("}");
  return text.toString();
}
}
/**
 * Two keys are equal when the superclass considers them equal and their
 * {@code currentWriteId} values match.
 *
 * @param other object to compare with
 * @return {@code true} if both conditions hold
 */
@Override
public boolean equals(Object other) {
  if (!super.equals(other)) {
    return false;
  }
  // NOTE(review): the cast presumably relies on super.equals having already
  // rejected null and objects of another class - confirm.
  final ReaderKey that = (ReaderKey) other;
  return currentWriteId == that.currentWriteId;
}

@Override
/**
 * Convert a RecordIdentifier. This is done so that we can use the
 * RecordIdentifier in place of the bucketing column.
 *
 * <p>The identifier's bucket property selects a {@code BucketCodec} version,
 * and that codec decodes the writer id from the same property. The result is
 * written into the shared {@code intWritable} field, which is also the object
 * returned.
 *
 * @param i RecordIdentifier to convert
 * @return value of the bucket identifier, or {@code null} when {@code i} is
 *         {@code null}
 */
public IntWritable evaluate(RecordIdentifier i) {
  if (i == null) {
    return null;
  }
  final int bucketProperty = i.getBucketProperty();
  BucketCodec codec = BucketCodec.determineVersion(bucketProperty);
  intWritable.set(codec.decodeWriterId(bucketProperty));
  return intWritable;
}