@Test public void testTsvParserParseRowKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(0, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("\t\tval_a\t1234"); parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t"); assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(6, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("val_a"); rowKeyOffsets = parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) { parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t"); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length);
byte[] lineBytes = line.getBytes(); try { ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) { continue; parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
byte[] lineBytes = line.getBytes(); try { ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) { continue; parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
byte[] lineBytes = line.getBytes(); try { ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) { continue; parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
ImportTsv.TsvParser.ParsedLine parsed = parser.parse( lineBytes, value.getLength()); ImmutableBytesWritable rowKey = if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) { continue;
byte[] lineBytes = line.getBytes(); try { ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) { continue; parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValueTypeForMR(cell);
ImportTsv.TsvParser.ParsedLine parsed = parser.parse( lineBytes, value.getLength()); ImmutableBytesWritable rowKey = if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()) { continue; parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put,
parser = new TsvParser("HBASE_ROW_KEY", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
ImportTsv.TsvParser.ParsedLine parsed = parser.parse( lineBytes, value.getLength()); ImmutableBytesWritable rowKey = if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) { continue;
ImportTsv.TsvParser.ParsedLine parsed = parser.parse( lineBytes, value.getLength()); ImmutableBytesWritable rowKey = if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) { continue;
parser = new TsvParser("HBASE_ROW_KEY", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,col1:scol2", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); parser = new TsvParser("HBASE_ROW_KEY,col1:scol1,HBASE_TS_KEY,col1:scol2", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1));
@Test public void testTsvParseAttributesKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value"); ParsedLine parse = parser.parse(line, line.length); assertEquals(18, parse.getAttributeKeyOffset()); assertEquals(3, parser.getAttributesKeyColumnIndex()); String attributes[] = parse.getIndividualAttributes(); assertEquals(attributes[0], "key=>value"); try { line = Bytes.toBytes("rowkey\tval_a\t1234"); parser.parse(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t"); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234"); parse = parser.parse(line, line.length); assertEquals(0, parse.getAttributeKeyOffset()); assertEquals(0, parser.getAttributesKeyColumnIndex()); attributes = parse.getIndividualAttributes(); assertEquals(attributes[0], "key=>value"); try { line = Bytes.toBytes("val_a"); ParsedLine parse2 = parser.parse(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) {
if (i == length || lineBytes[i] == separatorByte) { endPos = i - 1; if (rkColumnIndex++ == getRowKeyColumnIndex()) { if ((endPos + 1) == startPos) { throw new BadTsvLineException("Empty value for ROW KEY.");
@Test public void testTsvParseAttributesKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value"); ParsedLine parse = parser.parse(line, line.length); assertEquals(18, parse.getAttributeKeyOffset()); assertEquals(3, parser.getAttributesKeyColumnIndex()); String attributes[] = parse.getIndividualAttributes(); assertEquals(attributes[0], "key=>value"); try { line = Bytes.toBytes("rowkey\tval_a\t1234"); parser.parse(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { parser = new TsvParser("HBASE_ATTRIBUTES_KEY,col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t"); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234"); parse = parser.parse(line, line.length); assertEquals(0, parse.getAttributeKeyOffset()); assertEquals(0, parser.getAttributesKeyColumnIndex()); attributes = parse.getIndividualAttributes(); assertEquals(attributes[0], "key=>value"); try { line = Bytes.toBytes("val_a"); ParsedLine parse2 = parser.parse(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) {
if (hfileOutPath == null) { cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
if (i == length || lineBytes[i] == separatorByte) { endPos = i - 1; if (rkColumnIndex++ == getRowKeyColumnIndex()) { if ((endPos + 1) == startPos) { throw new BadTsvLineException("Empty value for ROW KEY.");
if (i == length || lineBytes[i] == separatorByte) { endPos = i - 1; if (rkColumnIndex++ == getRowKeyColumnIndex()) { if ((endPos + 1) == startPos) { throw new BadTsvLineException("Empty value for ROW KEY.");
public void map(LongWritable offset, Text value, Context context) throws IOException { try { Pair<Integer,Integer> rowKeyOffests = parser.parseRowKey(value.getBytes(), value.getLength()); ImmutableBytesWritable rowKey = new ImmutableBytesWritable( value.getBytes(), rowKeyOffests.getFirst(), rowKeyOffests.getSecond());
@Test public void testTsvParserParseRowKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(0, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("\t\tval_a\t1234"); parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { parser = new TsvParser("col_a,HBASE_ROW_KEY,HBASE_TS_KEY", "\t"); assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(6, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("val_a"); rowKeyOffsets = parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) { parser = new TsvParser("col_a,HBASE_TS_KEY,HBASE_ROW_KEY", "\t"); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length);