/**
 * Performs the initialization that is specific to this class, namely building the parser.
 * <p>
 * Initialization that a subclass may want to reuse lives in <code>doSetup</code>, which is
 * invoked first. A subclass may therefore override this method, call <code>doSetup</code>
 * itself, and then handle its own custom parameters.
 *
 * @param context the context whose configuration provides the value stored under
 *          {@link ImportTsv#COLUMNS_CONF_KEY}
 * @throws RuntimeException if the parser reports a row key column index of -1
 */
@Override
protected void setup(Context context) {
  doSetup(context);

  final String columnSpec = context.getConfiguration().get(ImportTsv.COLUMNS_CONF_KEY);
  parser = new ImportTsv.TsvParser(columnSpec, separator);

  final boolean rowKeyMissing = parser.getRowKeyColumnIndex() == -1;
  if (rowKeyMissing) {
    throw new RuntimeException("No row key column specified");
  }
}
/**
 * Performs the initialization that is specific to this class: the parser, the cell creator
 * and the tag list.
 * <p>
 * Initialization that a subclass may want to reuse lives in <code>doSetup</code>, which is
 * invoked first. A subclass may therefore override this method, call <code>doSetup</code>
 * itself, and then handle its own custom parameters.
 *
 * @param context the context whose configuration is stored in <code>conf</code> and
 *          provides the value stored under {@link ImportTsv#COLUMNS_CONF_KEY}
 * @throws RuntimeException if the parser reports a row key column index of -1
 */
@Override
protected void setup(Context context) {
  doSetup(context);
  conf = context.getConfiguration();

  final String columnSpec = conf.get(ImportTsv.COLUMNS_CONF_KEY);
  parser = new ImportTsv.TsvParser(columnSpec, separator);

  final boolean rowKeyMissing = parser.getRowKeyColumnIndex() == -1;
  if (rowKeyMissing) {
    throw new RuntimeException("No row key column specified");
  }

  // Only reached once a row key column is known to exist.
  this.kvCreator = new CellCreator(conf);
  tags = new ArrayList<>();
}
if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns
/**
 * Performs the initialization that is specific to this class, namely building the parser.
 * <p>
 * Initialization that a subclass may want to reuse lives in <code>doSetup</code>, which is
 * invoked first. A subclass may therefore override this method, call <code>doSetup</code>
 * itself, and then handle its own custom parameters.
 *
 * @param context the context whose configuration provides the value stored under
 *          {@link ImportTsv#COLUMNS_CONF_KEY}
 * @throws RuntimeException if the parser reports a row key column index of -1
 */
@Override
protected void setup(Context context) {
  doSetup(context);

  final String columnSpec = context.getConfiguration().get(ImportTsv.COLUMNS_CONF_KEY);
  parser = new ImportTsv.TsvParser(columnSpec, separator);

  final boolean rowKeyMissing = parser.getRowKeyColumnIndex() == -1;
  if (rowKeyMissing) {
    throw new RuntimeException("No row key column specified");
  }
}
/**
 * Checks the column metadata parsed from a four-column spec whose third column is the row
 * key, then parses a four-field line and validates the result with {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  TsvParser tsvParser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");

  // Column 0 names a family only, so its qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(0));

  // Column 1 is written as family:qualifier.
  assertBytesEquals(Bytes.toBytes("col_b"), tsvParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), tsvParser.getQualifier(1));

  // Column 2 is the row key and therefore has neither family nor qualifier.
  assertNull(tsvParser.getFamily(2));
  assertNull(tsvParser.getQualifier(2));
  assertEquals(2, tsvParser.getRowKeyColumnIndex());

  // The spec names no timestamp column, so the index keeps its default value.
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
    tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine result = tsvParser.parse(rawLine, rawLine.length);
  checkParsing(result, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Performs the initialization that is specific to this class, namely building the parser.
 * <p>
 * Initialization that a subclass may want to reuse lives in <code>doSetup</code>, which is
 * invoked first. A subclass may therefore override this method, call <code>doSetup</code>
 * itself, and then handle its own custom parameters.
 *
 * @param context the context whose configuration provides the value stored under
 *          {@link ImportTsv#COLUMNS_CONF_KEY}
 * @throws RuntimeException if the parser reports a row key column index of -1
 */
@Override
protected void setup(Context context) {
  doSetup(context);

  final String columnSpec = context.getConfiguration().get(ImportTsv.COLUMNS_CONF_KEY);
  parser = new ImportTsv.TsvParser(columnSpec, separator);

  final boolean rowKeyMissing = parser.getRowKeyColumnIndex() == -1;
  if (rowKeyMissing) {
    throw new RuntimeException("No row key column specified");
  }
}
/**
 * Checks a column spec that contains both a row key and a timestamp column: neither
 * special column exposes a family or qualifier, their indexes are reported correctly, and
 * the timestamp field of a parsed line is returned as a long.
 */
@Test
public void testTsvParserWithTimestamp() throws BadTsvLineException {
  TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");

  // Columns 0 (row key) and 1 (timestamp) carry no family or qualifier.
  assertNull(parser.getFamily(0));
  assertNull(parser.getQualifier(0));
  assertNull(parser.getFamily(1));
  assertNull(parser.getQualifier(1));

  // Column 2 names a family only, so its qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));

  assertEquals(0, parser.getRowKeyColumnIndex());
  assertEquals(1, parser.getTimestampKeyColumnIndex());

  byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
  ParsedLine parsed = parser.parse(line, line.length);
  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
  assertEquals(1234L, parsed.getTimestamp(-1));
  checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
}
/**
 * Checks the column metadata parsed from a four-column spec whose third column is the row
 * key, then parses a four-field line and validates the result with {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  TsvParser tsvParser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");

  // Column 0 names a family only, so its qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(0));

  // Column 1 is written as family:qualifier.
  assertBytesEquals(Bytes.toBytes("col_b"), tsvParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), tsvParser.getQualifier(1));

  // Column 2 is the row key and therefore has neither family nor qualifier.
  assertNull(tsvParser.getFamily(2));
  assertNull(tsvParser.getQualifier(2));
  assertEquals(2, tsvParser.getRowKeyColumnIndex());

  // The spec names no timestamp column, so the index keeps its default value.
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
    tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine result = tsvParser.parse(rawLine, rawLine.length);
  checkParsing(result, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Checks a column spec that contains attribute and cell-visibility columns: the column
 * indexes are reported correctly and the parsed line exposes the byte offsets at which
 * the attribute and cell-visibility fields start.
 */
@Test
public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
  TsvParser parser = new TsvParser(
    "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
  assertEquals(0, parser.getRowKeyColumnIndex());
  assertEquals(4, parser.getCellVisibilityColumnIndex());

  byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
  ParsedLine parse = parser.parse(line, line.length);

  // "rowkey\tval_a\t1234\t" occupies 18 bytes, so the attribute field starts at offset 18.
  assertEquals(18, parse.getAttributeKeyOffset());
  assertEquals(3, parser.getAttributesKeyColumnIndex());

  String[] attributes = parse.getIndividualAttributes();
  // Expected value first, actual second, so a failure message reads correctly.
  assertEquals("key=>value", attributes[0]);

  // The 10-byte attribute field plus its separator puts cell visibility at offset 29.
  assertEquals(29, parse.getCellVisibilityColumnOffset());
}
/**
 * Checks a column spec that contains attribute and cell-visibility columns: the column
 * indexes are reported correctly and the parsed line exposes the byte offsets at which
 * the attribute and cell-visibility fields start.
 */
@Test
public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
  TsvParser parser = new TsvParser(
    "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
  assertEquals(0, parser.getRowKeyColumnIndex());
  assertEquals(4, parser.getCellVisibilityColumnIndex());

  byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
  ParsedLine parse = parser.parse(line, line.length);

  // "rowkey\tval_a\t1234\t" occupies 18 bytes, so the attribute field starts at offset 18.
  assertEquals(18, parse.getAttributeKeyOffset());
  assertEquals(3, parser.getAttributesKeyColumnIndex());

  String[] attributes = parse.getIndividualAttributes();
  // Expected value first, actual second, so a failure message reads correctly.
  assertEquals("key=>value", attributes[0]);

  // The 10-byte attribute field plus its separator puts cell visibility at offset 29.
  assertEquals(29, parse.getCellVisibilityColumnOffset());
}
if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex()) {
assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(1)); assertBytesEquals(Bytes.toBytes("scol1"), parser.getQualifier(1)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(2)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(2)); assertEquals(0, parser.getRowKeyColumnIndex()); assertFalse(parser.hasTimestamp()); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3)); assertEquals(0, parser.getRowKeyColumnIndex()); assertTrue(parser.hasTimestamp()); assertEquals(2, parser.getTimestampKeyColumnIndex()); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3)); assertEquals(0, parser.getRowKeyColumnIndex()); assertTrue(parser.hasTimestamp()); assertEquals(2, parser.getTimestampKeyColumnIndex()); assertBytesEquals(Bytes.toBytes("col1"), parser.getFamily(3)); assertBytesEquals(Bytes.toBytes("scol2"), parser.getQualifier(3));
@Test public void testTsvParseAttributesKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value"); ParsedLine parse = parser.parse(line, line.length); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("key=>value\tval_a\trowkey\t1234"); parse = parser.parse(line, line.length); assertEquals(3, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\tkey0=>value0,key1=>value1,key2=>value2\t1234\trowkey"); parse = parser.parse(line, line.length);
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
@Test public void testTsvParserParseRowKey() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,col_a,HBASE_TS_KEY", "\t"); assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length);
if (i == length || lineBytes[i] == separatorByte) { endPos = i - 1; if (rkColumnIndex++ == getRowKeyColumnIndex()) { if ((endPos + 1) == startPos) { throw new BadTsvLineException("Empty value for ROW KEY.");
/**
 * Checks a column spec that contains both a row key and a timestamp column: neither
 * special column exposes a family or qualifier, their indexes are reported correctly, and
 * the timestamp field of a parsed line is returned as a long.
 */
@Test
public void testTsvParserWithTimestamp() throws BadTsvLineException {
  TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t");

  // Columns 0 (row key) and 1 (timestamp) carry no family or qualifier.
  assertNull(parser.getFamily(0));
  assertNull(parser.getQualifier(0));
  assertNull(parser.getFamily(1));
  assertNull(parser.getQualifier(1));

  // Column 2 names a family only, so its qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2));

  assertEquals(0, parser.getRowKeyColumnIndex());
  assertEquals(1, parser.getTimestampKeyColumnIndex());

  byte[] line = Bytes.toBytes("rowkey\t1234\tval_a");
  ParsedLine parsed = parser.parse(line, line.length);
  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1.
  assertEquals(1234L, parsed.getTimestamp(-1));
  checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line)));
}
/**
 * Performs the initialization that is specific to this class: the parser and the cell
 * creator.
 * <p>
 * Initialization that a subclass may want to reuse lives in <code>doSetup</code>, which is
 * invoked with the context and its configuration before anything else is built. A subclass
 * may therefore override this method, call <code>doSetup</code> itself, and then handle
 * its own custom parameters.
 *
 * @param context the context whose configuration provides the value stored under
 *          {@link ImportTsv#COLUMNS_CONF_KEY}
 * @throws RuntimeException if the parser reports a row key column index of -1
 */
@Override
protected void setup(Context context) {
  final Configuration jobConf = context.getConfiguration();
  doSetup(context, jobConf);

  final String columnSpec = jobConf.get(ImportTsv.COLUMNS_CONF_KEY);
  parser = new ImportTsv.TsvParser(columnSpec, separator);

  final boolean rowKeyMissing = parser.getRowKeyColumnIndex() == -1;
  if (rowKeyMissing) {
    throw new RuntimeException("No row key column specified");
  }

  // Only reached once a row key column is known to exist.
  this.kvCreator = new CellCreator(jobConf);
}
/**
 * Checks the column metadata parsed from a four-column spec whose third column is the row
 * key, then parses a four-field line and validates the result with {@code checkParsing}.
 */
@Test
public void testTsvParser() throws BadTsvLineException {
  TsvParser tsvParser = new TsvParser("col_a,col_b:qual,HBASE_ROW_KEY,col_d", "\t");

  // Column 0 names a family only, so its qualifier is the empty byte array.
  assertBytesEquals(Bytes.toBytes("col_a"), tsvParser.getFamily(0));
  assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, tsvParser.getQualifier(0));

  // Column 1 is written as family:qualifier.
  assertBytesEquals(Bytes.toBytes("col_b"), tsvParser.getFamily(1));
  assertBytesEquals(Bytes.toBytes("qual"), tsvParser.getQualifier(1));

  // Column 2 is the row key and therefore has neither family nor qualifier.
  assertNull(tsvParser.getFamily(2));
  assertNull(tsvParser.getQualifier(2));
  assertEquals(2, tsvParser.getRowKeyColumnIndex());

  // The spec names no timestamp column, so the index keeps its default value.
  assertEquals(TsvParser.DEFAULT_TIMESTAMP_COLUMN_INDEX,
    tsvParser.getTimestampKeyColumnIndex());

  byte[] rawLine = Bytes.toBytes("val_a\tval_b\tval_c\tval_d");
  ParsedLine result = tsvParser.parse(rawLine, rawLine.length);
  checkParsing(result, Splitter.on("\t").split(Bytes.toString(rawLine)));
}
/**
 * Checks a column spec that contains attribute and cell-visibility columns: the column
 * indexes are reported correctly and the parsed line exposes the byte offsets at which
 * the attribute and cell-visibility fields start.
 */
@Test
public void testTsvParserWithCellVisibilityCol() throws BadTsvLineException {
  TsvParser parser = new TsvParser(
    "HBASE_ROW_KEY,col_a,HBASE_TS_KEY,HBASE_ATTRIBUTES_KEY,HBASE_CELL_VISIBILITY", "\t");
  assertEquals(0, parser.getRowKeyColumnIndex());
  assertEquals(4, parser.getCellVisibilityColumnIndex());

  byte[] line = Bytes.toBytes("rowkey\tval_a\t1234\tkey=>value\tPRIVATE&SECRET");
  ParsedLine parse = parser.parse(line, line.length);

  // "rowkey\tval_a\t1234\t" occupies 18 bytes, so the attribute field starts at offset 18.
  assertEquals(18, parse.getAttributeKeyOffset());
  assertEquals(3, parser.getAttributesKeyColumnIndex());

  String[] attributes = parse.getIndividualAttributes();
  // Expected value first, actual second, so a failure message reads correctly.
  assertEquals("key=>value", attributes[0]);

  // The 10-byte attribute field plus its separator puts cell visibility at offset 29.
  assertEquals(29, parse.getCellVisibilityColumnOffset());
}