/**
 * Turns one line of TSV input into a (row key, original line) pair and emits it.
 *
 * <p>The row key is not copied out of the line: the emitted key wraps the line's own byte array,
 * using the two values returned by {@code parser.parseRowKey} as the offset and length passed to
 * the {@code ImmutableBytesWritable} constructor.
 *
 * @param offset position of the line in the input, only used when reporting a bad line
 * @param value the TSV line; written unchanged as the output value
 * @param context sink that receives the (row key, line) pair
 * @throws IOException wrapping the parse failure when a bad line is met and
 *     {@code skipBadLines} is false
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
  try {
    byte[] lineBytes = value.getBytes();
    Pair<Integer, Integer> keySpan = parser.parseRowKey(lineBytes, value.getLength());
    ImmutableBytesWritable rowKey =
        new ImmutableBytesWritable(lineBytes, keySpan.getFirst(), keySpan.getSecond());
    context.write(rowKey, value);
  } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException badLine) {
    // Optionally echo the offending line itself before the diagnostic.
    if (logBadLines) {
      System.err.println(value);
    }
    String report = "Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage();
    System.err.println(report);
    if (!skipBadLines) {
      throw new IOException(badLine);
    }
    // Bad lines are tolerated: count this one and move on to the next record.
    incrementBadLineCount(1);
  } catch (InterruptedException interrupted) {
    LOG.error("Interrupted while emitting TSV text", interrupted);
    // Restore the interrupt flag so code further up the stack can observe it.
    Thread.currentThread().interrupt();
  }
}
}
assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(0, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("\t\tval_a\t1234"); parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(6, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("val_a"); rowKeyOffsets = parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) { assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(11, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue());
/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>The row key is located inside the line with {@code parser.parseRowKey}; the two values it
 * returns are passed as the offset and length of an {@code ImmutableBytesWritable} that wraps the
 * line's own byte array. The full line is emitted unchanged as the value.
 *
 * @param offset position of the line in the input, only used when reporting a bad line
 * @param value the TSV line to convert
 * @param context sink that receives the (row key, line) pair
 * @throws IOException wrapping the parse failure when a bad line is met and
 *     {@code skipBadLines} is false
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
  try {
    Pair<Integer, Integer> rowKeyOffsets =
        parser.parseRowKey(value.getBytes(), value.getLength());
    ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
        value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
    context.write(rowKey, value);
  } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException badLine) {
    if (logBadLines) {
      System.err.println(value);
    }
    System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
    if (skipBadLines) {
      // Tolerated bad line: count it and carry on with the next record.
      incrementBadLineCount(1);
      return;
    }
    throw new IOException(badLine);
  } catch (InterruptedException e) {
    // Report through the class logger (as the sibling mapper does) rather than dumping a raw
    // stack trace to stderr, then restore the interrupt flag for callers up the stack.
    LOG.error("Interrupted while emitting TSV text", e);
    Thread.currentThread().interrupt();
  }
}
}
public void map(LongWritable offset, Text value, Context context) throws IOException { try { Pair<Integer,Integer> rowKeyOffests = parser.parseRowKey(value.getBytes(), value.getLength()); ImmutableBytesWritable rowKey = new ImmutableBytesWritable( value.getBytes(), rowKeyOffests.getFirst(), rowKeyOffests.getSecond());
assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(0, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("\t\tval_a\t1234"); parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(6, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("val_a"); rowKeyOffsets = parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) { assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(11, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue());
assertEquals(0, parser.getRowKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\tval_a\t1234"); Pair<Integer, Integer> rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(0, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("\t\tval_a\t1234"); parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException on empty rowkey."); } catch (BadTsvLineException b) { assertEquals(1, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\trowkey\t1234"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(6, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue()); try { line = Bytes.toBytes("val_a"); rowKeyOffsets = parser.parseRowKey(line, line.length); fail("Should get BadTsvLineException when number of columns less than rowkey position."); } catch (BadTsvLineException b) { assertEquals(2, parser.getRowKeyColumnIndex()); line = Bytes.toBytes("val_a\t1234\trowkey"); rowKeyOffsets = parser.parseRowKey(line, line.length); assertEquals(11, rowKeyOffsets.getFirst().intValue()); assertEquals(6, rowKeyOffsets.getSecond().intValue());
/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>The row key is located inside the line with {@code parser.parseRowKey}; the two values it
 * returns are passed as the offset and length of an {@code ImmutableBytesWritable} that wraps the
 * line's own byte array. The full line is emitted unchanged as the value.
 *
 * @param offset position of the line in the input, only used when reporting a bad line
 * @param value the TSV line to convert
 * @param context sink that receives the (row key, line) pair
 * @throws IOException wrapping the parse failure when a bad line is met and
 *     {@code skipBadLines} is false
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
  try {
    Pair<Integer, Integer> rowKeyOffsets =
        parser.parseRowKey(value.getBytes(), value.getLength());
    ImmutableBytesWritable rowKey = new ImmutableBytesWritable(
        value.getBytes(), rowKeyOffsets.getFirst(), rowKeyOffsets.getSecond());
    context.write(rowKey, value);
  } catch (ImportTsv.TsvParser.BadTsvLineException | IllegalArgumentException badLine) {
    if (logBadLines) {
      System.err.println(value);
    }
    System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
    if (skipBadLines) {
      // Tolerated bad line: count it and carry on with the next record.
      incrementBadLineCount(1);
      return;
    }
    throw new IOException(badLine);
  } catch (InterruptedException e) {
    // Report through the class logger (as the sibling mapper does) rather than dumping a raw
    // stack trace to stderr, then restore the interrupt flag for callers up the stack.
    LOG.error("Interrupted while emitting TSV text", e);
    Thread.currentThread().interrupt();
  }
}
}