Refine search
/**
 * Encodes {@code text} through a {@link Text} instance and writes the resulting bytes
 * to {@code out}, mirroring what TextOutputFormat would emit. Going through
 * {@link Text} is what makes non-ASCII characters come out correctly encoded.
 *
 * @param text the string to write
 * @param out the stream receiving the encoded bytes
 * @throws IOException if the write to {@code out} fails
 */
private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
  final Text encoded = new Text(text);
  final byte[] buffer = encoded.getBytes();
  final int validLength = encoded.getLength();
  // Only the first validLength bytes of the backing array are written.
  out.write(buffer, 0, validLength);
}
/**
 * Reports whether the bytes of {@code sub} occur contiguously in the bytes of {@code s},
 * considering only occurrences that lie entirely inside {@code [startS, endS)}.
 *
 * @param s the text to search in
 * @param sub the byte sequence to look for
 * @param startS first byte offset in {@code s} at which a match may begin
 * @param endS exclusive upper bound on the byte offsets a match may cover
 * @return {@code true} if at least one occurrence is found, {@code false} otherwise
 */
private static boolean find(Text s, Text sub, int startS, int endS) {
  final byte[] haystack = s.getBytes();
  final byte[] needle = sub.getBytes();
  final int needleLen = sub.getLength();
  // Exclusive bound on start offsets: a match needs needleLen bytes before endS.
  final int startLimit = endS - needleLen + 1;

  candidates:
  for (int offset = startS; offset < startLimit; offset++) {
    for (int k = 0; k < needleLen; k++) {
      if (haystack[offset + k] != needle[k]) {
        // Mismatch at this offset; move on to the next candidate position.
        continue candidates;
      }
    }
    // Every byte of the needle matched at this offset.
    return true;
  }
  return false;
}
/**
 * Builds the concatenation of {@code n} copies of the bytes of {@code s} and stores it
 * in the {@code result} field (declared outside this view), which is then returned.
 *
 * @param s the text to repeat; may be {@code null}
 * @param n the number of repetitions; may be {@code null}
 * @return {@code null} if either argument is {@code null}; otherwise {@code result},
 *         holding the repeated bytes (empty when the computed length is negative)
 */
public Text evaluate(Text s, IntWritable n) {
  if (n == null || s == null) {
    return null;
  }

  // Fetch the source buffer and its valid length once instead of once per copied byte.
  final byte[] source = s.getBytes();
  final int sourceLen = s.getLength();

  // A negative product (e.g. negative repeat count) is treated as an empty result.
  // NOTE(review): this is an int multiplication and can wrap for very large inputs;
  // the original behaviour is kept as-is here.
  int len = n.get() * sourceLen;
  if (len < 0) {
    len = 0;
  }

  // Reuse the result's current backing array when it is already large enough.
  byte[] data = result.getBytes();
  if (data.length < len) {
    data = new byte[len];
  }

  // Lay down one full copy of the source per iteration.
  for (int i = 0; i < len; i += sourceLen) {
    System.arraycopy(source, 0, data, i, sourceLen);
  }

  result.set(data, 0, len);
  return result;
}
}
/**
 * Writes a string as a VInt length {@code n} followed by {@code n} bytes in
 * {@link Text} format. A {@code null} string is written as the single VInt {@code -1}
 * with no payload.
 *
 * @param out the sink to write to
 * @param s the string to write; may be {@code null}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    // Null marker: a length of -1 and nothing else.
    writeVInt(out, -1);
    return;
  }

  final Text encoded = new Text(s);
  final byte[] payload = encoded.getBytes();
  final int payloadLen = encoded.getLength();
  writeVInt(out, payloadLen);
  out.write(payload, 0, payloadLen);
}
/**
 * Checks {@code writeValue} of the writer obtained for {@code type} against a bytes
 * column in which rows 0, 2 and 4 hold "alpha"/"beta"/"alpha" and rows 1 and 3 are
 * flagged null.
 *
 * @param type the type for which a writer is obtained via {@code getWriter}
 * @throws HiveException declared by the signature; propagated from the calls made here
 */
private void testWriterText(TypeInfo type) throws HiveException {
  final Text alpha = new Text("alpha");
  final Text beta = new Text("beta");

  final BytesColumnVector column = new BytesColumnVector(vectorSize);
  column.noNulls = false;
  column.initBuffer();
  column.setVal(0, alpha.getBytes(), 0, alpha.getLength());
  column.isNull[1] = true;
  column.setVal(2, beta.getBytes(), 0, beta.getLength());
  column.isNull[3] = true;
  column.setVal(4, alpha.getBytes(), 0, alpha.getLength());

  final VectorExpressionWriter writer = getWriter(type);
  for (int row = 0; row < vectorSize; row++) {
    final Writable actual = (Writable) writer.writeValue(column, row);
    if (actual == null) {
      // A null result is only acceptable for a row flagged as null.
      Assert.assertTrue(column.isNull[row]);
      continue;
    }

    // Rebuild the expected writable from the raw bytes stored for this row.
    final byte[] raw = new byte[column.length[row]];
    System.arraycopy(column.vector[row], column.start[row], raw, 0, column.length[row]);
    final Writable expected = getWritableValue(type, raw);
    Assert.assertEquals(expected, actual);
  }
}
private void testSetterText(TypeInfo type) throws HiveException { Text t1 = new Text("alpha"); Text t2 = new Text("beta"); BytesColumnVector bcv = new BytesColumnVector(vectorSize); bcv.noNulls = false; bcv.initBuffer(); bcv.setVal(0, t1.getBytes(), 0, t1.getLength()); bcv.isNull[1] = true; bcv.setVal(2, t2.getBytes(), 0, t2.getLength()); bcv.isNull[3] = true; bcv.setVal(4, t1.getBytes(), 0, t1.getLength()); Object[] values = new Object[this.vectorSize]; VectorExpressionWriter vew = getWriter(type); for (int i = 0; i < vectorSize; i++) { values[i] = null; // setValue() should be able to handle null input Writable w = (Writable) vew.setValue(values[i], bcv, i); if (w != null) { byte [] val = new byte[bcv.length[i]]; System.arraycopy(bcv.vector[i], bcv.start[i], val, 0, bcv.length[i]); Writable expected = getWritableValue(type, val); Assert.assertEquals(expected, w); } else { Assert.assertTrue(bcv.isNull[i]); } } }
/**
 * Searches the bytes of {@code s} for the byte sequence of {@code sub}, accepting only
 * matches that fit completely within the window {@code [startS, endS)}.
 *
 * @param s the text whose bytes are searched
 * @param sub the text whose bytes are searched for
 * @param startS lowest byte offset in {@code s} where a match may start
 * @param endS exclusive end of the searched window in {@code s}
 * @return {@code true} when a match exists, {@code false} when none does
 */
private static boolean find(Text s, Text sub, int startS, int endS) {
  final byte[] source = s.getBytes();
  final byte[] pattern = sub.getBytes();
  final int patternLen = sub.getLength();

  boolean found = false;
  int pos = startS;
  // Stop at the first hit, or once the pattern can no longer fit before endS.
  while (!found && pos < endS - patternLen + 1) {
    int matched = 0;
    while (matched < patternLen && source[pos + matched] == pattern[matched]) {
      matched++;
    }
    // All pattern bytes compared equal exactly when the inner scan ran to completion.
    found = (matched == patternLen);
    pos++;
  }
  return found;
}
/**
 * Runs a single key/value pair through {@code CubeHFileMapper} and verifies that the
 * value bytes of the {@code KeyValue} captured in {@code outKV[1]} equal the bytes of
 * the input value.
 */
@Test
public void testBasic() throws Exception {
  final Configuration conf = HadoopUtil.getCurrentConfiguration();
  final Context mapContext = MockupMapContext.create(conf, cubeName, outKV);

  final CubeHFileMapper hfileMapper = new CubeHFileMapper();
  hfileMapper.doSetup(mapContext);

  final Text inputKey = new Text("not important");
  final Text inputValue = new Text(new byte[] { 2, 2, 51, -79, 1 });
  hfileMapper.map(inputKey, inputValue, mapContext);

  final KeyValue emitted = (KeyValue) outKV[1];
  // Compare only the valid region of the input against the emitted value region.
  final int comparison = Bytes.compareTo(
      inputValue.getBytes(), 0, inputValue.getLength(),
      emitted.getValueArray(), emitted.getValueOffset(), emitted.getValueLength());
  assertTrue(comparison == 0);
}
@Override public byte[] getBytes(Text writable) { //@TODO There is no reason to decode then encode the string to bytes really //@FIXME this issue with CTRL-CHAR ^0 added by Text at the end of string and Json serd does not like that. try { return Text.decode(writable.getBytes(), 0, writable.getLength()).getBytes(Charset.forName("UTF-8")); } catch (CharacterCodingException e) { throw new RuntimeException(e); } }