/**
 * Adds a value, given in byte array form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(byte[], double, Vector)}.
 *
 * @param originalForm The original form of the value as a byte array.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(byte[] originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a value, given in string form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(String, double, Vector)}.
 *
 * @param originalForm The original form of the value as a string.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(String originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a value, given in string form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(String, double, Vector)}.
 *
 * @param originalForm The original form of the value as a string.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(String originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a value, given in byte array form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(byte[], double, Vector)}.
 *
 * @param originalForm The original form of the value as a byte array.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(byte[] originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a value, given in string form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(String, double, Vector)}.
 *
 * @param originalForm The original form of the value as a string.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(String originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a value, given in byte array form, to a vector using a weight of 1.0.
 * This is a convenience overload that delegates to the three-argument
 * {@code addToVector(byte[], double, Vector)}.
 *
 * @param originalForm The original form of the value as a byte array.
 * @param data         The vector to which the value should be added.
 */
public void addToVector(byte[] originalForm, Vector data) {
  // Weight applied when the caller does not supply one.
  final double unitWeight = 1.0;
  addToVector(originalForm, unitWeight, data);
}
/**
 * Adds a weighted value expressed in byte array form to a vector.
 * Subclasses supply the actual encoding; how {@code weight} is combined
 * with the encoded value is implementation-specific.
 *
 * @param originalForm The original form of the value as a byte array.
 *                     NOTE(review): whether {@code null} is acceptable
 *                     depends on the concrete encoder - confirm per subclass.
 * @param weight       The weight to apply to the value being added.
 * @param data         The vector to which the value should be added.
 */
public abstract void addToVector(byte[] originalForm, double weight, Vector data);
/**
 * Adds a weighted value expressed in byte array form to a vector.
 * Subclasses supply the actual encoding; how {@code weight} is combined
 * with the encoded value is implementation-specific.
 *
 * @param originalForm The original form of the value as a byte array.
 *                     NOTE(review): whether {@code null} is acceptable
 *                     depends on the concrete encoder - confirm per subclass.
 * @param weight       The weight to apply to the value being added.
 * @param data         The vector to which the value should be added.
 */
public abstract void addToVector(byte[] originalForm, double weight, Vector data);
/**
 * Adds a weighted value expressed in byte array form to a vector.
 * Subclasses supply the actual encoding; how {@code weight} is combined
 * with the encoded value is implementation-specific.
 *
 * @param originalForm The original form of the value as a byte array.
 *                     NOTE(review): whether {@code null} is acceptable
 *                     depends on the concrete encoder - confirm per subclass.
 * @param weight       The weight to apply to the value being added.
 * @param data         The vector to which the value should be added.
 */
public abstract void addToVector(byte[] originalForm, double weight, Vector data);
/**
 * Encodes one input record into a sparse vector and emits it under the
 * record's key.
 *
 * <p>The backing vector is sequential-access or random-access depending on
 * {@code sequentialVectors}; when {@code namedVectors} is set it is wrapped
 * in a {@link NamedVector} labelled with the key text.
 *
 * @param key     Record key; its string form is the output key and, when
 *                naming is enabled, the vector name.
 * @param value   Record value; its string form is passed to the encoder.
 * @param context Context the (key, vector) pair is written to.
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  String label = key.toString();

  Vector encoded = sequentialVectors
      ? new SequentialAccessSparseVector(cardinality)
      : new RandomAccessSparseVector(cardinality);
  if (namedVectors) {
    encoded = new NamedVector(encoded, label);
  }

  encoder.addToVector(value.toString(), encoded);

  // Emit under a fresh Text copy of the key rather than the incoming instance.
  context.write(new Text(label), new VectorWritable(encoded));
}
}
/**
 * Benchmark entry point: reads a text file line by line, parses each line
 * with {@code Line}, and feeds every field into a per-field
 * {@link ConstantValueEncoder}, then prints the elapsed wall-clock time.
 *
 * <p>The input path is taken from {@code args[1]} (index 1, not 0;
 * {@code args[0]} is not used here). The timed region covers opening and
 * reading the file as well as encoding.
 *
 * @param args Command-line arguments; {@code args[1]} is the input file.
 * @throws IOException If the file cannot be opened or read.
 */
public static void main(String[] args) throws IOException {
  FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
  for (int i = 0; i < FIELDS; i++) {
    encoder[i] = new ConstantValueEncoder("v" + i);
  }

  long t0 = System.currentTimeMillis();
  Vector v = new DenseVector(1000);

  BufferedReader in = new BufferedReader(new FileReader(args[1]));
  try {
    String line = in.readLine();
    while (line != null) {
      // The same vector is reused for every line, so clear it first.
      v.assign(0);
      Line x = new Line(line);
      for (int i = 0; i < FIELDS; i++) {
        // No original form is supplied; the field's numeric value is passed as the weight.
        encoder[i].addToVector((byte[]) null, x.getDouble(i), v);
      }
      line = in.readLine();
    }
  } finally {
    // Always release the file handle, including when reading or parsing fails.
    in.close();
  }

  System.out.printf("\nElapsed time = %.3f s\n", (System.currentTimeMillis() - t0) / 1000.0);
}
/**
 * Encodes one input record into a sparse vector and emits it under the
 * record's key.
 *
 * <p>The backing vector is sequential-access or random-access depending on
 * {@code sequentialVectors}; when {@code namedVectors} is set it is wrapped
 * in a {@link NamedVector} labelled with the key text.
 *
 * @param key     Record key; its string form is the output key and, when
 *                naming is enabled, the vector name.
 * @param value   Record value; its string form is passed to the encoder.
 * @param context Context the (key, vector) pair is written to.
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  String label = key.toString();

  Vector encoded = sequentialVectors
      ? new SequentialAccessSparseVector(cardinality)
      : new RandomAccessSparseVector(cardinality);
  if (namedVectors) {
    encoded = new NamedVector(encoded, label);
  }

  encoder.addToVector(value.toString(), encoded);

  // Emit under a fresh Text copy of the key rather than the incoming instance.
  context.write(new Text(label), new VectorWritable(encoded));
}
}
/**
 * Encodes one input record into a sparse vector and emits it under the
 * record's key.
 *
 * <p>The backing vector is sequential-access or random-access depending on
 * {@code sequentialVectors}; when {@code namedVectors} is set it is wrapped
 * in a {@link NamedVector} labelled with the key text.
 *
 * @param key     Record key; its string form is the output key and, when
 *                naming is enabled, the vector name.
 * @param value   Record value; its string form is passed to the encoder.
 * @param context Context the (key, vector) pair is written to.
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  String label = key.toString();

  Vector encoded = sequentialVectors
      ? new SequentialAccessSparseVector(cardinality)
      : new RandomAccessSparseVector(cardinality);
  if (namedVectors) {
    encoded = new NamedVector(encoded, label);
  }

  encoder.addToVector(value.toString(), encoded);

  // Emit under a fresh Text copy of the key rather than the incoming instance.
  context.write(new Text(label), new VectorWritable(encoded));
}
}
/**
 * Benchmark entry point: loads the whole input file into memory, walks it
 * record by record with {@code FastLine}, and feeds every field into a
 * per-field {@link ConstantValueEncoder}, then prints the elapsed
 * wall-clock time.
 *
 * <p>The input path is taken from {@code args[1]} (index 1, not 0). The
 * timed region includes reading the file into memory.
 *
 * @param args Command-line arguments; {@code args[1]} is the input file.
 * @throws IOException If the file cannot be read.
 */
public static void main(String[] args) throws IOException {
  FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
  for (int field = 0; field < FIELDS; field++) {
    encoder[field] = new ConstantValueEncoder("v" + field);
  }

  long startMillis = System.currentTimeMillis();
  Vector scratch = new DenseVector(1000);

  byte[] contents = FileUtils.readFileToByteArray(new File(args[1]));
  ByteBuffer buf = ByteBuffer.wrap(contents);

  // FastLine.read returning null ends the loop.
  for (FastLine line = FastLine.read(buf); line != null; line = FastLine.read(buf)) {
    // The same vector is reused for every record, so clear it first.
    scratch.assign(0);
    for (int field = 0; field < FIELDS; field++) {
      // No original form is supplied; the field's numeric value is passed as the weight.
      encoder[field].addToVector((byte[]) null, line.getDouble(field), scratch);
    }
  }

  double elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000.0;
  System.out.printf("\nElapsed time = %.3f s\n", elapsedSeconds);
}
/**
 * Example entry point: tokenizes a fixed sample sentence with Lucene's
 * {@link StandardAnalyzer}, hashes each token into a 100-element sparse
 * vector via a {@link StaticWordValueEncoder}, and prints the result as a
 * {@link SequentialAccessSparseVector}.
 *
 * <p>The token stream is consumed using the documented Lucene workflow:
 * {@code reset()}, the {@code incrementToken()} loop, {@code end()}, and
 * finally {@code close()}.
 *
 * @param args Unused.
 * @throws IOException If the token stream reports an I/O failure.
 */
public static void main(String[] args) throws IOException {
  FeatureVectorEncoder encoder = new StaticWordValueEncoder("text");
  Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

  StringReader in = new StringReader("text to magically vectorize");
  TokenStream ts = analyzer.tokenStream("body", in);
  TermAttribute termAtt = ts.addAttribute(TermAttribute.class);

  Vector v1 = new RandomAccessSparseVector(100);
  try {
    // Consumers must reset the stream before the first incrementToken().
    ts.reset();
    while (ts.incrementToken()) {
      // The term buffer may be longer than the term; only the first termLen chars are valid.
      char[] termBuffer = termAtt.termBuffer();
      int termLen = termAtt.termLength();
      String w = new String(termBuffer, 0, termLen);
      encoder.addToVector(w, 1, v1);
    }
    // Signal end-of-stream so the analysis chain can do its end-of-stream work.
    ts.end();
  } finally {
    // Release the stream's resources even if tokenizing fails.
    ts.close();
  }

  System.out.printf("%s\n", new SequentialAccessSparseVector(v1));
}
/**
 * Classifies a piece of text and returns the model's score for every
 * category.
 *
 * <p>The text is lower-cased, fed through {@code enc}, and flushed into a
 * feature vector with weight 1; a bias term is then added with weight 1
 * before the model's {@code classifyFull} is invoked.
 *
 * <p>NOTE(review): {@code toLowerCase()} uses the JVM default locale -
 * confirm this matches how the model's training text was normalized.
 *
 * @param text The text to classify.
 * @return One score per element of the vector returned by the model, in
 *         index order.
 * @throws TException Declared by the service interface being implemented.
 */
@Override
public List<Double> classify(String text) throws TException {
  Vector features = new RandomAccessSparseVector(FEATURES);

  String normalized = text.toLowerCase();
  enc.addText(normalized);
  enc.flush(1, features);

  // The bias encoder is called with no original form, only a weight.
  bias.addToVector((byte[]) null, 1, features);

  Vector scores = model.classifyFull(features);

  // Copy the score vector into a plain list for the return value.
  List<Double> result = Lists.newArrayList();
  int idx = 0;
  while (idx < scores.size()) {
    result.add(scores.get(idx));
    idx++;
  }
  return result;
}
/** * Adds all of the tokens that we counted up to a vector. */ public void flush(double weight, Vector data) { for (String word : counts.elementSet()) { // weight words by log_2(tf) times whatever other weight we are given wordEncoder.addToVector(word, weight * Math.log1p(counts.count(word)) / LOG_2, data); } counts.clear(); }
/** * Adds all of the tokens that we counted up to a vector. */ public void flush(double weight, Vector data) { for (String word : counts.elementSet()) { // weight words by log_2(tf) times whatever other weight we are given wordEncoder.addToVector(word, weight * Math.log1p(counts.count(word)) / LOG_2, data); } counts.clear(); }
/** * Adds all of the tokens that we counted up to a vector. */ public void flush(double weight, Vector data) { for (String word : counts.elementSet()) { // weight words by log_2(tf) times whatever other weight we are given wordEncoder.addToVector(word, weight * Math.log1p(counts.count(word)) / LOG_2, data); } counts.clear(); }
/**
 * Checks that encoding two words into a 200-element dense vector sets
 * exactly the indexes 7, 118, 119 and 199, each to the value 1.
 */
@Test
public void testAddToVector() {
  FeatureVectorEncoder enc = new StaticWordValueEncoder("word");
  Vector v = new DenseVector(200);
  enc.addToVector("word1", v);
  enc.addToVector("word2", v);

  // Expected non-zero positions, in the order the vector iterates them.
  Iterator<Integer> expectedIndexes = ImmutableList.of(7, 118, 119, 199).iterator();
  for (Vector.Element element : v.nonZeroes()) {
    assertEquals(expectedIndexes.next().intValue(), element.index());
    assertEquals(1, element.get(), 0);
  }

  // Every expected index must have been matched by a non-zero element.
  assertFalse(expectedIndexes.hasNext());
}