/**
 * Encodes a map of named feature values into a sparse vector.
 *
 * <p>A fresh {@code RandomAccessSparseVector} of size {@code vectorSize} is created. When
 * {@code hasIntercept} is set, {@code interceptAdder} first adds the constant term {@code "1"}.
 * Every entry of {@code features} is then dispatched on the type registered for its key in
 * {@code types}:
 * <ul>
 *   <li>{@code _class}: the string {@code key + ":" + value} is added with weight 1, so the
 *       value must be a {@code String};</li>
 *   <li>{@code _float}: the key is added with the numeric value as its weight. Any
 *       {@link Number} is accepted (not only {@code Double}), so integer- or float-typed
 *       inputs no longer fail with a {@code ClassCastException}.</li>
 * </ul>
 *
 * <p>NOTE(review): a key with no entry in {@code types} presumably makes the switch fail with a
 * {@code NullPointerException}; confirm that callers only pass declared features.
 *
 * @param features feature name to raw value; values must match the type declared for the key
 * @return the newly built vector holding the encoded features
 * @throws ClassCastException if a value does not match its declared type
 */
Vector getVector(Map<String, Object> features) {
    Vector v = new RandomAccessSparseVector(vectorSize);
    if (hasIntercept) {
        interceptAdder.addToVector("1", v);
    }
    for (Map.Entry<String, Object> feature : features.entrySet()) {
        String key = feature.getKey();
        Object value = feature.getValue();
        switch (types.get(key)) {
            case _class:
                // Categorical feature: one slot per distinct "name:value" pair.
                featureAdder.addToVector(key + ":" + (String) value, 1, v);
                break;
            case _float:
                // Go through Number so Integer/Long/Float values are accepted as well as Double;
                // for a Double this yields exactly the value the old (double) cast produced.
                featureAdder.addToVector(key, ((Number) value).doubleValue(), v);
                break;
            default:
                // Any other declared type contributes nothing to the vector (unchanged behavior).
                break;
        }
    }
    return v;
}
} // closes the enclosing type, whose header lies outside this chunk
/**
 * Encodes three words with a {@code StaticWordValueEncoder} whose dictionary only knows
 * "word1" (3.0) and "word2" (1.5), then checks the positions and the values of the non-zero
 * entries of the resulting 200-element dense vector.
 *
 * <p>"word3" is not in the dictionary; the 0.75 entries in the expected values are presumably
 * the encoder's weight for unknown words (TODO confirm against the encoder).
 */
@Test
public void testStaticWeights() {
  StaticWordValueEncoder encoder = new StaticWordValueEncoder("word");
  encoder.setDictionary(ImmutableMap.<String, Double>of("word1", 3.0, "word2", 1.5));

  Vector encoded = new DenseVector(200);
  for (String word : new String[] {"word1", "word2", "word3"}) {
    encoder.addToVector(word, encoded);
  }

  Iterator<Integer> expectedIndexes = ImmutableList.of(7, 101, 118, 119, 152, 199).iterator();
  Iterator<Double> expectedValues = ImmutableList.of(3.0, 0.75, 1.5, 1.5, 0.75, 3.0).iterator();

  // First pass: every non-zero entry must sit at the next expected index.
  for (Vector.Element element : encoded.nonZeroes()) {
    assertEquals(expectedIndexes.next().intValue(), element.index());
  }

  // Second pass: every non-zero entry must carry the next expected weight, exactly.
  for (Vector.Element element : encoded.nonZeroes()) {
    assertEquals(
        String.format("checking v[%d]", element.index()),
        expectedValues.next(),
        element.get(),
        0);
  }

  // All expected indexes must have been consumed by the first pass.
  assertFalse(expectedIndexes.hasNext());
}
@Test public void testAddToVector() { TextValueEncoder enc = new TextValueEncoder("text"); Vector v1 = new DenseVector(200); enc.addToVector("test1 and more", v1); enc.flush(1, v1); // should set 6 distinct locations to 1 assertEquals(6.0, v1.norm(1), 0); assertEquals(1.0, v1.maxValue(), 0); // now some fancy weighting StaticWordValueEncoder w = new StaticWordValueEncoder("text"); w.setDictionary(ImmutableMap.<String, Double>of("word1", 3.0, "word2", 1.5)); enc.setWordEncoder(w); // should set 6 locations to something Vector v2 = new DenseVector(200); enc.addToVector("test1 and more", v2); enc.flush(1, v2); // this should set the same 6 locations to the same values Vector v3 = new DenseVector(200); w.addToVector("test1", v3); w.addToVector("and", v3); w.addToVector("more", v3); assertEquals(0, v3.minus(v2).norm(1), 0); // moreover, the locations set in the unweighted case should be the same as in the weighted case assertEquals(v3.zSum(), v3.dot(v1), 0); }