/**
 * Adds the interaction of two string values to a vector.
 *
 * <p>Both strings are converted with {@code bytesForString} and the work is delegated to the
 * byte-array overload of this method.
 *
 * @param original1 The original form of the first value as a string.
 * @param original2 The original form of the second value as a string.
 * @param weight    How much to weight this interaction
 * @param data      The vector to which the value should be added.
 */
public void addInteractionToVector(String original1, String original2, double weight, Vector data) {
  addInteractionToVector(bytesForString(original1), bytesForString(original2), weight, data);
}
/** Returns the value of {@code getProbes()}; serves as the probe-loop bound when interactions are added. */
private int probes() { return getProbes(); }
/**
 * Renders a value in a human-readable form that shows how it is being interpreted.
 *
 * <p>The result is this encoder's name and the original form joined by a colon, formatted
 * with {@code Locale.ENGLISH}.
 *
 * @param originalForm The original form of the value as a string.
 * @return A string that a human can read.
 */
@Override
public String asString(String originalForm) {
  final String encoderName = getName();
  return String.format(Locale.ENGLISH, "%s:%s", encoderName, originalForm);
}
/**
 * Adds the interaction of two values to a vector.
 *
 * <p>For each probe, every hash location produced by the first encoder is combined with every
 * hash location produced by the second encoder as {@code (k + j) % data.size()}, and the
 * weight returned by {@code getWeight(originalForm1, originalForm2, weight)} is added to the
 * vector at that index. When tracing is enabled, each touched index is recorded under the
 * label {@code "<originalForm1>:<originalForm2>"} (both decoded as UTF-8).
 *
 * @param originalForm1 The original form of the first value as a byte array.
 * @param originalForm2 The original form of the second value as a byte array.
 * @param weight        How much to weight this interaction
 * @param data          The vector to which the value should be added.
 */
public void addInteractionToVector(byte[] originalForm1, byte[] originalForm2, double weight, Vector data) {
  String name = getName();
  double w = getWeight(originalForm1, originalForm2, weight);

  // Loop-invariant values: computed once instead of on every iteration.
  int size = data.size();
  int probeCount = probes();
  // Built lazily on first use so nothing is allocated when tracing is disabled.
  String traceLabel = null;

  for (int i = 0; i < probeCount; i++) {
    Iterable<Integer> jValues =
        secondEncoder.hashesForProbe(originalForm2, size, name, i % secondEncoder.getProbes());
    for (int k : firstEncoder.hashesForProbe(originalForm1, size, name, i % firstEncoder.getProbes())) {
      for (int j : jValues) {
        // Sum in long: k + j can exceed Integer.MAX_VALUE when the vector is very large
        // (presumably each hash is below size -- confirm against hashesForProbe), and an
        // int overflow would produce a negative index.
        int n = (int) (((long) k + j) % size);
        if (isTraceEnabled()) {
          if (traceLabel == null) {
            traceLabel = String.format("%s:%s",
                new String(originalForm1, Charsets.UTF_8),
                new String(originalForm2, Charsets.UTF_8));
          }
          trace(traceLabel, n);
        }
        data.set(n, data.get(n) + w);
      }
    }
  }
}
@Test public void testAddToVectorWithTextValueEncoder() { WordValueEncoder wv = new StaticWordValueEncoder("word"); TextValueEncoder tv = new TextValueEncoder("text"); InteractionValueEncoder enc = new InteractionValueEncoder("interactions", wv, tv); Vector v1 = new DenseVector(200); enc.addInteractionToVector("a","some text here",1.0, v1); int k = enc.getProbes(); // should interact "a" with each of "some","text" and "here" assertEquals((float) k*3, v1.norm(1), 0); }
/** Checks that a single-probe interaction writes one vector entry and one trace entry. */
@Test
public void testTraceDictionary() {
  StaticWordValueEncoder first = new StaticWordValueEncoder("first");
  StaticWordValueEncoder second = new StaticWordValueEncoder("second");
  InteractionValueEncoder interactions =
      new InteractionValueEncoder("interactions", first, second);

  Map<String, Set<Integer>> traces = Maps.newHashMap();
  interactions.setProbes(1);
  interactions.setTraceDictionary(traces);

  Vector vector = new DenseVector(10);
  interactions.addInteractionToVector("a", "b", 1, vector);

  assertEquals(1, vector.getNumNonZeroElements());
  assertEquals(1, traces.size());
  assertEquals("interactions=a:b", getFirst(traces.keySet(), null));
}
/**
 * Computes the hash location for one probe of this encoder.
 *
 * <p>Only {@code name}, {@code probe} and {@code dataSize} are forwarded to {@code hash};
 * {@code originalForm} is not read by this implementation.
 *
 * @param originalForm The original form of the value as a byte array (unused here).
 * @param dataSize     Forwarded to {@code hash}; presumably the length of the target vector -- confirm.
 * @param name         The name forwarded to {@code hash}.
 * @param probe        The probe index forwarded to {@code hash}.
 * @return The result of {@code hash(name, probe, dataSize)}.
 */
@Override protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { return hash(name, probe, dataSize); } }
/**
 * Adds the interaction of two values to a vector.
 *
 * <p>For each probe, every hash location produced by the first encoder is combined with every
 * hash location produced by the second encoder as {@code (k + j) % data.size()}, and the
 * weight returned by {@code getWeight(originalForm1, originalForm2, weight)} is added to the
 * vector at that index. When tracing is enabled, each touched index is recorded under the
 * label {@code "<originalForm1>:<originalForm2>"} (both decoded as UTF-8).
 *
 * @param originalForm1 The original form of the first value as a byte array.
 * @param originalForm2 The original form of the second value as a byte array.
 * @param weight        How much to weight this interaction
 * @param data          The vector to which the value should be added.
 */
public void addInteractionToVector(byte[] originalForm1, byte[] originalForm2, double weight, Vector data) {
  String name = getName();
  double w = getWeight(originalForm1, originalForm2, weight);

  // Loop-invariant values: computed once instead of on every iteration.
  int size = data.size();
  int probeCount = probes();
  // Built lazily on first use so nothing is allocated when tracing is disabled.
  String traceLabel = null;

  for (int i = 0; i < probeCount; i++) {
    Iterable<Integer> jValues =
        secondEncoder.hashesForProbe(originalForm2, size, name, i % secondEncoder.getProbes());
    for (int k : firstEncoder.hashesForProbe(originalForm1, size, name, i % firstEncoder.getProbes())) {
      for (int j : jValues) {
        // Sum in long: k + j can exceed Integer.MAX_VALUE when the vector is very large
        // (presumably each hash is below size -- confirm against hashesForProbe), and an
        // int overflow would produce a negative index.
        int n = (int) (((long) k + j) % size);
        if (isTraceEnabled()) {
          if (traceLabel == null) {
            traceLabel = String.format("%s:%s",
                new String(originalForm1, Charsets.UTF_8),
                new String(originalForm2, Charsets.UTF_8));
          }
          trace(traceLabel, n);
        }
        data.set(n, data.get(n) + w);
      }
    }
  }
}
@Test public void testAddToVectorUsesProductOfWeights() { WordValueEncoder wv = new StaticWordValueEncoder("word"); ContinuousValueEncoder cv = new ContinuousValueEncoder("cont"); InteractionValueEncoder enc = new InteractionValueEncoder("interactions", wv, cv); Vector v1 = new DenseVector(200); enc.addInteractionToVector("a","0.9",0.5, v1); int k = enc.getProbes(); // should set k distinct locations to 0.9*0.5 assertEquals((float) k*0.5*0.9, v1.norm(1), 0); assertEquals(0.5*0.9, v1.maxValue(), 0); }
/**
 * Computes the hash location for one probe of this encoder.
 *
 * <p>Only {@code name}, {@code probe} and {@code dataSize} are forwarded to {@code hash};
 * {@code originalForm} is not read by this implementation.
 *
 * @param originalForm The original form of the value as a byte array (unused here).
 * @param dataSize     Forwarded to {@code hash}; presumably the length of the target vector -- confirm.
 * @param name         The name forwarded to {@code hash}.
 * @param probe        The probe index forwarded to {@code hash}.
 * @return The result of {@code hash(name, probe, dataSize)}.
 */
@Override protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { return hash(name, probe, dataSize); } }
/**
 * Adds the interaction of two values to a vector.
 *
 * <p>For each probe, every hash location produced by the first encoder is combined with every
 * hash location produced by the second encoder as {@code (k + j) % data.size()}, and the
 * weight returned by {@code getWeight(originalForm1, originalForm2, weight)} is added to the
 * vector at that index. When tracing is enabled, each touched index is recorded under the
 * label {@code "<originalForm1>:<originalForm2>"} (both decoded as UTF-8).
 *
 * @param originalForm1 The original form of the first value as a byte array.
 * @param originalForm2 The original form of the second value as a byte array.
 * @param weight        How much to weight this interaction
 * @param data          The vector to which the value should be added.
 */
public void addInteractionToVector(byte[] originalForm1, byte[] originalForm2, double weight, Vector data) {
  String name = getName();
  double w = getWeight(originalForm1, originalForm2, weight);

  // Loop-invariant values: computed once instead of on every iteration.
  int size = data.size();
  int probeCount = probes();
  // Built lazily on first use so nothing is allocated when tracing is disabled.
  String traceLabel = null;

  for (int i = 0; i < probeCount; i++) {
    Iterable<Integer> jValues =
        secondEncoder.hashesForProbe(originalForm2, size, name, i % secondEncoder.getProbes());
    for (int k : firstEncoder.hashesForProbe(originalForm1, size, name, i % firstEncoder.getProbes())) {
      for (int j : jValues) {
        // Sum in long: k + j can exceed Integer.MAX_VALUE when the vector is very large
        // (presumably each hash is below size -- confirm against hashesForProbe), and an
        // int overflow would produce a negative index.
        int n = (int) (((long) k + j) % size);
        if (isTraceEnabled()) {
          if (traceLabel == null) {
            traceLabel = String.format("%s:%s",
                new String(originalForm1, Charsets.UTF_8),
                new String(originalForm2, Charsets.UTF_8));
          }
          trace(traceLabel, n);
        }
        data.set(n, data.get(n) + w);
      }
    }
  }
}
@Test public void testAddToVector() { WordValueEncoder wv = new StaticWordValueEncoder("word"); ContinuousValueEncoder cv = new ContinuousValueEncoder("cont"); InteractionValueEncoder enc = new InteractionValueEncoder("interactions", wv, cv); Vector v1 = new DenseVector(200); enc.addInteractionToVector("a","1.0",1.0, v1); int k = enc.getProbes(); // should set k distinct locations to 1 assertEquals((float) k, v1.norm(1), 0); assertEquals(1.0, v1.maxValue(), 0); // adding same interaction again should increment weights enc.addInteractionToVector("a","1.0",1.0,v1); assertEquals((float) k*2, v1.norm(1), 0); assertEquals(2.0, v1.maxValue(), 0); Vector v2 = new DenseVector(20000); enc.addInteractionToVector("a","1.0",1.0,v2); wv.addToVector("a", v2); cv.addToVector("1.0", v2); k = enc.getProbes(); //this assumes no hash collision assertEquals((float) (k + wv.getProbes()+cv.getProbes()), v2.norm(1), 1.0e-3); }
/**
 * Adds the interaction of two string values to a vector.
 *
 * <p>Both strings are converted with {@code bytesForString} and the work is delegated to the
 * byte-array overload of this method.
 *
 * @param original1 The original form of the first value as a string.
 * @param original2 The original form of the second value as a string.
 * @param weight    How much to weight this interaction
 * @param data      The vector to which the value should be added.
 */
public void addInteractionToVector(String original1, String original2, double weight, Vector data) {
  addInteractionToVector(bytesForString(original1), bytesForString(original2), weight, data);
}
/** Returns the value of {@code getProbes()}; serves as the probe-loop bound when interactions are added. */
private int probes() { return getProbes(); }
/**
 * Computes the hash location for one probe of this encoder.
 *
 * <p>Only {@code name}, {@code probe} and {@code dataSize} are forwarded to {@code hash};
 * {@code originalForm} is not read by this implementation.
 *
 * @param originalForm The original form of the value as a byte array (unused here).
 * @param dataSize     Forwarded to {@code hash}; presumably the length of the target vector -- confirm.
 * @param name         The name forwarded to {@code hash}.
 * @param probe        The probe index forwarded to {@code hash}.
 * @return The result of {@code hash(name, probe, dataSize)}.
 */
@Override protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) { return hash(name, probe, dataSize); } }
/**
 * Renders a value in a human-readable form that shows how it is being interpreted.
 *
 * <p>The result is this encoder's name and the original form joined by a colon, formatted
 * with {@code Locale.ENGLISH}.
 *
 * @param originalForm The original form of the value as a string.
 * @return A string that a human can read.
 */
@Override
public String asString(String originalForm) {
  final String encoderName = getName();
  return String.format(Locale.ENGLISH, "%s:%s", encoderName, originalForm);
}
/**
 * Adds the interaction of two string values to a vector.
 *
 * <p>Both strings are converted with {@code bytesForString} and the work is delegated to the
 * byte-array overload of this method.
 *
 * @param original1 The original form of the first value as a string.
 * @param original2 The original form of the second value as a string.
 * @param weight    How much to weight this interaction
 * @param data      The vector to which the value should be added.
 */
public void addInteractionToVector(String original1, String original2, double weight, Vector data) {
  addInteractionToVector(bytesForString(original1), bytesForString(original2), weight, data);
}
/** Returns the value of {@code getProbes()}; serves as the probe-loop bound when interactions are added. */
private int probes() { return getProbes(); }
/**
 * Renders a value in a human-readable form that shows how it is being interpreted.
 *
 * <p>The result is this encoder's name and the original form joined by a colon, formatted
 * with {@code Locale.ENGLISH}.
 *
 * @param originalForm The original form of the value as a string.
 * @return A string that a human can read.
 */
@Override
public String asString(String originalForm) {
  final String encoderName = getName();
  return String.format(Locale.ENGLISH, "%s:%s", encoderName, originalForm);
}