// Builds the list of character pairs: each StringTuple is constructed from two strings
// and appended to specialChars in the order '&', '<', '>'.
// NOTE(review): both strings of every pair are identical as written, so each pair maps a
// character to itself. Possibly the second values were meant to be escaped forms
// (e.g. HTML entities) that got decoded somewhere — confirm against the intended behavior.
ArrayList<StringTuple> specialChars = new ArrayList<>(); specialChars.add(new StringTuple("&", "&")); specialChars.add(new StringTuple("<", "<")); specialChars.add(new StringTuple(">", ">"));
/**
 * Creates a tuple and immediately adds the given string to it.
 *
 * @param firstEntry the string handed to {@code add} during construction
 */
public StringTuple(String firstEntry) {
  this.add(firstEntry);
}
/**
 * Orders two tuples entry by entry; when all shared positions are equal, the shorter
 * tuple sorts first.
 *
 * @param otherTuple the tuple to compare against
 * @return the first non-zero result of comparing entries at the same index, otherwise
 *         -1, 0 or 1 depending on whether this tuple is shorter than, as long as, or
 *         longer than {@code otherTuple}
 */
@Override
public int compareTo(StringTuple otherTuple) {
  int mine = length();
  int theirs = otherTuple.length();
  int shared = Math.min(mine, theirs);

  // Walk the positions both tuples have; the first differing entry decides.
  int index = 0;
  while (index < shared) {
    int order = this.tuple.get(index).compareTo(otherTuple.stringAt(index));
    if (order != 0) {
      return order;
    }
    index++;
  }

  // All shared entries are equal: fall back to comparing lengths.
  return mine < theirs ? -1 : (mine > theirs ? 1 : 0);
}
/**
 * Measures the distance from every seed vector to the incoming vector and writes one
 * record per seed whose distance passes the (optional) threshold.
 *
 * <p>The output key is a two-entry {@code StringTuple} of (seed vector name, input name);
 * the output value is the distance wrapped in a {@code DoubleWritable}.
 *
 * @param key     input key; its {@code toString()} is used as the name when the vector is
 *                not a {@code NamedVector}
 * @param value   wrapper holding the vector to compare against the seeds
 * @param context sink the (tuple, distance) records are written to
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector valVec = value.get();
  // Prefer the vector's own name; otherwise fall back to the key's string form.
  String keyName = valVec instanceof NamedVector
      ? ((NamedVector) valVec).getName()
      : key.toString();

  for (NamedVector seedVector : seedVectors) {
    double distance = measure.distance(seedVector, valVec);
    // Written as !(a <= b) rather than (a > b) so a NaN distance is still filtered out
    // whenever the threshold is active.
    if (usesThreshold && !(distance <= maxDistance)) {
      continue;
    }
    StringTuple outKey = new StringTuple();
    outKey.add(seedVector.getName());
    outKey.add(keyName);
    context.write(outKey, new DoubleWritable(distance));
  }
}
// Advances the iterator `it` by one element and passes that element's entries
// (as returned by getEntries()) to value.addAll.
value.addAll(it.next().getEntries());
Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize); sf.reset(); try { for (String term : value.getEntries()) { if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram int termId = dictionary.get(term);
/**
 * Measures the distance from every seed vector to the incoming vector and writes one
 * record per seed whose distance passes the (optional) threshold.
 *
 * <p>The output key is a two-entry {@code StringTuple} of (seed vector name, input name);
 * the output value is the distance wrapped in a {@code DoubleWritable}.
 *
 * @param key     input key; its {@code toString()} is used as the name when the vector is
 *                not a {@code NamedVector}
 * @param value   wrapper holding the vector to compare against the seeds
 * @param context sink the (tuple, distance) records are written to
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector valVec = value.get();
  // Prefer the vector's own name; otherwise fall back to the key's string form.
  String keyName = valVec instanceof NamedVector
      ? ((NamedVector) valVec).getName()
      : key.toString();

  for (NamedVector seedVector : seedVectors) {
    double distance = measure.distance(seedVector, valVec);
    // Written as !(a <= b) rather than (a > b) so a NaN distance is still filtered out
    // whenever the threshold is active.
    if (usesThreshold && !(distance <= maxDistance)) {
      continue;
    }
    StringTuple outKey = new StringTuple();
    outKey.add(seedVector.getName());
    outKey.add(keyName);
    context.write(outKey, new DoubleWritable(distance));
  }
}
protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException { ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize); sf.reset(); try { new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1)); OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize); sf.reset(); try { for (String term : value.getEntries()) { if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram int termId = dictionary.get(term);
/**
 * Tokenizes the text in {@code value} with the configured analyzer and writes the
 * resulting non-empty terms, in stream order, as a {@code StringTuple} under the same key.
 *
 * <p>The token stream is closed in a {@code finally} block so it is released even when
 * {@code reset()}, {@code incrementToken()} or {@code end()} throws; previously an
 * exception while consuming the stream left it unclosed.
 *
 * @param key     record key; its string form is passed as the first argument of
 *                {@code analyzer.tokenStream} and it is reused as the output key
 * @param value   text to tokenize
 * @param context sink the (key, tuple) record is written to
 * @throws IOException          if tokenizing or writing fails
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
  StringTuple document = new StringTuple();
  try {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // Skip zero-length terms; copy the term text out of the attribute's buffer.
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
  } finally {
    // Same close call as before (IOException from close() is swallowed), now guaranteed
    // to run on the failure path as well.
    Closeables.close(stream, true);
  }
  context.write(key, document);
}
/**
 * Creates a tuple and adds every string produced by {@code entries}, in iteration order.
 *
 * @param entries the strings to add; each one is handed to {@code add}
 */
public StringTuple(Iterable<String> entries) {
  for (String item : entries) {
    this.add(item);
  }
}
protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException { ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize); sf.reset(); try { new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1)); OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
/**
 * Orders two tuples entry by entry; when all shared positions are equal, the shorter
 * tuple sorts first.
 *
 * @param otherTuple the tuple to compare against
 * @return the first non-zero result of comparing entries at the same index, otherwise
 *         -1, 0 or 1 depending on whether this tuple is shorter than, as long as, or
 *         longer than {@code otherTuple}
 */
@Override
public int compareTo(StringTuple otherTuple) {
  int mine = length();
  int theirs = otherTuple.length();
  int shared = Math.min(mine, theirs);

  // Walk the positions both tuples have; the first differing entry decides.
  int index = 0;
  while (index < shared) {
    int order = this.tuple.get(index).compareTo(otherTuple.stringAt(index));
    if (order != 0) {
      return order;
    }
    index++;
  }

  // All shared entries are equal: fall back to comparing lengths.
  return mine < theirs ? -1 : (mine > theirs ? 1 : 0);
}
/**
 * Small demo: builds three string pairs for '&', '<' and '>', passes them together with a
 * sample sentence to {@code replaceSpecialChars}, and prints the result to standard output.
 *
 * <p>NOTE(review): both strings of every pair are identical as written, so each pair maps
 * a character to itself. Possibly the second values were meant to be escaped forms
 * (e.g. HTML entities) — confirm against the intended behavior.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  StringTuple[] pairs = {
      new StringTuple("&", "&"),
      new StringTuple("<", "<"),
      new StringTuple(">", ">"),
  };

  ArrayList<StringTuple> specialChars = new ArrayList<>();
  for (StringTuple pair : pairs) {
    specialChars.add(pair);
  }

  System.out.println(replaceSpecialChars("Hi <Nishanth> How are &you !", specialChars));
}
/**
 * Tokenizes the text in {@code value} with the configured analyzer and writes the
 * resulting non-empty terms, in stream order, as a {@code StringTuple} under the same key.
 *
 * <p>The token stream is closed in a {@code finally} block so it is released even when
 * {@code reset()}, {@code incrementToken()} or {@code end()} throws; previously an
 * exception while consuming the stream left it unclosed.
 *
 * @param key     record key; its string form is passed as the first argument of
 *                {@code analyzer.tokenStream} and it is reused as the output key
 * @param value   text to tokenize
 * @param context sink the (key, tuple) record is written to
 * @throws IOException          if tokenizing or writing fails
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
  StringTuple document = new StringTuple();
  try {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // Skip zero-length terms; copy the term text out of the attribute's buffer.
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
  } finally {
    // Same close call as before (IOException from close() is swallowed), now guaranteed
    // to run on the failure path as well.
    Closeables.close(stream, true);
  }
  context.write(key, document);
}
/**
 * Creates a tuple and adds every element of the array, from index 0 upward.
 *
 * @param entries the strings to add; each one is handed to {@code add}
 */
public StringTuple(String[] entries) {
  for (int i = 0; i < entries.length; i++) {
    add(entries[i]);
  }
}
protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException { try (ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize)){ sf.reset(); int count = 0; // ngram count new OpenObjectIntHashMap<>(value.getEntries().size() * (maxShingleSize - 1)); OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<>(value.getEntries().size());
/**
 * Orders two tuples entry by entry; when all shared positions are equal, the shorter
 * tuple sorts first.
 *
 * @param otherTuple the tuple to compare against
 * @return the first non-zero result of comparing entries at the same index, otherwise
 *         -1, 0 or 1 depending on whether this tuple is shorter than, as long as, or
 *         longer than {@code otherTuple}
 */
@Override
public int compareTo(StringTuple otherTuple) {
  int mine = length();
  int theirs = otherTuple.length();
  int shared = Math.min(mine, theirs);

  // Walk the positions both tuples have; the first differing entry decides.
  int index = 0;
  while (index < shared) {
    int order = this.tuple.get(index).compareTo(otherTuple.stringAt(index));
    if (order != 0) {
      return order;
    }
    index++;
  }

  // All shared entries are equal: fall back to comparing lengths.
  return mine < theirs ? -1 : (mine > theirs ? 1 : 0);
}
/**
 * Tokenizes the text in {@code value} with the configured analyzer and writes the
 * resulting non-empty terms, in stream order, as a {@code StringTuple} under the same key.
 *
 * <p>The token stream is closed in a {@code finally} block so it is released even when
 * {@code reset()}, {@code incrementToken()} or {@code end()} throws; previously an
 * exception while consuming the stream left it unclosed.
 *
 * @param key     record key; its string form is passed as the first argument of
 *                {@code analyzer.tokenStream} and it is reused as the output key
 * @param value   text to tokenize
 * @param context sink the (key, tuple) record is written to
 * @throws IOException          if tokenizing or writing fails
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
  StringTuple document = new StringTuple();
  try {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // Skip zero-length terms; copy the term text out of the attribute's buffer.
      if (termAtt.length() > 0) {
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
  } finally {
    // Same close call as before (IOException from close() is swallowed), now guaranteed
    // to run on the failure path as well.
    Closeables.close(stream, true);
  }
  context.write(key, document);
}
/**
 * Creates a tuple and adds every string produced by {@code entries}, in iteration order.
 *
 * @param entries the strings to add; each one is handed to {@code add}
 */
public StringTuple(Iterable<String> entries) {
  for (String item : entries) {
    this.add(item);
  }
}