/**
 * Looks up the vector for a word, failing when the lookup yields nothing.
 *
 * @param word word to look up
 * @return the non-null array returned by {@code getVectorOrNull} for {@code word}
 * @throws UnknownWordException if {@code getVectorOrNull} returns {@code null} for {@code word}
 */
private double[] getVector(String word) throws UnknownWordException {
  final double[] vector = getVectorOrNull(word);
  if (vector != null) {
    return vector;
  }
  throw new UnknownWordException(word);
}
/**
 * Builds a {@link NormalizedWord2VecModel} from a thrift representation.
 *
 * @param thrift thrift representation, handed to {@code Word2VecModel.fromThrift}
 * @return the result of {@code fromWord2VecModel} applied to the model decoded from {@code thrift}
 */
public static NormalizedWord2VecModel fromThrift(final Word2VecModelThrift thrift) {
  return fromWord2VecModel(
      Word2VecModel.fromThrift(thrift)
  );
}
/**
 * Convenience overload that loads a model from a binary file with default settings.
 * <p>
 * Delegates to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} using
 * {@code ByteOrder.LITTLE_ENDIAN} and {@code ProfilingTimer.NONE}.
 *
 * @param file file to read the model from
 * @return the model produced by the delegate overload
 * @throws IOException if the delegate overload throws it
 */
public static Word2VecModel fromBinFile(File file) throws IOException {
  final ByteOrder defaultOrder = ByteOrder.LITTLE_ENDIAN;
  return fromBinFile(file, defaultOrder, ProfilingTimer.NONE);
}
/**
 * Loads a model from a binary file and converts it to a {@link NormalizedWord2VecModel}.
 * <p>
 * The file is read with {@code Word2VecModel.fromBinFile(File)} and the resulting model
 * is passed to {@code fromWord2VecModel}.
 *
 * @param file file to read the model from
 * @return the result of {@code fromWord2VecModel} for the loaded model
 * @throws IOException if {@code Word2VecModel.fromBinFile} throws it
 */
public static NormalizedWord2VecModel fromBinFile(final File file) throws IOException {
  final Word2VecModel loaded = Word2VecModel.fromBinFile(file);
  return fromWord2VecModel(loaded);
}
/**
 * Captures the relationship between two words as a reusable {@link SemanticDifference}.
 * <p>
 * Both words are resolved with {@code getVector} (first {@code s1}, then {@code s2}) and the
 * two vectors are combined once with {@code getDifference}. The returned object resolves the
 * queried word, combines its vector with the captured one through {@code getDifference}, and
 * delegates the search to this searcher's {@code getMatches(double[], int)}.
 *
 * @param s1 first word
 * @param s2 second word
 * @return a {@link SemanticDifference} bound to the combined vector of {@code s1} and {@code s2}
 * @throws UnknownWordException if {@code getVector} rejects {@code s1} or {@code s2}
 */
@Override
public SemanticDifference similarity(String s1, String s2) throws UnknownWordException {
  // Arguments are evaluated left to right, so s1 is still resolved before s2.
  final double[] offset = getDifference(getVector(s1), getVector(s2));

  return new SemanticDifference() {
    @Override
    public List<Match> getMatches(String word, int maxMatches) throws UnknownWordException {
      final double[] wordVector = getVector(word);
      final double[] shifted = getDifference(wordVector, offset);
      return SearcherImpl.this.getMatches(shifted, maxMatches);
    }
  };
}
/**
 * Entry point for training a model.
 *
 * @return a newly constructed {@link Word2VecTrainerBuilder}
 */
public static Word2VecTrainerBuilder trainer() {
  final Word2VecTrainerBuilder builder = new Word2VecTrainerBuilder();
  return builder;
}
}
/**
 * Finds matches for {@code word} relative to the captured {@code diff} vector.
 * <p>
 * The word is resolved with {@code getVector}, combined with {@code diff} through
 * {@code getDifference}, and the outcome is searched via the enclosing searcher's
 * {@code getMatches(double[], int)}.
 *
 * @param word word to resolve
 * @param maxMatches forwarded unchanged to the enclosing searcher's {@code getMatches}
 * @return the list returned by the enclosing searcher
 * @throws UnknownWordException if {@code getVector} rejects {@code word}
 */
@Override
public List<Match> getMatches(String word, int maxMatches) throws UnknownWordException {
  final double[] wordVector = getVector(word);
  final double[] shifted = getDifference(wordVector, diff);
  return SearcherImpl.this.getMatches(shifted, maxMatches);
}
};
/**
 * Turns a word into a {@link Match} scored against the captured vector {@code vec}.
 *
 * @param other word to score
 * @return a {@code MatchImpl} holding {@code other} and the value of
 *         {@code calculateDistance} for its vector and {@code vec}
 */
@Override
public Match apply(String other) {
  // NOTE(review): getVectorOrNull may yield null by its name; presumably callers only
  // pass words from the vocabulary - confirm, since null is forwarded unchecked.
  final double[] candidate = getVectorOrNull(other);
  final double distance = calculateDistance(candidate, vec);
  return new MatchImpl(other, distance);
}
}),
/**
 * Creates a search view over this model.
 *
 * @return a new {@code SearcherImpl} constructed with this instance
 */
public Searcher forSearch() {
  final Searcher searcher = new SearcherImpl(this);
  return searcher;
}
/**
 * Private constructor: forwards all arguments to the superclass constructor and then
 * calls {@code normalize()}.
 *
 * @param vocab passed through to the superclass constructor
 * @param layerSize passed through to the superclass constructor
 * @param vectors passed through to the superclass constructor
 */
private NormalizedWord2VecModel(Iterable<String> vocab, int layerSize, double[] vectors) {
  super(vocab, layerSize, vectors);
  // NOTE(review): presumably rescales the stored vectors to unit length - confirm against normalize().
  normalize();
}
/**
 * Looks up the vector for a word, failing when the lookup yields nothing.
 *
 * @param word word to look up
 * @return the non-null array returned by {@code getVectorOrNull} for {@code word}
 * @throws UnknownWordException if {@code getVectorOrNull} returns {@code null} for {@code word}
 */
private double[] getVector(String word) throws UnknownWordException {
  final double[] vector = getVectorOrNull(word);
  if (vector != null) {
    return vector;
  }
  throw new UnknownWordException(word);
}
/**
 * Builds a {@link NormalizedWord2VecModel} from a thrift representation.
 *
 * @param thrift thrift representation, handed to {@code Word2VecModel.fromThrift}
 * @return the result of {@code fromWord2VecModel} applied to the model decoded from {@code thrift}
 */
public static NormalizedWord2VecModel fromThrift(final Word2VecModelThrift thrift) {
  return fromWord2VecModel(
      Word2VecModel.fromThrift(thrift)
  );
}
/**
 * Loads a model from a binary file and converts it to a {@link NormalizedWord2VecModel}.
 * <p>
 * The file is read with {@code Word2VecModel.fromBinFile(File)} and the resulting model
 * is passed to {@code fromWord2VecModel}.
 *
 * @param file file to read the model from
 * @return the result of {@code fromWord2VecModel} for the loaded model
 * @throws IOException if {@code Word2VecModel.fromBinFile} throws it
 */
public static NormalizedWord2VecModel fromBinFile(final File file) throws IOException {
  final Word2VecModel loaded = Word2VecModel.fromBinFile(file);
  return fromWord2VecModel(loaded);
}
/**
 * Convenience overload that loads a model from a binary file without profiling.
 * <p>
 * Delegates to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} using
 * {@code ProfilingTimer.NONE}.
 *
 * @param file file to read the model from
 * @param byteOrder byte order forwarded unchanged to the delegate overload
 * @return the model produced by the delegate overload
 * @throws IOException if the delegate overload throws it
 */
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder) throws IOException {
  final ProfilingTimer noTimer = ProfilingTimer.NONE;
  return fromBinFile(file, byteOrder, noTimer);
}
/**
 * Entry point for training a model.
 *
 * @return a newly constructed {@link Word2VecTrainerBuilder}
 */
public static Word2VecTrainerBuilder trainer() {
  final Word2VecTrainerBuilder builder = new Word2VecTrainerBuilder();
  return builder;
}
}
/**
 * Finds matches for {@code word} relative to the captured {@code diff} vector.
 * <p>
 * The word is resolved with {@code getVector}, combined with {@code diff} through
 * {@code getDifference}, and the outcome is searched via the enclosing searcher's
 * {@code getMatches(double[], int)}.
 *
 * @param word word to resolve
 * @param maxMatches forwarded unchanged to the enclosing searcher's {@code getMatches}
 * @return the list returned by the enclosing searcher
 * @throws UnknownWordException if {@code getVector} rejects {@code word}
 */
@Override
public List<Match> getMatches(String word, int maxMatches) throws UnknownWordException {
  final double[] wordVector = getVector(word);
  final double[] shifted = getDifference(wordVector, diff);
  return SearcherImpl.this.getMatches(shifted, maxMatches);
}
};
/**
 * Turns a word into a {@link Match} scored against the captured vector {@code vec}.
 *
 * @param other word to score
 * @return a {@code MatchImpl} holding {@code other} and the value of
 *         {@code calculateDistance} for its vector and {@code vec}
 */
@Override
public Match apply(String other) {
  // NOTE(review): getVectorOrNull may yield null by its name; presumably callers only
  // pass words from the vocabulary - confirm, since null is forwarded unchecked.
  final double[] candidate = getVectorOrNull(other);
  final double distance = calculateDistance(candidate, vec);
  return new MatchImpl(other, distance);
}
}),
/**
 * Reads a {@link Word2VecModel} from the binary representation output by the open source
 * C version of word2vec, interpreting it with the given byte order.
 * <p>
 * Delegates to the four-argument overload, supplying {@code MAX_DOUBLE_BUFFER} as the
 * last argument.
 *
 * @param file file to read the model from
 * @param byteOrder byte order forwarded unchanged to the delegate overload
 * @param timer timer forwarded unchanged to the delegate overload
 * @return the model produced by the delegate overload
 * @throws IOException if the delegate overload throws it
 */
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder, ProfilingTimer timer)
    throws IOException {
  return fromBinFile(
      file,
      byteOrder,
      timer,
      MAX_DOUBLE_BUFFER
  );
}
/**
 * Convenience overload that loads a model from a binary file with default settings.
 * <p>
 * Delegates to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} using
 * {@code ByteOrder.LITTLE_ENDIAN} and {@code ProfilingTimer.NONE}.
 *
 * @param file file to read the model from
 * @return the model produced by the delegate overload
 * @throws IOException if the delegate overload throws it
 */
public static Word2VecModel fromBinFile(File file) throws IOException {
  final ByteOrder defaultOrder = ByteOrder.LITTLE_ENDIAN;
  return fromBinFile(file, defaultOrder, ProfilingTimer.NONE);
}
/**
 * Convenience overload that loads a model from a binary file without profiling.
 * <p>
 * Delegates to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} using
 * {@code ProfilingTimer.NONE}.
 *
 * @param file file to read the model from
 * @param byteOrder byte order forwarded unchanged to the delegate overload
 * @return the model produced by the delegate overload
 * @throws IOException if the delegate overload throws it
 */
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder) throws IOException {
  final ProfilingTimer noTimer = ProfilingTimer.NONE;
  return fromBinFile(file, byteOrder, noTimer);
}