/**
 * Demo entry point: prints the Word2Vec cosine similarity between the
 * words "product" and "item" using the shared singleton model.
 */
public static void main(String[] args) {
    final W2VDistanceMeasurer measurer = W2VDistanceMeasurer.getInstance();
    final double similarity = measurer.vec.similarity("product", "item");
    System.out.println(similarity);
}
// Configure and train a deeplearning4j Word2Vec model.
// NOTE(review): vec, batchSize, windowSize, minWordFrequency, iterations,
// epochs, stopWords and iter are declared outside this fragment — confirm
// their types at the enclosing scope. vec is apparently declared with a
// broader type, hence the cast before fit().
vec = new org.deeplearning4j.models.word2vec.Word2Vec.Builder()
        .batchSize(batchSize) // # words per minibatch.
        .windowSize(windowSize)
        .minWordFrequency(minWordFrequency)
        // .useAdaGrad(false)
        // .layerSize(layerSize) // word feature vector size
        .seed(42)
        .iterations(iterations) // # iterations to train
        .epochs(epochs)
        .stopWords(stopWords)
        .learningRate(0.025)
        // .minLearningRate(0.001) // learning rate decays wrt # words. floor learning
        .negativeSample(10) // sample size 10 words
        .iterate(iter)
        // .tokenizerFactory(t)
        .build();
((org.deeplearning4j.models.word2vec.Word2Vec) vec).fit();
// Build and train a small Word2Vec model, then print the 10 words nearest
// to "french".
// NOTE(review): iter (sentence iterator) and t (tokenizer factory) come
// from outside this fragment — confirm their setup at the caller.
Word2Vec vec = new Word2Vec.Builder().minWordFrequency(5).iterations(1)
        .layerSize(100).seed(42).windowSize(5).iterate(iter)
        .tokenizerFactory(t).build();
vec.fit(); // trains on the corpus supplied via iterate(iter)
Collection<String> lst = vec.wordsNearest("french", 10);
System.out.println(lst);
// Full Word2Vec builder configuration mapped from this object's fields.
// NOTE(review): the built model is discarded as written — the assignment
// (or return) of .build()'s result appears to have been trimmed from this
// fragment; confirm against the original source.
new Word2Vec.Builder()
        .minWordFrequency(this.minWordFrequency)
        .useAdaGrad(this.useAdaGrad)
        .allowParallelTokenization(this.allowParallelTokenization)
        .enableScavenger(this.enableScavenger)
        .negativeSample(this.negativeSamplingValue)
        .sampling(this.subSamplingThres)
        .epochs(this.epochs)
        .learningRate(this.learningRate)
        .minLearningRate(this.minLearningRate)
        .workers(this.workers)
        .iterations(this.iterations)
        .layerSize(this.layerSize)
        .seed(this.seed)
        .windowSize(this.windowSize)
        .iterate(iter)
        .stopWords(this.stopWordsHandler.getStopList())
        .tokenizerFactory(this.tokenizerFactory.getBackend())
        .build();
// Word2Vec setup with an explicit in-memory vocab cache and lookup table,
// then a nearest-words query for "day".
t.setTokenPreProcessor(new CommonPreprocessor());
InMemoryLookupCache cache = new InMemoryLookupCache();
// NOTE(review): this builder chain is missing a terminating .build();
// before the Word2Vec declaration — as written it does not compile.
// Confirm against the original source.
WeightLookupTable table = new InMemoryLookupTable.Builder()
        .vectorLength(100)
Word2Vec vec = new Word2Vec.Builder()
        .minWordFrequency(5).iterations(1)
        .layerSize(100).lookupTable(table)
        .stopWords(new ArrayList<String>())
        .vocabCache(cache).seed(42)
        .windowSize(5).iterate(iter).tokenizerFactory(t).build();
vec.fit();
// NOTE(review): the enclosing try block starts outside this fragment, and
// the query below sits inside the catch as written — almost certainly a
// copy/paste artifact; verify brace placement in the original file.
} catch (IOException e) {
Collection<String> lst = vec.wordsNearest("day", 10);
System.out.println(lst);
// Load one (word, vector) pair from a binary model stream into the vocab
// cache and the parallel list of rows.
// NOTE(review): reader is defined outside this fragment, and the per-word
// section below was presumably the body of a loop (words is a counter) —
// confirm the enclosing loop at the caller.
AbstractCache<VocabWord> cache = new AbstractCache.Builder<VocabWord>().build();
List<INDArray> arrays = new ArrayList<>();
int words = 0;
String word = reader.readUTF();           // word token
INDArray row = Nd4j.read(reader);         // its embedding vector
VocabWord word1 = new VocabWord(1.0, word);
word1.setIndex(cache.numWords());         // next free index
cache.addToken(word1);
cache.addWordToIndex(word1.getIndex(), word);
cache.putVocabWord(word);
arrays.add(row);                          // arrays[i] aligns with vocab index i
words++;
/**
 * Converts the loaded Word2Vec model into a Weka {@code Instances} object:
 * one instance per vocabulary word, with one numeric attribute per
 * embedding dimension plus a trailing "word_id" string attribute.
 *
 * @return the full dataset (batch retrieval only)
 * @throws IOException if no source was set or incremental retrieval was requested
 */
@Override
public Instances getDataSet() throws IOException {
    if (m_sourceFile == null) {
        throw new IOException("No source has been specified");
    }
    if (getRetrieval() == INCREMENTAL) {
        throw new IOException("This loader cannot load instances incrementally.");
    }
    setRetrieval(BATCH);
    if (m_structure == null) {
        getStructure(); // lazily build the attribute header
    }
    Instances result = new Instances(m_structure);
    for (String word : vec.getVocab().words()) {
        // Hoist the embedding lookup: the original re-queried the model on
        // every iteration of the copy loop, including its condition.
        double[] embedding = this.vec.getWordVector(word);
        double[] values = new double[result.numAttributes()];
        System.arraycopy(embedding, 0, values, 0, embedding.length);
        // Last attribute stores the word itself as a string value.
        values[result.numAttributes() - 1] = result.attribute("word_id").addStringValue(word);
        Instance inst = new DenseInstance(1, values); // weight 1
        inst.setDataset(result);
        result.add(inst);
    }
    return result;
}
// Fragment: accumulate word vectors for the tokens of the current span.
// The first branch initialises curVec from the first in-vocabulary token;
// the else branch adds vectors for subsequent tokens. Braces are
// unbalanced here because the enclosing if/else starts outside this
// fragment — confirm structure at the original source.
if (vec.hasWord(curTokenText))
    curVec = vec.getWordVectorMatrix(curTokenText);
} else {
    if (vec.hasWord(curTokenText))
        curVec = curVec.add(vec.getWordVectorMatrix(curTokenText));
public void setStructure() { ArrayList<Attribute> att = new ArrayList<Attribute>(); // Add one attribute for each embedding dimension for (int i = 0; i < this.vec.getLayerSize(); i++) { att.add(new Attribute("embedding-" + i)); } att.add(new Attribute("word_id", (ArrayList<String>) null)); m_structure = new Instances("W2V model loaded from " + this.m_File.toString(), att, 0); }
// Cosine similarity between the two lemmas under the shared Word2Vec model.
double value = w2v.vec.similarity(lemma1, lemma2);
// Float.toString replaces the deprecated Float(double) boxing constructor;
// the explicit (float) narrowing keeps the rendered string identical to
// what new Float(value).toString() produced.
results.add(w2vPrefix + Float.toString((float) value));
// Fragment: the matching "if" (likely an identical-string short-circuit)
// is outside this view — confirm the condition at the original source.
sim = 1.0;
else
    // Fall back to model-based cosine similarity between the two strings.
    sim = vec.similarity(str1, str2);