/**
 * Extracts a serializable {@link CountVectorizerModelInfo} snapshot from a fitted
 * Spark {@code CountVectorizerModel}: its minTF setting, vocabulary, and the
 * input/output column names (wrapped as single-element ordered sets).
 *
 * @param from fitted Spark model to export
 * @param df   accompanying DataFrame; not consulted by this export (kept for interface compatibility)
 * @return populated model-info transfer object
 */
@Override public CountVectorizerModelInfo getModelInfo(final CountVectorizerModel from, final DataFrame df) {
    final CountVectorizerModelInfo info = new CountVectorizerModelInfo();
    info.setMinTF(from.getMinTF());
    info.setVocabulary(from.vocabulary());

    // LinkedHashSet preserves insertion order for the (single) column names.
    final Set<String> inputs = new LinkedHashSet<String>();
    inputs.add(from.getInputCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<String>();
    outputs.add(from.getOutputCol());
    info.setOutputKeys(outputs);

    return info;
}
// Rebuild a CountVectorizerModel from the persisted vocabulary, reading documents from the
// "text" column and writing term-count vectors to "feature", then transform df with it.
// NOTE(review): fragment — `model` and `df` are declared outside this view; confirm their
// types (expected: a vocabulary-bearing model wrapper and a Dataset/DataFrame) in the full file.
CountVectorizerModel cvm = new CountVectorizerModel(model.getVocabulary()).setInputCol("text") .setOutputCol("feature"); Dataset<Row> eventDF = cvm.transform(df);
// NOTE(review): this fragment is syntactically broken as seen here — the builder chain
// starting at `.setTokenize(Boolean.TRUE)` follows a `;`-terminated statement, so its
// receiver (presumably a PMML TextIndex builder) lies outside this view, and the for-loop
// over the vocabulary is cut off mid-body. Verify against the full file before editing.
// Visible intent: look up the DocumentFeature for the transformer's input column, configure
// tokenization/term-weighting from it (BINARY local weights when transformer.getBinary()),
// then iterate the fitted vocabulary terms by index.
CountVectorizerModel transformer = getTransformer(); DocumentFeature documentFeature = (DocumentFeature)encoder.getOnlyFeature(transformer.getInputCol()); .setTokenize(Boolean.TRUE) .setWordSeparatorCharacterRE(documentFeature.getWordSeparatorRE()) .setLocalTermWeights(transformer.getBinary() ? TextIndex.LocalTermWeights.BINARY : null) .setExpression(new FieldRef(termField.getName())); String[] vocabulary = transformer.vocabulary(); for(int i = 0; i < vocabulary.length; i++){ String term = vocabulary[i];
/**
 * Creates a transition-based parser using a MLP transition classifier.
 * Loads the classifier and its preprocessing pipeline from {@code classifierFileName},
 * reads the transition labels from the pipeline's StringIndexerModel (stage 2), and
 * builds a feature-name-to-index map from its CountVectorizerModel vocabulary (stage 1).
 * @param jsc
 * @param classifierFileName
 * @param featureFrame
 */
public TransitionBasedParserMLP(JavaSparkContext jsc, String classifierFileName, FeatureFrame featureFrame) {
    this.featureFrame = featureFrame;
    this.classifier = TransitionClassifier.load(jsc, new Path(classifierFileName, "data").toString());
    this.pipelineModel = PipelineModel.load(new Path(classifierFileName, "pipelineModel").toString());
    this.transitionName = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    // Map each vocabulary term to its position so feature vectors can be indexed by name.
    String[] vocab = ((CountVectorizerModel) pipelineModel.stages()[1]).vocabulary();
    this.featureMap = new HashMap<String, Integer>();
    for (int idx = 0; idx < vocab.length; idx++) {
        this.featureMap.put(vocab[idx], idx);
    }
}
// NOTE(review): fragment — the leading `.setP(1.0);` continues a builder chain (likely a
// Normalizer) whose receiver is outside this view; `cvm`, `df`, `languageCode`, `model`,
// and `wrapper` are also declared elsewhere. Confirm against the full file.
// Visible intent: vectorize df, keep only ("value", "feature"), and package the trained
// model together with the CountVectorizer vocabulary into an AgePredictModel.
.setP(1.0); Dataset<Row> eventDF = cvm.transform(df).select("value", "feature"); return new AgePredictModel(languageCode, model, cvm.vocabulary(), wrapper);
// Rebuild a CountVectorizerModel from the persisted vocabulary ("text" -> "feature")
// and transform df with it.
// NOTE(review): fragment — `model` and `df` come from the enclosing method, outside this
// view; this snippet also duplicates an identical one elsewhere in the project — consider
// extracting a shared helper.
CountVectorizerModel cvm = new CountVectorizerModel(model.getVocabulary()).setInputCol("text") .setOutputCol("feature"); Dataset<Row> eventDF = cvm.transform(df);
/**
 * Creates a conditional Markov model.
 * Wires up the context extractor for the given Markov order, pulls the tag labels from
 * the pipeline's StringIndexerModel (stage 2), and builds a feature-to-index map from
 * its CountVectorizerModel vocabulary (stage 1).
 * @param pipelineModel
 * @param weights
 * @param markovOrder
 */
public CMMModel(PipelineModel pipelineModel, Vector weights, MarkovOrder markovOrder, Map<String, Set<Integer>> tagDictionary) {
    this.pipelineModel = pipelineModel;
    this.contextExtractor = new ContextExtractor(markovOrder, Constants.REGEXP_FILE);
    this.weights = weights;
    this.tags = ((StringIndexerModel) (pipelineModel.stages()[2])).labels();
    // Index every vocabulary term by position for fast feature lookup during decoding.
    String[] vocab = ((CountVectorizerModel) (pipelineModel.stages()[1])).vocabulary();
    featureMap = new HashMap<String, Integer>();
    for (int idx = 0; idx < vocab.length; idx++) {
        featureMap.put(vocab[idx], idx);
    }
    this.tagDictionary = tagDictionary;
}
/**
 * Exports a fitted Spark {@code CountVectorizerModel} into a serializable
 * {@link CountVectorizerModelInfo}: minTF, vocabulary, and the input/output
 * column names as single-element insertion-ordered sets.
 *
 * @param from fitted Spark model to export
 * @return populated model-info transfer object
 */
@Override public CountVectorizerModelInfo getModelInfo(final CountVectorizerModel from) {
    final CountVectorizerModelInfo info = new CountVectorizerModelInfo();
    info.setMinTF(from.getMinTF());
    info.setVocabulary(from.vocabulary());

    // LinkedHashSet keeps a stable iteration order for the column-name sets.
    final Set<String> inputs = new LinkedHashSet<String>();
    inputs.add(from.getInputCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<String>();
    outputs.add(from.getOutputCol());
    info.setOutputKeys(outputs);

    return info;
}
// NOTE(review): syntactically broken as seen here — `.setP(1.0);` appears AFTER the `;`
// that terminates the CountVectorizerModel builder chain, so it has no receiver; it likely
// belongs to a different builder (e.g. a Normalizer) elsewhere in the original file.
// Confirm before editing.
// Visible intent: rebuild a CountVectorizerModel over the "context" column, write vectors
// to "feature", transform df, and keep only ("value", "feature").
CountVectorizerModel cvm = new CountVectorizerModel(model.getVocabulary()) .setInputCol("context") .setOutputCol("feature"); .setP(1.0); Dataset<Row> eventDF= cvm.transform(df).select("value", "feature");
// Cap numFeatures at the size of the fitted vocabulary so downstream feature indexing
// cannot run past the vectorizer's term list.
// NOTE(review): fragment — `cvm` and `numFeatures` are declared outside this view.
int vocabSize = cvm.vocabulary().length; numFeatures = Math.min(numFeatures, vocabSize);