@Override
public PipelineModelInfo getModelInfo(final PipelineModel from) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
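// A minimal usage sketch (hypothetical): the adapter above is looked up through
// the same factory it delegates to for each stage. The pipeline/trainingData
// variables and the cast are assumptions, not part of the original code.
PipelineModel fitted = pipeline.fit(trainingData);
PipelineModelInfo exported =
    (PipelineModelInfo) ModelInfoAdapterFactory.getAdapter(fitted.getClass()).adapt(fitted);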
/**
 * Creates a transition-based parser using an MLP transition classifier.
 * @param jsc a Java Spark context
 * @param classifierFileName the path to a saved transition classifier
 * @param featureFrame the feature frame used to extract features
 */
public TransitionBasedParserMLP(JavaSparkContext jsc, String classifierFileName, FeatureFrame featureFrame) {
    this.featureFrame = featureFrame;
    this.classifier = TransitionClassifier.load(jsc, new Path(classifierFileName, "data").toString());
    this.pipelineModel = PipelineModel.load(new Path(classifierFileName, "pipelineModel").toString());
    this.transitionName = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    String[] features = ((CountVectorizerModel) pipelineModel.stages()[1]).vocabulary();
    this.featureMap = new HashMap<String, Integer>();
    for (int j = 0; j < features.length; j++) {
        this.featureMap.put(features[j], j);
    }
}
public PMMLBuilder verify(Dataset<Row> dataset, double precision, double zeroThreshold) {
    PipelineModel pipelineModel = getPipelineModel();
    Dataset<Row> transformedDataset = pipelineModel.transform(dataset);
    Verification verification = new Verification(dataset, transformedDataset)
        .setPrecision(precision)
        .setZeroThreshold(zeroThreshold);
    return setVerification(verification);
}
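// Hypothetical usage of the verify method above, assuming the JPMML-SparkML
// PMMLBuilder API: embed verification records with explicit tolerances before
// exporting the pipeline to PMML. trainingData and pipelineModel are assumed.
PMMLBuilder pmmlBuilder = new PMMLBuilder(trainingData.schema(), pipelineModel)
    .verify(trainingData.sample(false, 0.01), 1e-14, 1e-14);
PMML pmml = pmmlBuilder.build();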
/**
 * Loads a pipeline model from an external file.
 * @param pipelineModelFileName the path of the saved pipeline model
 * @return a pipeline model.
 */
public PipelineModel load(String pipelineModelFileName) {
    model = PipelineModel.load(pipelineModelFileName);
    return model;
}
@Override
public void saveImpl(String path) {
    // save metadata and params
    DefaultParamsWriter.saveMetadata(instance, path, sc(),
        DefaultParamsWriter.saveMetadata$default$4(),
        DefaultParamsWriter.saveMetadata$default$5());
    // save model data: markovOrder, numLabels, weights
    Data data = new Data();
    data.setMarkovOrder(contextExtractor.getMarkovOrder().ordinal() + 1);
    data.setWeights(weights);
    data.setTagDictionary(tagDictionary);
    List<Data> list = new LinkedList<Data>();
    list.add(data);
    String dataPath = new Path(path, "data").toString();
    sqlContext().createDataFrame(list, Data.class).write().parquet(dataPath);
    // save pipeline model
    try {
        String pipelinePath = new Path(path, "pipelineModel").toString();
        pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
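// Resulting on-disk layout (a sketch inferred from the writer above; the
// matching loader appears further below in this section):
//   <path>/metadata       -- params written by DefaultParamsWriter.saveMetadata
//   <path>/data           -- parquet data holding markovOrder, weights, tagDictionary
//   <path>/pipelineModel  -- the serialized Spark PipelineModel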
public Transformer build() {
    Evaluator evaluator = getEvaluator();
    PMMLTransformer pmmlTransformer = new PMMLTransformer(evaluator, this.columnProducers);
    if (this.exploded) {
        ColumnExploder columnExploder = new ColumnExploder(pmmlTransformer.getOutputCol());
        ColumnPruner columnPruner = new ColumnPruner(new Set.Set1<>(pmmlTransformer.getOutputCol()));
        PipelineModel pipelineModel = new PipelineModel(null, new Transformer[]{pmmlTransformer, columnExploder, columnPruner});
        return pipelineModel;
    }
    return pmmlTransformer;
}
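// Hypothetical usage: the result of build() behaves like any other Spark
// Transformer, whether it is the bare PMMLTransformer or the exploded
// PipelineModel. The transformerBuilder/inputData names are assumptions.
Transformer transformer = transformerBuilder.build();
Dataset<Row> scored = transformer.transform(inputData);
scored.printSchema();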
try {
    model.write().overwrite().save(pipelineModelFileName);
} catch (IOException e) {
    e.printStackTrace();
}
DataFrame predictions = model.transform(df);
predictions.show();
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
// compute the training accuracy printed below
double accuracy = evaluator.evaluate(predictions);
System.out.println("training accuracy = " + accuracy);
LogisticRegressionModel lrModel = (LogisticRegressionModel) model.stages()[2];
LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
double[] objectiveHistory = trainingSummary.objectiveHistory();
DataFrame df1 = model.transform(df0);
prediction = jsc.broadcast(df1.select("prediction").collect());
if (df1.count() > 0) {
PipelineModel pipelineModel = PipelineModel.load(pipelineDir.getAbsolutePath());
@Test
public void pipeline() {
    StandardScaler scaler = new StandardScaler()
        .setInputCol("features")
        .setOutputCol("scaledFeatures");
    LogisticRegression lr = new LogisticRegression()
        .setFeaturesCol("scaledFeatures");
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[]{scaler, lr});
    PipelineModel model = pipeline.fit(dataset);
    model.transform(dataset).createOrReplaceTempView("prediction");
    Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction");
    predictions.collectAsList();
}
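// A persistence sketch for the test above, assuming a writable pipelineDir:
// save the fitted PipelineModel and load it back, as the load-only snippets
// elsewhere in this section do.
try {
    model.write().overwrite().save(pipelineDir.getAbsolutePath());
} catch (IOException e) {
    e.printStackTrace();
}
PipelineModel reloaded = PipelineModel.load(pipelineDir.getAbsolutePath());
reloaded.transform(dataset).show();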
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from, final DataFrame df) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel, df);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
org.apache.spark.ml.util.DefaultParamsReader.Metadata metadata =
    DefaultParamsReader.loadMetadata(path, sc(), CMMModel.class.getName());
String pipelinePath = new Path(path, "pipelineModel").toString();
PipelineModel pipelineModel = PipelineModel.load(pipelinePath);
String dataPath = new Path(path, "data").toString();
DataFrame df = sqlContext().read().format("parquet").load(dataPath);
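// Continuation sketch (hypothetical): pull the saved fields back out of the
// parquet data loaded above. Field names follow the Data bean used by the
// save method earlier in this section.
Row first = df.first();
int markovOrder = first.getAs("markovOrder");
Vector weights = first.getAs("weights");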
@Override
public List<Transformer> apply(Transformer transformer) {
    if (transformer instanceof PipelineModel) {
        PipelineModel pipelineModel = (PipelineModel) transformer;
        return Arrays.asList(pipelineModel.stages());
    } else if (transformer instanceof CrossValidatorModel) {
        CrossValidatorModel crossValidatorModel = (CrossValidatorModel) transformer;
        return Collections.singletonList(crossValidatorModel.bestModel());
    } else if (transformer instanceof TrainValidationSplitModel) {
        TrainValidationSplitModel trainValidationSplitModel = (TrainValidationSplitModel) transformer;
        return Collections.singletonList(trainValidationSplitModel.bestModel());
    }
    return null;
}
};
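// A minimal recursion sketch built on the function above: collect every leaf
// Transformer of an arbitrarily nested model tree. The collectLeaves helper
// and the java.util.function.Function wrapper are assumptions for illustration.
static List<Transformer> collectLeaves(Transformer transformer,
        Function<Transformer, List<Transformer>> children) {
    List<Transformer> result = new ArrayList<>();
    List<Transformer> childList = children.apply(transformer);
    if (childList == null) {
        result.add(transformer); // not a container: keep it as a leaf
    } else {
        for (Transformer child : childList) {
            result.addAll(collectLeaves(child, children));
        }
    }
    return result;
}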
/**
 * Creates a conditional Markov model.
 * @param pipelineModel a fitted pipeline model whose stages include a CountVectorizerModel and a StringIndexerModel
 * @param weights the weight vector of the model
 * @param markovOrder the Markov order
 * @param tagDictionary a map from words to their possible tag indices
 */
public CMMModel(PipelineModel pipelineModel, Vector weights, MarkovOrder markovOrder, Map<String, Set<Integer>> tagDictionary) {
    this.pipelineModel = pipelineModel;
    this.contextExtractor = new ContextExtractor(markovOrder, Constants.REGEXP_FILE);
    this.weights = weights;
    this.tags = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    String[] features = ((CountVectorizerModel) pipelineModel.stages()[1]).vocabulary();
    featureMap = new HashMap<String, Integer>();
    for (int j = 0; j < features.length; j++) {
        featureMap.put(features[j], j);
    }
    this.tagDictionary = tagDictionary;
}
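// A sketch of a pipeline whose fitted stages line up with the indices the
// constructor above expects: stage 1 a CountVectorizerModel, stage 2 a
// StringIndexerModel. The stage choices and column names are assumptions.
Pipeline pipeline = new Pipeline().setStages(new PipelineStage[]{
    new RegexTokenizer().setInputCol("text").setOutputCol("tokens"),
    new CountVectorizer().setInputCol("tokens").setOutputCol("features"),
    new StringIndexer().setInputCol("tag").setOutputCol("label")
});
PipelineModel pipelineModel = pipeline.fit(trainingData);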
public void printModel() {
    LogisticRegressionModel lrModel = (LogisticRegressionModel) model.stages()[2];
    System.out.println("intercept = " + lrModel.intercept());
    System.out.println("number of features = " + lrModel.numFeatures());
    System.out.println("regularization parameter = " + lrModel.getRegParam());
    System.out.println(lrModel.explainParams());
}
Transformer[] stages = pipelineModel.stages();
for (Transformer stage : stages) {
    TransformerConverter<?> converter = converterFactory.newConverter(stage);