@Override
public PipelineModelInfo getModelInfo(final PipelineModel from) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
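// A minimal usage sketch (hypothetical): the adapter above is looked up through
// the same factory it delegates to for each stage. The pipeline/trainingData
// variables and the cast are assumptions, not part of the original code.
PipelineModel fitted = pipeline.fit(trainingData);
PipelineModelInfo exported =
    (PipelineModelInfo) ModelInfoAdapterFactory.getAdapter(fitted.getClass()).adapt(fitted);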
/**
 * Creates a transition-based parser using an MLP transition classifier.
 * @param jsc a Java Spark context
 * @param classifierFileName the path to a saved transition classifier
 * @param featureFrame the feature frame used to extract features
 */
public TransitionBasedParserMLP(JavaSparkContext jsc, String classifierFileName, FeatureFrame featureFrame) {
    this.featureFrame = featureFrame;
    this.classifier = TransitionClassifier.load(jsc, new Path(classifierFileName, "data").toString());
    this.pipelineModel = PipelineModel.load(new Path(classifierFileName, "pipelineModel").toString());
    this.transitionName = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    String[] features = ((CountVectorizerModel) pipelineModel.stages()[1]).vocabulary();
    this.featureMap = new HashMap<String, Integer>();
    for (int j = 0; j < features.length; j++) {
        this.featureMap.put(features[j], j);
    }
}
public PMMLBuilder verify(Dataset<Row> dataset, double precision, double zeroThreshold) {
    PipelineModel pipelineModel = getPipelineModel();
    Dataset<Row> transformedDataset = pipelineModel.transform(dataset);
    Verification verification = new Verification(dataset, transformedDataset)
        .setPrecision(precision)
        .setZeroThreshold(zeroThreshold);
    return setVerification(verification);
}
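// Hypothetical usage of the verify method above, assuming the JPMML-SparkML
// PMMLBuilder API: embed verification records with explicit tolerances before
// exporting the pipeline to PMML. trainingData and pipelineModel are assumed.
PMMLBuilder pmmlBuilder = new PMMLBuilder(trainingData.schema(), pipelineModel)
    .verify(trainingData.sample(false, 0.01), 1e-14, 1e-14);
PMML pmml = pmmlBuilder.build();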
/**
 * Loads a pipeline model from an external file.
 * @param pipelineModelFileName the path of the saved pipeline model
 * @return a pipeline model.
 */
public PipelineModel load(String pipelineModelFileName) {
    model = PipelineModel.load(pipelineModelFileName);
    return model;
}
@Override
public void saveImpl(String path) {
    // save metadata and params
    DefaultParamsWriter.saveMetadata(instance, path, sc(),
        DefaultParamsWriter.saveMetadata$default$4(),
        DefaultParamsWriter.saveMetadata$default$5());
    // save model data: markovOrder, numLabels, weights
    Data data = new Data();
    data.setMarkovOrder(contextExtractor.getMarkovOrder().ordinal() + 1);
    data.setWeights(weights);
    data.setTagDictionary(tagDictionary);
    List<Data> list = new LinkedList<Data>();
    list.add(data);
    String dataPath = new Path(path, "data").toString();
    sqlContext().createDataFrame(list, Data.class).write().parquet(dataPath);
    // save pipeline model
    try {
        String pipelinePath = new Path(path, "pipelineModel").toString();
        pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
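// Resulting on-disk layout (a sketch inferred from the writer above; the
// matching loader appears further below in this section):
//   <path>/metadata       -- params written by DefaultParamsWriter.saveMetadata
//   <path>/data           -- parquet data holding markovOrder, weights, tagDictionary
//   <path>/pipelineModel  -- the serialized Spark PipelineModel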
public Transformer build() {
    Evaluator evaluator = getEvaluator();
    PMMLTransformer pmmlTransformer = new PMMLTransformer(evaluator, this.columnProducers);
    if (this.exploded) {
        ColumnExploder columnExploder = new ColumnExploder(pmmlTransformer.getOutputCol());
        ColumnPruner columnPruner = new ColumnPruner(new Set.Set1<>(pmmlTransformer.getOutputCol()));
        PipelineModel pipelineModel = new PipelineModel(null, new Transformer[]{pmmlTransformer, columnExploder, columnPruner});
        return pipelineModel;
    }
    return pmmlTransformer;
}
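// Hypothetical usage: the result of build() behaves like any other Spark
// Transformer, whether it is the bare PMMLTransformer or the exploded
// PipelineModel. The transformerBuilder/inputData names are assumptions.
Transformer transformer = transformerBuilder.build();
Dataset<Row> scored = transformer.transform(inputData);
scored.printSchema();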
try {
    model.write().overwrite().save(pipelineModelFileName);
} catch (IOException e) {
    e.printStackTrace();
}
DataFrame predictions = model.transform(df);
predictions.show();
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
// compute the training accuracy printed below
double accuracy = evaluator.evaluate(predictions);
System.out.println("training accuracy = " + accuracy);
LogisticRegressionModel lrModel = (LogisticRegressionModel) model.stages()[2];
LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
double[] objectiveHistory = trainingSummary.objectiveHistory();
DataFrame df1 = model.transform(df0);
prediction = jsc.broadcast(df1.select("prediction").collect());
if (df1.count() > 0) {
PipelineModel pipelineModel = PipelineModel.load(pipelineDir.getAbsolutePath());
@Test
public void pipeline() {
    StandardScaler scaler = new StandardScaler()
        .setInputCol("features")
        .setOutputCol("scaledFeatures");
    LogisticRegression lr = new LogisticRegression()
        .setFeaturesCol("scaledFeatures");
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[]{scaler, lr});
    PipelineModel model = pipeline.fit(dataset);
    model.transform(dataset).createOrReplaceTempView("prediction");
    Dataset<Row> predictions = spark.sql("SELECT label, probability, prediction FROM prediction");
    predictions.collectAsList();
}
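// A persistence sketch for the test above, assuming a writable pipelineDir:
// save the fitted PipelineModel and load it back, as the load-only snippets
// elsewhere in this section do.
try {
    model.write().overwrite().save(pipelineDir.getAbsolutePath());
} catch (IOException e) {
    e.printStackTrace();
}
PipelineModel reloaded = PipelineModel.load(pipelineDir.getAbsolutePath());
reloaded.transform(dataset).show();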
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from, final DataFrame df) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel, df);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
org.apache.spark.ml.util.DefaultParamsReader.Metadata metadata =
    DefaultParamsReader.loadMetadata(path, sc(), CMMModel.class.getName());
String pipelinePath = new Path(path, "pipelineModel").toString();
PipelineModel pipelineModel = PipelineModel.load(pipelinePath);
String dataPath = new Path(path, "data").toString();
DataFrame df = sqlContext().read().format("parquet").load(dataPath);
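// Continuation sketch (hypothetical): pull the saved fields back out of the
// parquet data loaded above. Field names follow the Data bean used by the
// save method earlier in this section.
Row first = df.first();
int markovOrder = first.getAs("markovOrder");
Vector weights = first.getAs("weights");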
@Override
public List<Transformer> apply(Transformer transformer) {
    if (transformer instanceof PipelineModel) {
        PipelineModel pipelineModel = (PipelineModel) transformer;
        return Arrays.asList(pipelineModel.stages());
    } else if (transformer instanceof CrossValidatorModel) {
        CrossValidatorModel crossValidatorModel = (CrossValidatorModel) transformer;
        return Collections.singletonList(crossValidatorModel.bestModel());
    } else if (transformer instanceof TrainValidationSplitModel) {
        TrainValidationSplitModel trainValidationSplitModel = (TrainValidationSplitModel) transformer;
        return Collections.singletonList(trainValidationSplitModel.bestModel());
    }
    return null;
}
};
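// A minimal recursion sketch built on the function above: collect every leaf
// Transformer of an arbitrarily nested model tree. The collectLeaves helper
// and the java.util.function.Function wrapper are assumptions for illustration.
static List<Transformer> collectLeaves(Transformer transformer,
        Function<Transformer, List<Transformer>> children) {
    List<Transformer> result = new ArrayList<>();
    List<Transformer> childList = children.apply(transformer);
    if (childList == null) {
        result.add(transformer); // not a container: keep it as a leaf
    } else {
        for (Transformer child : childList) {
            result.addAll(collectLeaves(child, children));
        }
    }
    return result;
}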
/**
 * Creates a conditional Markov model.
 * @param pipelineModel a fitted pipeline model whose stages include a CountVectorizerModel and a StringIndexerModel
 * @param weights the weight vector of the model
 * @param markovOrder the Markov order
 * @param tagDictionary a map from words to their possible tag indices
 */
public CMMModel(PipelineModel pipelineModel, Vector weights, MarkovOrder markovOrder, Map<String, Set<Integer>> tagDictionary) {
    this.pipelineModel = pipelineModel;
    this.contextExtractor = new ContextExtractor(markovOrder, Constants.REGEXP_FILE);
    this.weights = weights;
    this.tags = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    String[] features = ((CountVectorizerModel) pipelineModel.stages()[1]).vocabulary();
    featureMap = new HashMap<String, Integer>();
    for (int j = 0; j < features.length; j++) {
        featureMap.put(features[j], j);
    }
    this.tagDictionary = tagDictionary;
}
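// A sketch of a pipeline whose fitted stages line up with the indices the
// constructor above expects: stage 1 a CountVectorizerModel, stage 2 a
// StringIndexerModel. The stage choices and column names are assumptions.
Pipeline pipeline = new Pipeline().setStages(new PipelineStage[]{
    new RegexTokenizer().setInputCol("text").setOutputCol("tokens"),
    new CountVectorizer().setInputCol("tokens").setOutputCol("features"),
    new StringIndexer().setInputCol("tag").setOutputCol("label")
});
PipelineModel pipelineModel = pipeline.fit(trainingData);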
public void printModel() {
    LogisticRegressionModel lrModel = (LogisticRegressionModel) model.stages()[2];
    System.out.println("intercept = " + lrModel.intercept());
    System.out.println("number of features = " + lrModel.numFeatures());
    System.out.println("regularization parameter = " + lrModel.getRegParam());
    System.out.println(lrModel.explainParams());
}
Transformer[] stages = pipelineModel.stages();
for (Transformer stage : stages) {
    TransformerConverter<?> converter = converterFactory.newConverter(stage);