/**
 * Encodes a single column of the fitted parameters as a binary logistic classification model.
 *
 * @param a0 vector of intercepts; the element at {@code column} is used
 * @param beta coefficient container; cast to {@code S4Object} before the coefficients are extracted
 * @param column index selecting both the intercept and the coefficients
 * @param schema supplies the features and is forwarded to the model builder
 * @return the model built by {@code RegressionModelUtil.createBinaryLogisticClassification} using LOGIT normalization
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double columnIntercept = a0.getValue(column);

	S4Object betaObject = (S4Object)beta;
	List<Double> columnCoefficients = getCoefficients(betaObject, column);

	Model classifier = RegressionModelUtil.createBinaryLogisticClassification(
		schema.getFeatures(),
		columnCoefficients,
		columnIntercept,
		RegressionModel.NormalizationMethod.LOGIT,
		true,
		schema
	);

	return classifier;
}
}
/**
 * Encodes the coefficients and the intercept of this estimator as a regression model.
 *
 * <p>The intercept list must hold exactly one element; it is unwrapped with
 * {@code Iterables.getOnlyElement}.</p>
 *
 * @param schema supplies the features and is forwarded to the model builder
 * @return the model built by {@code RegressionModelUtil.createRegression} without a normalization method
 */
@Override
public RegressionModel encodeModel(Schema schema){
	List<? extends Number> rawCoefficients = getCoef();
	List<? extends Number> rawIntercepts = getIntercept();

	RegressionModel regressionModel = RegressionModelUtil.createRegression(
		schema.getFeatures(),
		ValueUtil.asDoubles(rawCoefficients),
		ValueUtil.asDouble(Iterables.getOnlyElement(rawIntercepts)),
		null,
		schema
	);

	return regressionModel;
}
/**
 * Encodes the wrapped {@code LinearRegressionModel} as a regression model.
 *
 * <p>Features and coefficients are copied into fresh mutable lists, because
 * {@code RegressionTableUtil.simplify} receives both lists and may modify them
 * (presumably dropping redundant terms - confirm against that helper).</p>
 *
 * @param schema supplies the features and is forwarded to the model builder
 * @return the model built by {@code RegressionModelUtil.createRegression} without a normalization method
 */
@Override
public RegressionModel encodeModel(Schema schema){
	LinearRegressionModel transformer = getTransformer();

	// Work on private copies; the schema's own feature list is left untouched
	List<Feature> mutableFeatures = new ArrayList<>();
	mutableFeatures.addAll(schema.getFeatures());

	List<Double> mutableCoefficients = new ArrayList<>(VectorUtil.toList(transformer.coefficients()));

	RegressionTableUtil.simplify(this, null, mutableFeatures, mutableCoefficients);

	return RegressionModelUtil.createRegression(mutableFeatures, mutableCoefficients, transformer.intercept(), null, schema);
}
}
/**
 * Builds one regression ensemble per entry of {@code regTrees} and combines them
 * into a single SOFTMAX-normalized classification model.
 *
 * <p>Each ensemble is given a predicted output field named
 * {@code class_(<label value>)}, where the label value is looked up at the same
 * index as the tree list.</p>
 *
 * <p>NOTE(review): the number of ensembles is taken from {@code regTrees}; nothing
 * here checks that it matches the number of values of the categorical label.</p>
 *
 * @param regTrees one list of regression trees per class
 * @param base_score forwarded unchanged to {@code createMiningModel}
 * @param schema its label is cast to {@code CategoricalLabel}; its features are shared by all segments
 * @return the model built by {@code MiningModelUtil.createClassification}
 */
public static MiningModel encodeMiningModel(List<List<RegressionTree>> regTrees, float base_score, Schema schema){
	// Every per-class ensemble predicts a continuous FLOAT value over the same features
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	List<MiningModel> classModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int classIndex = 0;

	for(List<RegressionTree> classTrees : regTrees){
		MiningModel ensemble = createMiningModel(classTrees, base_score, segmentSchema);

		FieldName outputName = FieldName.create("class_(" + categoricalLabel.getValue(classIndex) + ")");

		MiningModel classModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.FLOAT));

		classModels.add(classModel);

		classIndex++;
	}

	return MiningModelUtil.createClassification(classModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
/**
 * Encodes the {@code "coefficients"} element of the wrapped object as a regression model.
 *
 * <p>The intercept is looked up by the name returned by {@code getInterceptName()}
 * and may be {@code null} (the lookup is made with the flag {@code true}, which
 * presumably marks the entry as optional - confirm against {@code RDoubleVector}).
 * The coefficient vector must hold exactly one entry per feature, plus one more
 * entry when an intercept is present.</p>
 *
 * @param schema supplies the features and is forwarded to the model builder
 * @return the model built by {@code RegressionModelUtil.createRegression} without a normalization method
 * @throws IllegalArgumentException if the number of coefficients does not match the number of features (plus intercept)
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector lm = getObject();

	RDoubleVector coefficients = (RDoubleVector)lm.getValue("coefficients");

	Double intercept = coefficients.getValue(getInterceptName(), true);

	List<? extends Feature> features = schema.getFeatures();

	int expectedSize = features.size() + (intercept != null ? 1 : 0);
	if(coefficients.size() != expectedSize){
		// Report both counts so that a feature/coefficient mismatch can be diagnosed from the message alone
		throw new IllegalArgumentException("Expected " + expectedSize + " coefficient(s) for " + features.size() + " feature(s), got " + coefficients.size());
	}

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	return RegressionModelUtil.createRegression(features, featureCoefficients, intercept, null, schema);
}
/**
 * Encodes the {@code "coefficients"} element of the wrapped object as a
 * generalized linear regression model with an identity link function.
 *
 * <p>The first coefficient is used as the intercept; the remaining ones are
 * matched to the schema features by position.</p>
 *
 * @param schema supplies the label for the mining schema and the features for the regression table
 * @return the populated {@code GeneralRegressionModel}
 * @throws IllegalArgumentException if the number of coefficients is not the number of features plus one
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	RGenericVector earth = getObject();

	RDoubleVector coefficients = (RDoubleVector)earth.getValue("coefficients");

	List<? extends Feature> features = schema.getFeatures();

	// Validate the size before touching any element, so that an empty or truncated
	// vector is reported as an argument error rather than as an element-access failure
	int expectedSize = features.size() + 1;
	if(coefficients.size() != expectedSize){
		throw new IllegalArgumentException("Expected " + expectedSize + " coefficient(s) (intercept plus " + features.size() + " feature(s)), got " + coefficients.size());
	}

	Double intercept = coefficients.getValue(0);

	List<Double> featureCoefficients = (coefficients.getValues()).subList(1, features.size() + 1);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.IDENTITY);

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, null);

	return generalRegressionModel;
}
/**
 * Splits the flat list of tree models into one ensemble per label category and
 * combines the ensembles into a SOFTMAX-normalized classification model.
 *
 * <p>The list is read as a matrix with one column per category
 * ({@code CMatrixUtil.getColumn}); each column becomes a mining model whose
 * predicted output field is named {@code gbmValue(<label value>)}.</p>
 *
 * @param treeModels all tree models; its size divided by the category count gives the row count
 * @param initF forwarded unchanged to {@code createMiningModel}
 * @param schema its label is cast to {@code CategoricalLabel}; its features are shared by all segments
 * @return the model built by {@code MiningModelUtil.createClassification}
 */
private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	List<Model> categoryModels = new ArrayList<>();

	int columns = categoricalLabel.size();
	int rows = (treeModels.size() / columns);

	for(int column = 0; column < columns; column++){
		MiningModel ensemble = createMiningModel(CMatrixUtil.getColumn(treeModels, rows, columns, column), initF, segmentSchema);

		FieldName outputName = FieldName.create("gbmValue(" + categoricalLabel.getValue(column) + ")");

		MiningModel categoryModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.DOUBLE));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
/**
 * Splits the flat list of regression trees into one ensemble per label category
 * and combines the ensembles into a SOFTMAX-normalized classification model.
 *
 * <p>The list is read as a matrix with one column per category
 * ({@code CMatrixUtil.getColumn}); each column becomes a mining model whose
 * predicted output field is named {@code xgbValue(<label value>)}.</p>
 *
 * @param regTrees all regression trees; its size divided by the category count gives the row count
 * @param base_score forwarded unchanged to {@code createMiningModel}
 * @param ntreeLimit forwarded unchanged to {@code createMiningModel}
 * @param schema its label is cast to {@code CategoricalLabel}; its features are shared by all segments
 * @return the model built by {@code MiningModelUtil.createClassification}
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	List<MiningModel> categoryModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int columns = categoricalLabel.size();
	int rows = (regTrees.size() / columns);

	for(int column = 0; column < columns; column++){
		MiningModel ensemble = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, column), base_score, ntreeLimit, segmentSchema);

		FieldName outputName = FieldName.create("xgbValue(" + categoricalLabel.getValue(column) + ")");

		MiningModel categoryModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.FLOAT));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
/**
 * Splits the flat list of trees into one ensemble per label category and
 * combines the ensembles into a SOFTMAX-normalized classification model.
 *
 * <p>The list is read as a matrix with one row per category
 * ({@code FortranMatrixUtil.getRow}); each row becomes a mining model whose
 * predicted output field is named {@code lgbmValue(<label value>)}.</p>
 *
 * @param trees all trees; its size divided by the category count gives the column count
 * @param numIteration forwarded unchanged to {@code createMiningModel}
 * @param schema its label is cast to {@code CategoricalLabel}; its features are shared by all segments
 * @return the model built by {@code MiningModelUtil.createClassification}
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	List<MiningModel> categoryModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int rows = categoricalLabel.size();
	int columns = (trees.size() / rows);

	for(int row = 0; row < rows; row++){
		MiningModel ensemble = createMiningModel(FortranMatrixUtil.getRow(trees, rows, columns, row), numIteration, segmentSchema);

		FieldName outputName = FieldName.create("lgbmValue(" + categoricalLabel.getValue(row) + ")");

		MiningModel categoryModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.DOUBLE));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
/**
 * Encodes a multi-class tree ensemble: the regression trees are grouped per
 * label category and the groups are joined under SOFTMAX normalization.
 *
 * <p>Grouping is done with {@code CMatrixUtil.getColumn}, treating the list as a
 * matrix that has one column per category. The predicted output field of each
 * group is named {@code xgbValue(<label value>)}.</p>
 *
 * @param regTrees all regression trees; its size divided by the category count gives the row count
 * @param base_score forwarded unchanged to {@code createMiningModel}
 * @param ntreeLimit forwarded unchanged to {@code createMiningModel}
 * @param schema its label is cast to {@code CategoricalLabel}; its features are shared by all segments
 * @return the model built by {@code MiningModelUtil.createClassification}
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	List<MiningModel> perCategory = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int columns = categoricalLabel.size();
	int rows = (regTrees.size() / columns);

	int column = 0;

	while(column < columns){
		MiningModel ensemble = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, column), base_score, ntreeLimit, segmentSchema);

		FieldName outputName = FieldName.create("xgbValue(" + categoricalLabel.getValue(column) + ")");

		perCategory.add(ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.FLOAT)));

		column++;
	}

	return MiningModelUtil.createClassification(perCategory, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
/**
 * Encodes the wrapped {@code KMeansModel} as a center-based clustering model.
 *
 * <p>Every cluster center becomes a {@code Cluster} whose id is its zero-based
 * position in {@code clusterCenters()} and whose array holds the center's
 * coordinates. Distances are compared with the squared Euclidean measure and the
 * {@code ABS_DIFF} compare function.</p>
 *
 * @param schema supplies the label for the mining schema and the features for the clustering fields
 * @return the assembled {@code ClusteringModel}
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	KMeansModel transformer = getTransformer();

	List<Cluster> clusters = new ArrayList<>();

	for(Vector center : transformer.clusterCenters()){
		// One cluster is appended per iteration, so the current list size is the zero-based index of this center
		String id = String.valueOf(clusters.size());

		clusters.add(
			new Cluster()
				.setId(id)
				.setArray(PMMLUtil.createRealArray(VectorUtil.toList(center)))
		);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE)
		.setCompareFunction(CompareFunction.ABS_DIFF)
		.setMeasure(new SquaredEuclidean());

	return new ClusteringModel(
		MiningFunction.CLUSTERING,
		ClusteringModel.ModelClass.CENTER_BASED,
		clusters.size(),
		ModelUtil.createMiningSchema(schema.getLabel()),
		comparisonMeasure,
		ClusteringModelUtil.createClusteringFields(schema.getFeatures()),
		clusters
	);
}
}
/**
 * Encodes a single column of the fitted parameters as a general linear
 * regression model with a Poisson distribution.
 *
 * @param a0 vector of intercepts; the element at {@code column} is used
 * @param beta coefficient container; cast to {@code S4Object} before the coefficients are extracted
 * @param column index selecting both the intercept and the coefficients
 * @param schema supplies the label for the mining schema and the features for the regression table
 * @return the populated {@code GeneralRegressionModel}
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double columnIntercept = a0.getValue(column);

	S4Object betaObject = (S4Object)beta;
	List<Double> columnCoefficients = getCoefficients(betaObject, column);

	GeneralRegressionModel poissonModel = new GeneralRegressionModel(
		GeneralRegressionModel.ModelType.GENERAL_LINEAR,
		MiningFunction.REGRESSION,
		ModelUtil.createMiningSchema(schema.getLabel()),
		null, null, null
	).setDistribution(GeneralRegressionModel.Distribution.POISSON);

	// Fills in the regression table of the model in place
	GeneralRegressionModelUtil.encodeRegressionTable(poissonModel, schema.getFeatures(), columnCoefficients, columnIntercept, null);

	return poissonModel;
}
}
/**
 * Encodes a single column of the fitted parameters as a general linear
 * regression model with a normal distribution.
 *
 * @param a0 vector of intercepts; the element at {@code column} is used
 * @param beta coefficient container; cast to {@code S4Object} before the coefficients are extracted
 * @param column index selecting both the intercept and the coefficients
 * @param schema supplies the label for the mining schema and the features for the regression table
 * @return the populated {@code GeneralRegressionModel}
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double columnIntercept = a0.getValue(column);

	S4Object betaObject = (S4Object)beta;
	List<Double> columnCoefficients = getCoefficients(betaObject, column);

	GeneralRegressionModel gaussianModel = new GeneralRegressionModel(
		GeneralRegressionModel.ModelType.GENERAL_LINEAR,
		MiningFunction.REGRESSION,
		ModelUtil.createMiningSchema(schema.getLabel()),
		null, null, null
	).setDistribution(GeneralRegressionModel.Distribution.NORMAL);

	// Fills in the regression table of the model in place
	GeneralRegressionModelUtil.encodeRegressionTable(gaussianModel, schema.getFeatures(), columnCoefficients, columnIntercept, null);

	return gaussianModel;
}
}
/**
 * Wraps the tree models into a single regression ensemble and turns its output
 * into a binary logistic classification with LOGIT normalization.
 *
 * <p>The ensemble's predicted output field is named {@code gbmValue}. The
 * classification step receives the negated {@code coefficient} together with the
 * constant {@code 0d}.</p>
 *
 * @param treeModels tree models forwarded to {@code createMiningModel}
 * @param initF forwarded unchanged to {@code createMiningModel}
 * @param coefficient multiplier whose negation is passed to the classification builder
 * @param schema its features are used by the segment schema; the schema itself is forwarded to the builder
 * @return the model built by {@code MiningModelUtil.createBinaryLogisticClassification}
 */
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	MiningModel ensemble = createMiningModel(treeModels, initF, segmentSchema);

	FieldName outputName = FieldName.create("gbmValue");

	MiningModel valueModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.DOUBLE));

	double negatedCoefficient = -coefficient;

	return MiningModelUtil.createBinaryLogisticClassification(valueModel, negatedCoefficient, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
/**
 * Wraps the regression trees into a single ensemble and turns its output into a
 * binary logistic classification with LOGIT normalization.
 *
 * <p>The ensemble's predicted output field is named {@code xgbValue}. The
 * classification step receives the constants {@code 1d} and {@code 0d}.</p>
 *
 * @param regTrees regression trees forwarded to {@code createMiningModel}
 * @param base_score forwarded unchanged to {@code createMiningModel}
 * @param ntreeLimit forwarded unchanged to {@code createMiningModel}
 * @param schema its features are used by the segment schema; the schema itself is forwarded to the builder
 * @return the model built by {@code MiningModelUtil.createBinaryLogisticClassification}
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	MiningModel ensemble = createMiningModel(regTrees, base_score, ntreeLimit, segmentSchema);

	FieldName outputName = FieldName.create("xgbValue");

	MiningModel valueModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createBinaryLogisticClassification(valueModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
}
/**
 * Encodes a two-class tree ensemble: all regression trees form one ensemble whose
 * value is mapped to class probabilities by a LOGIT-normalized classification.
 *
 * <p>The intermediate value is exposed as the predicted output field
 * {@code xgbValue}; the classification builder is called with the constants
 * {@code 1d} and {@code 0d}.</p>
 *
 * @param regTrees regression trees forwarded to {@code createMiningModel}
 * @param base_score forwarded unchanged to {@code createMiningModel}
 * @param ntreeLimit forwarded unchanged to {@code createMiningModel}
 * @param schema its features are used by the segment schema; the schema itself is forwarded to the builder
 * @return the model built by {@code MiningModelUtil.createBinaryLogisticClassification}
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	MiningModel valueModel = createMiningModel(regTrees, base_score, ntreeLimit, segmentSchema)
		.setOutput(
			ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT)
		);

	MiningModel classifier = MiningModelUtil.createBinaryLogisticClassification(
		valueModel,
		1d,
		0d,
		RegressionModel.NormalizationMethod.LOGIT,
		true,
		schema
	);

	return classifier;
}
}
/**
 * Wraps the trees into a single ensemble and turns its output into a binary
 * classification.
 *
 * <p>The ensemble's predicted output field is named {@code lgbmValue} and carries
 * a {@code SigmoidTransformation} constructed from {@code -1d * sigmoid_} of the
 * enclosing {@code BinomialLogisticRegression}. The classification step is
 * created with normalization method {@code NONE} and the constants {@code 1d} and
 * {@code 0d}.</p>
 *
 * @param trees trees forwarded to {@code createMiningModel}
 * @param numIteration forwarded unchanged to {@code createMiningModel}
 * @param schema its features are used by the segment schema; the schema itself is forwarded to the builder
 * @return the model built by {@code MiningModelUtil.createBinaryLogisticClassification}
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	MiningModel ensemble = createMiningModel(trees, numIteration, segmentSchema);

	FieldName outputName = FieldName.create("lgbmValue");

	double negatedSigmoid = -1d * BinomialLogisticRegression.this.sigmoid_;

	MiningModel valueModel = ensemble.setOutput(ModelUtil.createPredictedOutput(outputName, OpType.CONTINUOUS, DataType.DOUBLE, new SigmoidTransformation(negatedSigmoid)));

	return MiningModelUtil.createBinaryLogisticClassification(valueModel, 1d, 0d, RegressionModel.NormalizationMethod.NONE, true, schema);
}
}
/**
 * Encodes the wrapped object as a mining model made of regression trees.
 *
 * <p>Reads the {@code initF}, {@code trees}, {@code c.splits} and
 * {@code distribution} elements, converts every entry of {@code trees} into a
 * regression {@code TreeModel}, and hands the result to
 * {@code encodeMiningModel} together with the distribution name and the scalar
 * value of {@code initF}.</p>
 *
 * @param schema its features are used by the per-tree segment schema; the schema itself is forwarded to {@code encodeMiningModel}
 * @return the model returned by {@code encodeMiningModel}
 */
@Override
public MiningModel encodeModel(Schema schema){
	RGenericVector gbm = getObject();

	RDoubleVector initF = (RDoubleVector)gbm.getValue("initF");
	RGenericVector trees = (RGenericVector)gbm.getValue("trees");
	RGenericVector c_splits = (RGenericVector)gbm.getValue("c.splits");
	RGenericVector distribution = (RGenericVector)gbm.getValue("distribution");

	RStringVector distributionName = (RStringVector)distribution.getValue("name");

	// Every tree predicts a continuous DOUBLE value over the same features
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	List<TreeModel> treeModels = new ArrayList<>();

	int treeIndex = 0;

	while(treeIndex < trees.size()){
		RGenericVector tree = (RGenericVector)trees.getValue(treeIndex);

		treeModels.add(encodeTreeModel(MiningFunction.REGRESSION, tree, c_splits, segmentSchema));

		treeIndex++;
	}

	return encodeMiningModel(distributionName, treeModels, initF.asScalar(), schema);
}
/**
 * Encodes the wrapped {@code GBTClassificationModel} as a binary logistic
 * classification on top of a weighted sum of decision trees.
 *
 * <p>Only the {@code "logistic"} loss type is accepted. The trees are combined
 * with {@code WEIGHTED_SUM} using the model's tree weights, and the sum is
 * exposed as the predicted output field {@code gbtValue}. The classification
 * step uses LOGIT normalization and is called with the constants {@code 2d} and
 * {@code 0d} and the flag {@code false}.</p>
 *
 * @param schema its features are used by the segment schema; the schema itself is forwarded to the builder
 * @return the model built by {@code MiningModelUtil.createBinaryLogisticClassification}
 * @throws IllegalArgumentException if the loss type is anything other than {@code "logistic"}
 */
@Override
public MiningModel encodeModel(Schema schema){
	GBTClassificationModel transformer = getTransformer();

	String lossType = transformer.getLossType();

	// Guard clause: reject every loss function except "logistic"
	if(!lossType.equals("logistic")){
		throw new IllegalArgumentException("Loss function " + lossType + " is not supported");
	}

	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	List<TreeModel> treeModels = TreeModelUtil.encodeDecisionTreeEnsemble(this, segmentSchema);

	MiningModel weightedSum = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(segmentSchema.getLabel()))
		.setSegmentation(
			MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, Doubles.asList(transformer.treeWeights()))
		)
		.setOutput(
			ModelUtil.createPredictedOutput(FieldName.create("gbtValue"), OpType.CONTINUOUS, DataType.DOUBLE)
		);

	return MiningModelUtil.createBinaryLogisticClassification(weightedSum, 2d, 0d, RegressionModel.NormalizationMethod.LOGIT, false, schema);
}
}
/**
 * Encodes the configured rules as a classification rule set model that uses the
 * {@code FIRST_HIT} rule selection criterion.
 *
 * <p>Each rule is a tuple whose element 0 is a predicate string and whose
 * element 1 is the score string. The predicate string is translated against the
 * schema features by {@code PredicateTranslator.translate}. When a default score
 * is configured, it is set on the rule set together with a default confidence of
 * {@code 1d}.</p>
 *
 * @param schema supplies the label for the mining schema and the features for predicate translation
 * @return the assembled {@code RuleSetModel}
 */
@Override
public RuleSetModel encodeModel(Schema schema){
	String defaultScore = getDefaultScore();
	List<Object[]> rules = getRules();

	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	RuleSelectionMethod firstHit = new RuleSelectionMethod(RuleSelectionMethod.Criterion.FIRST_HIT);

	RuleSet ruleSet = new RuleSet()
		.addRuleSelectionMethods(firstHit);

	if(defaultScore != null){
		ruleSet
			.setDefaultConfidence(1d)
			.setDefaultScore(defaultScore);
	}

	for(Object[] tuple : rules){
		String predicateText = TupleUtil.extractElement(tuple, 0, String.class);
		String scoreText = TupleUtil.extractElement(tuple, 1, String.class);

		ruleSet.addRules(
			new SimpleRule()
				.setPredicate(PredicateTranslator.translate(predicateText, features))
				.setScore(scoreText)
		);
	}

	return new RuleSetModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(label), ruleSet);
}