public Schema createSchema(){ Schema schema = new Schema(getLabel(), getFeatures()); return schema; }
@Override public Schema toMojoModelSchema(Schema schema){ Label label = schema.getLabel(); List<? extends Feature> features = schema.getFeatures(); Function<Feature, Stream<Feature>> function = new Function<Feature, Stream<Feature>>(){ @Override public Stream<Feature> apply(Feature feature){ ModelEncoder encoder = (ModelEncoder)feature.getEncoder(); if(feature instanceof CategoricalFeature){ CategoricalFeature categoricalFeature = (CategoricalFeature)feature; List<String> values = new ArrayList<>(categoricalFeature.getValues()); values.add("missing(NA)"); ImputerUtil.encodeFeature(categoricalFeature, "missing(NA)", MissingValueTreatmentMethod.AS_VALUE); return values.stream() .map(value -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), value)); } return Stream.of(feature); } }; features = features.stream() .flatMap(function) .collect(Collectors.toList()); return new Schema(label, features); }
public Schema createSchema(){ return new Schema(getLabel(), getFeatures()); }
@Override public Schema createSchema(){ RGenericVector recipe = getObject(); Label label = getLabel(); List<? extends Feature> features = getFeatures(); RGenericVector steps = (RGenericVector)recipe.getValue("steps"); List<FieldName> outcomeNames = this.termRoles.entrySet().stream() .filter(entry -> (Role.OUTCOME).equals(entry.getValue())) .map(entry -> entry.getKey()) .collect(Collectors.toList()); if(outcomeNames.size() == 1){ FieldName outcomeName = outcomeNames.get(0); renameDataField(label.getName(), outcomeName); label = label.toRenamedLabel(outcomeName); } else if(outcomeNames.size() >= 2){ throw new IllegalArgumentException(); } // End if if(steps != null){ throw new IllegalArgumentException(); } return new Schema(label, features); }
public static MiningModel encodeMiningModel(List<List<RegressionTree>> regTrees, float base_score, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); int numClasses = regTrees.size(); for (int l=0;l<numClasses;l++){ MiningModel miningModel = createMiningModel(regTrees.get(l), base_score, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("class_(" + categoricalLabel.getValue(l) + ")"), OpType.CONTINUOUS, DataType.FLOAT)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); }
private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){ CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); List<Model> miningModels = new ArrayList<>(); for(int i = 0, columns = categoricalLabel.size(), rows = (treeModels.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(treeModels, rows, columns, i), initF, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); }
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); for(int i = 0, columns = categoricalLabel.size(), rows = (regTrees.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, i), base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.FLOAT)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); } }
@Override public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); for(int i = 0, rows = categoricalLabel.size(), columns = (trees.size() / rows); i < rows; i++){ MiningModel miningModel = createMiningModel(FortranMatrixUtil.getRow(trees, rows, columns, i), numIteration, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); } }
public PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureMap featureMap, Map<String, ?> options){ XGBoostEncoder encoder = new XGBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = this.obj.encodeLabel(targetField, targetCategories, encoder); List<Feature> features = featureMap.encodeFeatures(encoder); Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(options, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); for(int i = 0, columns = categoricalLabel.size(), rows = (regTrees.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, i), base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.FLOAT)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); } }
public PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureMap featureMap, Map<String, ?> options){ XGBoostEncoder encoder = new XGBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = this.obj.encodeLabel(targetField, targetCategories, encoder); List<Feature> features = featureMap.encodeFeatures(encoder); Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(options, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }
public static PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureList featureList, List<RegressionTree> regTrees, float base_score){ LSBoostEncoder encoder = new LSBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = encodeLabel(targetField, targetCategories, encoder); //todo List<Feature> features = new ArrayList<>(); for (int i=0;i<featureList.size();i++){ FieldName fieldName = new FieldName("feature_"+i); DataField dataField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.FLOAT); Feature feature = new ContinuousFeature(encoder, dataField); features.add(feature); } Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(regTrees, base_score, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }
public static PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureList featureList, List<List<RegressionTree>> regTrees, float base_score, int numClasses){ LKBoostEncoder encoder = new LKBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = encodeLabel(targetField, targetCategories, encoder, numClasses); //todo List<Feature> features = new ArrayList<>(); for (int i=0;i<featureList.size();i++){ FieldName fieldName = new FieldName("feature_"+i); DataField dataField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.FLOAT); Feature feature = new ContinuousFeature(encoder, dataField); features.add(feature); } Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(regTrees, base_score, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); MiningModel miningModel = createMiningModel(treeModels, initF, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE)); return MiningModelUtil.createBinaryLogisticClassification(miningModel, -coefficient, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema); }
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); MiningModel miningModel = createMiningModel(regTrees, base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT)); return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema); } }
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); MiningModel miningModel = createMiningModel(regTrees, base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT)); return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema); } }
@Override public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); MiningModel miningModel = createMiningModel(trees, numIteration, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue"), OpType.CONTINUOUS, DataType.DOUBLE, new SigmoidTransformation(-1d * BinomialLogisticRegression.this.sigmoid_))); return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.NONE, true, schema); } }
@Override public Model encodeModel(Schema schema){ int[] shape = getCoefShape(); int numberOfClasses = shape[0]; int numberOfFeatures = shape[1]; boolean hasProbabilityDistribution = hasProbabilityDistribution(); List<? extends Number> coef = getCoef(); List<? extends Number> intercepts = getIntercept(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); List<? extends Feature> features = schema.getFeatures(); if(numberOfClasses == 1){ ClassifierUtil.checkSize(2, categoricalLabel); return RegressionModelUtil.createBinaryLogisticClassification(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0)), ValueUtil.asDouble(intercepts.get(0)), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema); } else if(numberOfClasses >= 3){ ClassifierUtil.checkSize(numberOfClasses, categoricalLabel); Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), Collections.emptyList()); List<RegressionModel> regressionModels = new ArrayList<>(); for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){ RegressionModel regressionModel = RegressionModelUtil.createRegression(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i)), ValueUtil.asDouble(intercepts.get(i)), RegressionModel.NormalizationMethod.LOGIT, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE)); regressionModels.add(regressionModel); } return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema); } else { throw new IllegalArgumentException(); } }
@Override public MiningModel encodeModel(Schema schema){ RGenericVector gbm = getObject(); RDoubleVector initF = (RDoubleVector)gbm.getValue("initF"); RGenericVector trees = (RGenericVector)gbm.getValue("trees"); RGenericVector c_splits = (RGenericVector)gbm.getValue("c.splits"); RGenericVector distribution = (RGenericVector)gbm.getValue("distribution"); RStringVector distributionName = (RStringVector)distribution.getValue("name"); Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); List<TreeModel> treeModels = new ArrayList<>(); for(int i = 0; i < trees.size(); i++){ RGenericVector tree = (RGenericVector)trees.getValue(i); TreeModel treeModel = encodeTreeModel(MiningFunction.REGRESSION, tree, c_splits, segmentSchema); treeModels.add(treeModel); } MiningModel miningModel = encodeMiningModel(distributionName, treeModels, initF.asScalar(), schema); return miningModel; }
@Override public MiningModel encodeModel(Schema schema){ GBTClassificationModel model = getTransformer(); String lossType = model.getLossType(); switch(lossType){ case "logistic": break; default: throw new IllegalArgumentException("Loss function " + lossType + " is not supported"); } Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); List<TreeModel> treeModels = TreeModelUtil.encodeDecisionTreeEnsemble(this, segmentSchema); MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(segmentSchema.getLabel())) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, Doubles.asList(model.treeWeights()))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbtValue"), OpType.CONTINUOUS, DataType.DOUBLE)); return MiningModelUtil.createBinaryLogisticClassification(miningModel, 2d, 0d, RegressionModel.NormalizationMethod.LOGIT, false, schema); } }