/**
 * Encodes every member of a tree ensemble as a {@link TreeModel}.
 *
 * <p>
 * The incoming schema is converted once using {@code schema.toAnonymousSchema()}.
 * For each member, a per-tree schema is derived from that result using
 * {@code toTreeModelSchema(dataType, segmentSchema)}, and the member is then handed to
 * {@code TreeModelUtil.encodeTreeModel}.
 * </p>
 *
 * @param estimator The ensemble; its {@code getEstimators()} supplies the member trees.
 * @param predicateManager Passed through to {@code TreeModelUtil.encodeTreeModel}.
 * @param scoreDistributionManager Passed through to {@code TreeModelUtil.encodeTreeModel}.
 * @param miningFunction Passed through to {@code TreeModelUtil.encodeTreeModel}.
 * @param schema The schema of the ensemble as a whole.
 *
 * @return One tree model per member, in the iteration order of {@code getEstimators()}.
 */
static public <E extends Estimator & HasEstimatorEnsemble<T>, T extends Estimator & HasTree> List<TreeModel> encodeTreeModelSegmentation(E estimator, PredicateManager predicateManager, ScoreDistributionManager scoreDistributionManager, MiningFunction miningFunction, Schema schema){
	List<? extends T> members = estimator.getEstimators();

	Schema segmentSchema = schema.toAnonymousSchema();

	// The lambda parameter cannot be called "estimator", because a lambda may not shadow the enclosing method parameter
	Function<T, TreeModel> treeEncoder = (T member) -> {
		Schema treeModelSchema = toTreeModelSchema(member.getDataType(), segmentSchema);

		return TreeModelUtil.encodeTreeModel(member, predicateManager, scoreDistributionManager, miningFunction, treeModelSchema);
	};

	return members.stream()
		.map(treeEncoder)
		.collect(Collectors.toList());
}
@Override public Schema apply(Schema schema){ Label label = schema.getLabel(); if(label instanceof ContinuousLabel){ return schema.toAnonymousSchema(); } else // XXX: Ideally, the categorical target field should also be anonymized if(label instanceof CategoricalLabel){ return schema; } else { throw new IllegalArgumentException(); } } };
/**
 * Encodes all member trees of a tree ensemble model as {@link TreeModel} elements.
 *
 * <p>
 * The ensemble is obtained from {@code converter.getTransformer()}. Every tree returned by its
 * {@code trees()} method is encoded using {@code encodeDecisionTree} against the result of
 * {@code schema.toAnonymousSchema()}.
 * </p>
 *
 * @param converter The converter that holds the ensemble model.
 * @param predicateManager Passed through to {@code encodeDecisionTree}.
 * @param schema The schema of the ensemble as a whole.
 *
 * @return One tree model per member tree, in array order.
 */
static public <C extends ModelConverter<? extends M> & HasTreeOptions, M extends Model<M> & TreeEnsembleModel<T>, T extends Model<T> & DecisionTreeModel> List<TreeModel> encodeDecisionTreeEnsemble(C converter, PredicateManager predicateManager, Schema schema){
	M ensemble = converter.getTransformer();

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> result = new ArrayList<>();

	for(T member : ensemble.trees()){
		result.add(encodeDecisionTree(converter, member, predicateManager, segmentSchema));
	}

	return result;
}
/**
 * Encodes a vector of trees as {@link TreeModel} elements.
 *
 * <p>
 * The tree at position {@code i} is paired with the schema at position {@code i} of {@code this.schemas}.
 * Its converter is looked up from {@code this.converters} using the tree object as the key, and is asked to
 * encode the model against the result of {@code schema.toAnonymousSchema()}.
 * </p>
 *
 * @param trees The trees to encode.
 *
 * @return One tree model per tree, in vector order.
 *
 * @throws IllegalArgumentException If the number of trees differs from the number of schemas,
 * or if there is no converter for some tree.
 */
public List<TreeModel> encodeTreeModels(RGenericVector trees){
	List<TreeModel> result = new ArrayList<>();

	// Evaluated once; also used as the loop bound below
	int treeCount = trees.size();

	if(treeCount != this.schemas.size()){
		throw new IllegalArgumentException("Expected " + this.schemas.size() + " tree(s), got " + treeCount);
	}

	for(int i = 0; i < treeCount; i++){
		RGenericVector tree = (RGenericVector)trees.getValue(i);
		Schema schema = this.schemas.get(i);

		RPartConverter converter = this.converters.get(tree);
		if(converter == null){
			throw new IllegalArgumentException("No converter found for the tree at index " + i);
		}

		Schema segmentSchema = schema.toAnonymousSchema();

		TreeModel treeModel = converter.encodeModel(segmentSchema);

		result.add(treeModel);
	}

	return result;
}
}
/**
 * Encodes a bagging ensemble as a segmented {@link MiningModel}.
 *
 * <p>
 * The estimator at position {@code i} is encoded against a sub-schema of
 * {@code schema.toAnonymousSchema()}, selected using the feature indices at position {@code i} of
 * {@code estimatorsFeatures}. The resulting mining model takes its mining schema from the label of the
 * original schema.
 * </p>
 *
 * @param estimators The member estimators.
 * @param estimatorsFeatures The feature indices of each member estimator; indexed in parallel with {@code estimators}.
 * @param multipleModelMethod The method for combining member model results.
 * @param miningFunction The mining function of the resulting model.
 * @param schema The schema of the ensemble as a whole.
 *
 * @return The mining model whose segmentation holds the member models.
 */
static public <E extends Estimator> MiningModel encodeBagging(List<E> estimators, List<List<Integer>> estimatorsFeatures, Segmentation.MultipleModelMethod multipleModelMethod, MiningFunction miningFunction, Schema schema){
	Schema segmentSchema = schema.toAnonymousSchema();

	List<Model> memberModels = new ArrayList<>();

	for(int index = 0; index < estimators.size(); index++){
		E member = estimators.get(index);
		int[] featureIndices = Ints.toArray(estimatorsFeatures.get(index));

		Schema memberSchema = segmentSchema.toSubSchema(featureIndices);

		memberModels.add(member.encodeModel(memberSchema));
	}

	return new MiningModel(miningFunction, ModelUtil.createMiningSchema(schema.getLabel()))
		.setSegmentation(MiningModelUtil.createSegmentation(multipleModelMethod, memberModels));
}
ContinuousLabel continuousLabel = (ContinuousLabel)schema.getLabel(); Schema segmentSchema = schema.toAnonymousSchema();
/**
 * Encodes the member regressors as a segmented regression {@link MiningModel}.
 *
 * <p>
 * Every regressor from {@code getEstimators()} is encoded against the result of
 * {@code schema.toAnonymousSchema()}. The member models are combined using the
 * {@code WEIGHTED_MEDIAN} method, with the weights from {@code getEstimatorWeights()}.
 * </p>
 *
 * @param schema The schema of the ensemble as a whole; its label supplies the mining schema.
 *
 * @return The mining model whose segmentation holds the member models.
 */
@Override
public MiningModel encodeModel(Schema schema){
	List<? extends Regressor> members = getEstimators();
	List<? extends Number> weights = getEstimatorWeights();

	Schema segmentSchema = schema.toAnonymousSchema();

	List<Model> memberModels = new ArrayList<>();

	for(Regressor member : members){
		memberModels.add(member.encodeModel(segmentSchema));
	}

	return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
		.setSegmentation(MiningModelUtil.createSegmentation(MultipleModelMethod.WEIGHTED_MEDIAN, memberModels, weights));
}
/**
 * Encodes the trees as a mining model, and wraps it into a regression with {@code EXP} normalization.
 *
 * <p>
 * The inner model is produced by the superclass against the result of {@code schema.toAnonymousSchema()},
 * and is given a continuous double predicted output field named {@code lgbmValue}.
 * The wrapping regression is created against the original schema.
 * </p>
 *
 * @param trees Passed through to the superclass encoder.
 * @param numIteration Passed through to the superclass encoder.
 * @param schema The schema of the model as a whole.
 *
 * @return The result of {@code MiningModelUtil.createRegression}.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	MiningModel treeEnsemble = super.encodeMiningModel(trees, numIteration, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createRegression(treeEnsemble, RegressionModel.NormalizationMethod.EXP, schema);
}
}
/**
 * Encodes the regression trees as a mining model, and wraps it into a regression with {@code LOGIT} normalization.
 *
 * <p>
 * The inner model is produced by {@code createMiningModel} against the result of
 * {@code schema.toAnonymousSchema()}, and is given a continuous float predicted output field named
 * {@code xgbValue}. The wrapping regression is created against the original schema.
 * </p>
 *
 * @param regTrees Passed through to {@code createMiningModel}.
 * @param base_score Passed through to {@code createMiningModel}.
 * @param ntreeLimit Passed through to {@code createMiningModel}.
 * @param schema The schema of the model as a whole.
 *
 * @return The result of {@code MiningModelUtil.createRegression}.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	MiningModel treeEnsemble = createMiningModel(regTrees, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(treeEnsemble, RegressionModel.NormalizationMethod.LOGIT, schema);
}
}
/**
 * Encodes the regression trees as a mining model, and wraps it into a regression with {@code EXP} normalization.
 *
 * <p>
 * The inner model is produced by {@code createMiningModel} against the result of
 * {@code schema.toAnonymousSchema()}, and is given a continuous float predicted output field named
 * {@code xgbValue}. The wrapping regression is created against the original schema.
 * </p>
 *
 * @param regTrees Passed through to {@code createMiningModel}.
 * @param base_score Passed through to {@code createMiningModel}.
 * @param ntreeLimit Passed through to {@code createMiningModel}.
 * @param schema The schema of the model as a whole.
 *
 * @return The result of {@code MiningModelUtil.createRegression}.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	MiningModel treeEnsemble = createMiningModel(regTrees, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(treeEnsemble, RegressionModel.NormalizationMethod.EXP, schema);
}
}
/**
 * Encodes the regression trees as a mining model, and wraps it into a regression with {@code LOGIT} normalization.
 *
 * <p>
 * The inner model is produced by {@code createMiningModel} against the result of
 * {@code schema.toAnonymousSchema()}, and is given a continuous float predicted output field named
 * {@code xgbValue}. The wrapping regression is created against the original schema.
 * </p>
 *
 * @param regTrees Passed through to {@code createMiningModel}.
 * @param base_score Passed through to {@code createMiningModel}.
 * @param ntreeLimit Passed through to {@code createMiningModel}.
 * @param schema The schema of the model as a whole.
 *
 * @return The result of {@code MiningModelUtil.createRegression}.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	MiningModel treeEnsemble = createMiningModel(regTrees, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(treeEnsemble, RegressionModel.NormalizationMethod.LOGIT, schema);
}
}
/**
 * Encodes the regression trees as a mining model, and wraps it into a regression with {@code EXP} normalization.
 *
 * <p>
 * The inner model is produced by {@code createMiningModel} against the result of
 * {@code schema.toAnonymousSchema()}, and is given a continuous float predicted output field named
 * {@code xgbValue}. The wrapping regression is created against the original schema.
 * </p>
 *
 * @param regTrees Passed through to {@code createMiningModel}.
 * @param base_score Passed through to {@code createMiningModel}.
 * @param ntreeLimit Passed through to {@code createMiningModel}.
 * @param schema The schema of the model as a whole.
 *
 * @return The result of {@code MiningModelUtil.createRegression}.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	MiningModel treeEnsemble = createMiningModel(regTrees, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(treeEnsemble, RegressionModel.NormalizationMethod.EXP, schema);
}
}
ContinuousLabel continuousLabel = (ContinuousLabel)schema.getLabel(); Schema segmentSchema = schema.toAnonymousSchema();
ContinuousLabel continuousLabel = (ContinuousLabel)schema.getLabel(); Schema segmentSchema = schema.toAnonymousSchema();
/**
 * Builds a regression {@link MiningModel} that sums the scores of the given regression trees.
 *
 * <p>
 * Every tree is encoded against the result of {@code schema.toAnonymousSchema()}. The resulting model uses
 * the {@code FLOAT} math context and the {@code SUM} multiple model method, and carries targets created by
 * {@code ModelUtil.createRescaleTargets} from the base score (converted using {@code ValueUtil.floatToDouble}).
 * </p>
 *
 * @param regTrees The trees to encode.
 * @param base_score The base score that is passed on to the rescale targets.
 * @param schema The schema of the model as a whole; its label is cast to {@code ContinuousLabel}.
 *
 * @return The mining model whose segmentation holds the tree models.
 */
static protected MiningModel createMiningModel(List<RegressionTree> regTrees, float base_score, Schema schema){
	// The cast comes first, so that a non-continuous label fails before any tree is encoded
	ContinuousLabel target = (ContinuousLabel)schema.getLabel();

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> members = new ArrayList<>();

	for(RegressionTree regTree : regTrees){
		members.add(regTree.encodeTreeModel(segmentSchema));
	}

	return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(target))
		.setMathContext(MathContext.FLOAT)
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, members))
		.setTargets(ModelUtil.createRescaleTargets(null, ValueUtil.floatToDouble(base_score), target));
}
/**
 * Builds a regression {@link MiningModel} that sums the scores of the given regression trees.
 *
 * <p>
 * Every tree is encoded against the result of {@code schema.toAnonymousSchema()}. The resulting model uses
 * the {@code FLOAT} math context and the {@code SUM} multiple model method, and carries targets created by
 * {@code ModelUtil.createRescaleTargets} from the base score (converted using {@code ValueUtil.floatToDouble}).
 * </p>
 *
 * @param regTrees The trees to encode.
 * @param base_score The base score that is passed on to the rescale targets.
 * @param schema The schema of the model as a whole; its label is cast to {@code ContinuousLabel}.
 *
 * @return The mining model whose segmentation holds the tree models.
 */
static protected MiningModel createMiningModel(List<RegressionTree> regTrees, float base_score, Schema schema){
	// The cast comes first, so that a non-continuous label fails before any tree is encoded
	ContinuousLabel target = (ContinuousLabel)schema.getLabel();

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> members = new ArrayList<>();

	for(RegressionTree regTree : regTrees){
		members.add(regTree.encodeTreeModel(segmentSchema));
	}

	return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(target))
		.setMathContext(MathContext.FLOAT)
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, members))
		.setTargets(ModelUtil.createRescaleTargets(null, ValueUtil.floatToDouble(base_score), target));
}
/**
 * Encodes the trees of a forest object as {@link TreeModel} elements.
 *
 * <p>
 * The forest is read through its {@code num.trees}, {@code child.nodeIDs}, {@code split.varIDs},
 * {@code split.values} and {@code terminal.class.counts} elements. The per-tree slices of these elements are
 * handed to {@code encodeTreeModel}, together with the result of {@code schema.toAnonymousSchema()}.
 * </p>
 *
 * @param forest The forest object.
 * @param miningFunction Passed through to {@code encodeTreeModel}.
 * @param scoreEncoder Passed through to {@code encodeTreeModel}.
 * @param schema The schema of the forest as a whole.
 *
 * @return One tree model per tree, in index order.
 */
private List<TreeModel> encodeForest(RGenericVector forest, MiningFunction miningFunction, ScoreEncoder scoreEncoder, Schema schema){
	RNumberVector<?> numTrees = (RNumberVector<?>)forest.getValue("num.trees");
	RGenericVector childNodeIDs = (RGenericVector)forest.getValue("child.nodeIDs");
	RGenericVector splitVarIDs = (RGenericVector)forest.getValue("split.varIDs");
	RGenericVector splitValues = (RGenericVector)forest.getValue("split.values");
	// NOTE(review): the extra "true" argument presumably marks this element as optional; the null check below relies on that - confirm
	RGenericVector terminalClassCounts = (RGenericVector)forest.getValue("terminal.class.counts", true);

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> treeModels = new ArrayList<>();

	// Loop-invariant; resolved once here, at the point where the first loop condition check would have resolved it
	int treeCount = ValueUtil.asInt(numTrees.asScalar());

	for(int i = 0; i < treeCount; i++){
		TreeModel treeModel = encodeTreeModel(
			miningFunction,
			scoreEncoder,
			(RGenericVector)childNodeIDs.getValue(i),
			(RNumberVector<?>)splitVarIDs.getValue(i),
			(RNumberVector<?>)splitValues.getValue(i),
			(terminalClassCounts != null ? (RGenericVector)terminalClassCounts.getValue(i) : null),
			segmentSchema
		);

		treeModels.add(treeModel);
	}

	return treeModels;
}
/**
 * Encodes the wrapped regressor, undoing the target transformation where an inverse function is available.
 *
 * <p>
 * If the transformer has no inverse function, then the regressor is encoded directly against the given schema.
 * Otherwise, the regressor is encoded against the result of {@code schema.toAnonymousSchema()}, and is given
 * a continuous double predicted output field named {@code func(<label name>)}. That output is created with
 * a transformation named {@code inverseFunc(<field name>)}, whose expression comes from
 * {@code FunctionTransformer.encodeUFunc(inverseFunc, fieldRef)}. The result is wrapped into a regression
 * with {@code NONE} normalization against the original schema.
 * </p>
 *
 * @param schema The schema of the model as a whole.
 *
 * @return The regressor model as is, or the result of {@code MiningModelUtil.createRegression}.
 */
@Override
public Model encodeModel(Schema schema){
	Regressor regressor = getRegressor();
	FunctionTransformer transformer = getTransformer();

	// The value is not used below; the getter call is retained as is
	UFunc func = transformer.getFunc();
	UFunc inverseFunc = transformer.getInverseFunc();

	// Nothing to undo
	if(inverseFunc == null){
		return regressor.encodeModel(schema);
	}

	Label label = schema.getLabel();
	// The value is not used below; the getter call is retained as is
	List<? extends Feature> features = schema.getFeatures();

	Transformation inverseTransformation = new AbstractTransformation(){

		@Override
		public FieldName getName(FieldName name){
			return FieldName.create("inverseFunc(" + name + ")");
		}

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return FunctionTransformer.encodeUFunc(inverseFunc, fieldRef);
		}
	};

	FieldName labelName = label.getName();

	Schema segmentSchema = schema.toAnonymousSchema();

	Model innerModel = regressor.encodeModel(segmentSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("func(" + labelName + ")"), OpType.CONTINUOUS, DataType.DOUBLE, inverseTransformation));

	return MiningModelUtil.createRegression(innerModel, NormalizationMethod.NONE, schema);
}
int columns = ValueUtil.asInt(ntree.asScalar()); Schema segmentSchema = schema.toAnonymousSchema();
Schema segmentSchema = schema.toAnonymousSchema();