- Common ways to obtain ContinuousFeature
/**
 * Placeholder method: takes no arguments, returns nothing, and has an empty body.
 */
private void myMethod() {
}
// Constructs a Discretize from the name reported by continuousFeature.
// NOTE(review): continuousFeature is not declared in the visible lines; it must be in scope here.
Discretize discretize = new Discretize(continuousFeature.getName());
/**
 * Encodes the {@code Bucketizer} transformer as a {@code Discretize} expression over its single input feature.
 *
 * <p>Every pair of adjacent split points yields one bin. The bin's category is its zero-based index
 * rendered as a string. All bins use {@code Interval.Closure.CLOSED_OPEN}, except the final bin, which
 * uses {@code Interval.Closure.CLOSED_CLOSED}.</p>
 *
 * @param encoder the encoder used to look up the input feature and to create the derived field
 * @return a single-element list holding a {@code CategoricalFeature} built from the derived field and the bin categories
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
    Bucketizer bucketizer = getTransformer();

    Feature inputFeature = encoder.getOnlyFeature(bucketizer.getInputCol());
    ContinuousFeature continuousInput = inputFeature.toContinuousFeature();

    Discretize binning = new Discretize(continuousInput.getName());
    List<String> binLabels = new ArrayList<>();

    double[] boundaries = bucketizer.getSplits();

    // N split points delimit N - 1 bins.
    int binCount = boundaries.length - 1;

    for(int bin = 0; bin < binCount; bin++){
        String label = String.valueOf(bin);
        binLabels.add(label);

        // Only the last bin is closed on its right-hand side.
        Interval.Closure closure;
        if(bin == (binCount - 1)){
            closure = Interval.Closure.CLOSED_CLOSED;
        } else {
            closure = Interval.Closure.CLOSED_OPEN;
        }

        Interval range = new Interval(closure)
            .setLeftMargin(formatMargin(boundaries[bin]))
            .setRightMargin(formatMargin(boundaries[bin + 1]));

        binning.addDiscretizeBins(new DiscretizeBin(label, range));
    }

    DerivedField derivedField = encoder.createDerivedField(formatName(bucketizer), OpType.CATEGORICAL, DataType.INTEGER, binning);

    return Collections.singletonList(new CategoricalFeature(encoder, derivedField, binLabels));
}
// Passes the name of continuousFeature together with `categories` to encoder.toCategorical
// and keeps the returned Field.
// NOTE(review): presumably this re-types the named field as categorical over the given
// categories - confirm against the encoder's documentation.
Field<?> field = encoder.toCategorical(continuousFeature.getName(), categories);
// Reads the name of continuousFeature as a FieldName.
FieldName name = continuousFeature.getName();
/**
 * Encodes this estimator as a classification {@code NaiveBayesModel}.
 *
 * <p>The theta shape supplies the number of classes (index 0) and the number of features (index 1).
 * For each feature, the matching column of theta (means) and of sigma (variances) is extracted and
 * attached to a {@code BayesInput} as target value statistics. The class counts are attached to the
 * {@code BayesOutput} as target value counts.</p>
 *
 * @param schema the schema supplying the label and the features; its label is cast to
 *        {@code CategoricalLabel}, and every feature is converted to a continuous feature
 * @return the model, with a probability output of type {@code DataType.DOUBLE} attached
 */
@Override
public NaiveBayesModel encodeModel(Schema schema){
    int[] thetaShape = getThetaShape();
    int numClasses = thetaShape[0];
    int numFeatures = thetaShape[1];

    List<? extends Number> thetaValues = getTheta();
    List<? extends Number> sigmaValues = getSigma();

    CategoricalLabel label = (CategoricalLabel)schema.getLabel();

    BayesInputs inputs = new BayesInputs();

    for(int column = 0; column < numFeatures; column++){
        Feature feature = schema.getFeature(column);

        // One column per feature, holding one value per class.
        List<? extends Number> columnMeans = CMatrixUtil.getColumn(thetaValues, numClasses, numFeatures, column);
        List<? extends Number> columnVariances = CMatrixUtil.getColumn(sigmaValues, numClasses, numFeatures, column);

        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        BayesInput input = new BayesInput(continuousFeature.getName())
            .setTargetValueStats(encodeTargetValueStats(label.getValues(), columnMeans, columnVariances));

        inputs.addBayesInputs(input);
    }

    List<Integer> classCounts = getClassCount();

    BayesOutput output = new BayesOutput(label.getName(), null)
        .setTargetValueCounts(encodeTargetValueCounts(label.getValues(), classCounts));

    // NOTE(review): the leading 0d is presumably the model's threshold - confirm against the NaiveBayesModel constructor.
    return new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(label), inputs, output)
        .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, label));
}