@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ Number threshold = getThreshold(); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "($name <= threshold) ? 0 : 1" Apply apply = PMMLUtil.createApply("threshold", continuousFeature.ref(), PMMLUtil.createConstant(threshold)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("binarizer", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
/**
 * Normalizes a feature to a representation with at most single (float) precision.
 *
 * <p>Binary features are returned unchanged. Any other feature is converted to a
 * continuous feature; INTEGER and FLOAT data types are kept as they are, and DOUBLE
 * is cast to FLOAT.</p>
 *
 * @param feature The feature to normalize.
 * @return The same binary feature, or a continuous feature of INTEGER or FLOAT data type.
 * @throws IllegalArgumentException If the continuous feature has any other data type.
 */
@Override
public Feature apply(Feature feature){

    if(feature instanceof BinaryFeature){
        return feature;
    }

    ContinuousFeature continuousFeature = feature.toContinuousFeature();

    DataType dataType = continuousFeature.getDataType();
    switch(dataType){
        case INTEGER:
        case FLOAT:
            return continuousFeature;
        case DOUBLE:
            return continuousFeature.toContinuousFeature(DataType.FLOAT);
        default:
            // Name the offending type so that the failure can be diagnosed from the stack trace alone
            throw new IllegalArgumentException("Expected integer, float or double data type, got " + dataType);
    }
}
};
/**
 * Wraps this object into a new {@code ContinuousFeature} that is bound to the
 * encoder returned by {@code ensureEncoder()}.
 *
 * @return A new continuous feature constructed from the encoder and this object.
 */
@Override
public ContinuousFeature toContinuousFeature(){
    return new ContinuousFeature(ensureEncoder(), this);
}
};
Discretize discretize = new Discretize(continuousFeature.getName());
/**
 * Converts this feature to a continuous feature of DOUBLE data type.
 *
 * <p>The expression is supplied lazily: when invoked, the supplier multiplies
 * the continuous reference of {@code getFeature()} by the constant {@code getFactor()}.</p>
 *
 * @return The result of delegating to {@code toContinuousFeature(name, DataType.DOUBLE, supplier)}.
 */
@Override
public ContinuousFeature toContinuousFeature(){
    Supplier<Apply> productSupplier = () -> {
        Feature operand = getFeature();
        Number multiplier = getFactor();

        // "$operand * multiplier"
        return PMMLUtil.createApply(
            "*",
            operand.toContinuousFeature().ref(),
            PMMLUtil.createConstant(multiplier)
        );
    };

    return toContinuousFeature(name, DataType.DOUBLE, productSupplier);
}
};
DataType dataType = continuousFeature.getDataType(); switch(dataType){ case INTEGER:
/**
 * Narrows the return type of the inherited conversion to {@code ContinuousOutputFeature}.
 *
 * @param dataType The requested data type, passed through to the superclass implementation.
 * @return The superclass result, cast to {@code ContinuousOutputFeature}.
 */
@Override
public ContinuousOutputFeature toContinuousFeature(DataType dataType){
    return (ContinuousOutputFeature)super.toContinuousFeature(dataType);
}
/**
 * Wraps a field into a feature that matches its operational type, and registers it
 * via {@code addFeature(Feature)}.
 *
 * <p>CATEGORICAL fields become a {@code CategoricalFeature} (the field is cast to
 * {@code DataField}); CONTINUOUS fields become a {@code ContinuousFeature}.</p>
 *
 * @param field The field to wrap.
 * @throws IllegalArgumentException If the operational type is neither CATEGORICAL nor CONTINUOUS.
 */
public void addFeature(Field<?> field){
    OpType opType = field.getOpType();

    Feature feature;

    switch(opType){
        case CATEGORICAL:
            feature = new CategoricalFeature(this, (DataField)field);
            break;
        case CONTINUOUS:
            feature = new ContinuousFeature(this, field);
            break;
        default:
            // Report the unexpected operational type instead of failing without any context
            throw new IllegalArgumentException("Expected categorical or continuous operational type, got " + opType);
    }

    addFeature(feature);
}
/**
 * Encodes the bucketizer as a {@code Discretize} expression over its single input column.
 *
 * <p>Each pair of adjacent split points yields one bin, labelled with its zero-based
 * index. Every bin is closed-open, except for the last one, which is closed on both ends.
 * The expression is registered as a categorical derived field of INTEGER data type.</p>
 *
 * @param encoder The encoder that resolves the input feature and creates the derived field.
 * @return A singleton list holding the categorical feature over the bin labels.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
    Bucketizer transformer = getTransformer();

    ContinuousFeature input = (encoder.getOnlyFeature(transformer.getInputCol())).toContinuousFeature();

    Discretize discretize = new Discretize(input.getName());

    List<String> labels = new ArrayList<>();

    double[] splits = transformer.getSplits();

    int binCount = splits.length - 1;

    for(int bin = 0; bin < binCount; bin++){
        String label = String.valueOf(bin);

        labels.add(label);

        // Only the last bin includes its right margin
        Interval.Closure closure;

        if(bin < (binCount - 1)){
            closure = Interval.Closure.CLOSED_OPEN;
        } else {
            closure = Interval.Closure.CLOSED_CLOSED;
        }

        Interval interval = new Interval(closure)
            .setLeftMargin(formatMargin(splits[bin]))
            .setRightMargin(formatMargin(splits[bin + 1]));

        discretize.addDiscretizeBins(new DiscretizeBin(label, interval));
    }

    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);

    return Collections.singletonList(new CategoricalFeature(encoder, derivedField, labels));
}
/**
 * Encodes the binarizer as an {@code "if"} expression over its single input column.
 *
 * <p>The expression yields {@code 0d} when the input is less than or equal to the
 * transformer's threshold, and {@code 1d} otherwise. It is registered as a categorical
 * derived field of DOUBLE data type.</p>
 *
 * @param encoder The encoder that resolves the input feature and creates the derived field.
 * @return A singleton list holding the categorical feature with categories "0" and "1".
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
    Binarizer transformer = getTransformer();

    ContinuousFeature input = (encoder.getOnlyFeature(transformer.getInputCol())).toContinuousFeature();

    // "$input <= threshold"
    Apply condition = PMMLUtil.createApply("lessOrEqual", input.ref(), PMMLUtil.createConstant(transformer.getThreshold()));

    // "condition ? 0 : 1"
    Apply apply = new Apply("if")
        .addExpressions(condition)
        .addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.DOUBLE, apply);

    return Collections.singletonList(new CategoricalFeature(encoder, derivedField, Arrays.asList("0", "1")));
}
}
DataType dataType = continuousFeature.getDataType(); switch(dataType){ case INTEGER:
.toContinuousFeature(DataType.DOUBLE); // Second, cast from numpy.float32 to numpy.float64
/**
 * Applies the configured universal function to every input feature.
 *
 * <p>When {@code getFunc()} returns {@code null}, the input list is returned as is.
 * Otherwise, each feature is converted to a continuous feature, and a continuous
 * derived field of DOUBLE data type is obtained via {@code encoder.ensureDerivedField(...)},
 * with the expression supplied by {@code encodeUFunc(func, ref)}.</p>
 *
 * @param features The input features.
 * @param encoder The encoder that provides the derived fields.
 * @return The input list, or one new continuous feature per input feature, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    UFunc func = getFunc();

    // Nothing to apply
    if(func == null){
        return features;
    }

    List<Feature> transformed = new ArrayList<>();

    for(Feature feature : features){
        ContinuousFeature input = feature.toContinuousFeature();

        DerivedField derivedField = encoder.ensureDerivedField(
            FeatureUtil.createName(func.getName(), input),
            OpType.CONTINUOUS,
            DataType.DOUBLE,
            () -> encodeUFunc(func, input.ref())
        );

        transformed.add(new ContinuousFeature(encoder, derivedField));
    }

    return transformed;
}
/**
 * Converts this feature to a continuous feature by asking the encoder for the
 * continuous counterpart of the field named {@code getName()}.
 *
 * @return A new continuous feature over the field returned by {@code encoder.toContinuous(...)},
 * which is cast to {@code DerivedField}.
 */
@Override
public ContinuousFeature toContinuousFeature(){
    PMMLEncoder pmmlEncoder = getEncoder();

    DerivedField continuousField = (DerivedField)pmmlEncoder.toContinuous(getName());

    return new ContinuousFeature(pmmlEncoder, continuousField);
}
};
/**
 * Normalizes a feature to a representation with at most single (float) precision.
 *
 * <p>Binary features are returned unchanged. Any other feature is converted to a
 * continuous feature; INTEGER and FLOAT data types are kept as they are, and DOUBLE
 * is cast to FLOAT.</p>
 *
 * @param feature The feature to normalize.
 * @return The same binary feature, or a continuous feature of INTEGER or FLOAT data type.
 * @throws IllegalArgumentException If the continuous feature has any other data type.
 */
@Override
public Feature apply(Feature feature){

    if(feature instanceof BinaryFeature){
        return feature;
    }

    ContinuousFeature continuousFeature = feature.toContinuousFeature();

    DataType dataType = continuousFeature.getDataType();
    switch(dataType){
        case INTEGER:
        case FLOAT:
            return continuousFeature;
        case DOUBLE:
            return continuousFeature.toContinuousFeature(DataType.FLOAT);
        default:
            // Name the offending type so that the failure can be diagnosed from the stack trace alone
            throw new IllegalArgumentException("Expected integer, float or double data type, got " + dataType);
    }
}
};
Field<?> field = encoder.toCategorical(continuousFeature.getName(), categories);
DataType dataType = continuousFeature.getDataType(); switch(dataType){ case INTEGER:
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<? extends Number> scale = getScale(); ClassDictUtil.checkSize(features, scale); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); Number value = scale.get(i); if(ValueUtil.isOne(value)){ result.add(feature); continue; } ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "$name / scale" Apply apply = PMMLUtil.createApply("/", continuousFeature.ref(), PMMLUtil.createConstant(value)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("max_abs_scaler", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
/**
 * Translates the expression string returned by {@code getExpr()} against the input
 * features, and registers the result as a derived field named {@code eval(<expr>)}.
 *
 * @param features The input features that the expression is translated against.
 * @param encoder The encoder used to create the derived field.
 * @return A singleton list holding a continuous feature over the derived field.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    String expr = getExpr();

    Expression translated = ExpressionTranslator.translate(expr, features);

    FieldName derivedName = FieldName.create("eval(" + expr + ")");

    DerivedField derivedField = encoder.createDerivedField(derivedName, translated);

    Feature evalFeature = new ContinuousFeature(encoder, derivedField);

    return Collections.singletonList(evalFeature);
}
FieldName name = continuousFeature.getName();