/**
 * Maps a feature to the representation expected downstream.
 *
 * <p>A {@link BinaryFeature} is handed back as-is. Any other feature is converted
 * by calling {@code toContinuousFeature(dataType)}, where {@code dataType} is a
 * variable captured from the enclosing scope (declared outside this excerpt).</p>
 *
 * @param feature the feature to map
 * @return the same binary feature, or the continuous counterpart of the feature
 */
@Override
public Feature apply(Feature feature){
    // Binary features pass through untouched
    if(feature instanceof BinaryFeature){
        return (BinaryFeature)feature;
    }

    return feature.toContinuousFeature(dataType);
}
}; // closes the enclosing declaration, which is opened outside this excerpt
/**
 * Registers a missing value decorator for the data field behind a feature.
 *
 * <p>The decorator carries the replacement value (formatted via
 * {@code ValueUtil.formatValue}) and the treatment method, and is added to the
 * feature's encoder under the feature's name.</p>
 *
 * @param feature the feature to decorate; its encoder is cast to {@code ModelEncoder}
 * @param replacementValue the value to substitute for missing values
 * @param missingValueTreatmentMethod the treatment method to record on the decorator
 * @return the same {@code feature} instance
 * @throws IllegalArgumentException if the field of the feature is not a {@code DataField}
 */
static public Feature encodeFeature(Feature feature, Object replacementValue, MissingValueTreatmentMethod missingValueTreatmentMethod){
    ModelEncoder encoder = (ModelEncoder)feature.getEncoder();

    Field<?> field = feature.getField();

    // Only data fields are accepted here; report what was found instead of failing silently
    if(!(field instanceof DataField)){
        String actualType = (field != null ? field.getClass().getName() : "null");

        throw new IllegalArgumentException("Expected a data field for feature " + feature.getName() + ", got " + actualType);
    }

    MissingValueDecorator missingValueDecorator = new MissingValueDecorator()
        .setMissingValueReplacement(ValueUtil.formatValue(replacementValue))
        .setMissingValueTreatment(missingValueTreatmentMethod);

    encoder.addDecorator(feature.getName(), missingValueDecorator);

    return feature;
}
} // closes the enclosing declaration, which is opened outside this excerpt
ModelEncoder encoder = (ModelEncoder)feature.getEncoder(); encoder.addDecorator(feature.getName(), importanceDecorator); return feature.toContinuousFeature();
static private Predicate encodePredicate(Feature feature, Node node, boolean left){ FieldName name = feature.getName(); SimplePredicate.Operator operator; String value; ContinuousFeature continuousFeature = feature.toContinuousFeature();
static public Feature encodeFeature(Feature feature, Object missingValue, Object replacementValue, MissingValueTreatmentMethod missingValueTreatmentMethod, SkLearnEncoder encoder){ Field<?> field = feature.getField(); encoder.addDecorator(feature.getName(), missingValueDecorator); Expression expression = feature.ref(); expression = PMMLUtil.createApply("equal", expression, PMMLUtil.createConstant(missingValue, feature.getDataType())); } else expression = PMMLUtil.createApply("if", expression, PMMLUtil.createConstant(replacementValue, feature.getDataType()), feature.ref());
Apply apply = PMMLUtil.createApply("if", PMMLUtil.createApply("equal", feature.ref(), PMMLUtil.createConstant(value, feature.getDataType())), PMMLUtil.createConstant(posLabel), PMMLUtil.createConstant(negLabel)); encoder.toCategorical(feature.getName(), categories);
apply.addExpressions(feature.ref()); ContinuousFeature continuousFeature = feature.toContinuousFeature();
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ String function = getFunction(); Boolean trimBlanks = getTrimBlanks(); if(function == null && !trimBlanks){ return features; } List<Feature> result = new ArrayList<>(); for(Feature feature : features){ Expression expression = feature.ref(); if(function != null){ expression = PMMLUtil.createApply(function, expression); } // End if if(trimBlanks){ expression = PMMLUtil.createApply("trimBlanks", expression); } Field<?> field = encoder.toCategorical(feature.getName(), Collections.emptyList()); // XXX: Should have been set by the previous transformer field.setDataType(DataType.STRING); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("normalize", feature), OpType.CATEGORICAL, DataType.STRING, expression); feature = new StringFeature(encoder, derivedField); result.add(feature); } return result; }
/**
 * Builds a function application over a feature reference and a string term.
 *
 * @param function the name of the function to apply
 * @param feature the feature whose reference becomes the first argument
 * @param index not used by this implementation
 * @param term the term that becomes the second argument, as a string constant
 * @return the {@code Apply} element {@code function(feature, term)}
 */
public Apply encodeApply(String function, Feature feature, int index, String term){
    Constant termConstant = PMMLUtil.createConstant(term, DataType.STRING);

    Expression featureExpression = feature.ref();

    return PMMLUtil.createApply(function, featureExpression, termConstant);
}
@Override public List<Feature> encodeFeatures(SparkMLEncoder encoder){ RegexTokenizer transformer = getTransformer(); if(!transformer.getGaps()){ throw new IllegalArgumentException("Expected splitter mode, got token matching mode"); } // End if if(transformer.getMinTokenLength() != 1){ throw new IllegalArgumentException("Expected 1 as minimum token length, got " + transformer.getMinTokenLength() + " as minimum token length"); } Feature feature = encoder.getOnlyFeature(transformer.getInputCol()); Field<?> field = feature.getField(); if(transformer.getToLowercase()){ Apply apply = PMMLUtil.createApply("lowercase", feature.ref()); field = encoder.createDerivedField(FeatureUtil.createName("lowercase", feature), OpType.CATEGORICAL, DataType.STRING, apply); } return Collections.singletonList(new DocumentFeature(encoder, field, transformer.getPattern())); } }
/**
 * Expands a categorical feature into one binary feature per category value.
 *
 * <p>Unless {@code useAllFactorLevels} (captured from the enclosing scope, declared
 * outside this excerpt) is set, the first category value is left out. Features
 * that are not categorical are passed through as a single-element stream.</p>
 *
 * @param feature the feature to expand
 * @return a stream of binary features, or a stream holding just {@code feature}
 */
@Override
public Stream<Feature> apply(Feature feature){
    PMMLEncoder encoder = feature.getEncoder();

    // Anything other than a categorical feature is forwarded unchanged
    if(!(feature instanceof CategoricalFeature)){
        return Stream.of(feature);
    }

    CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

    List<String> allValues = categoricalFeature.getValues();

    // Leave out the first level unless all levels were requested
    List<String> selectedValues = (useAllFactorLevels ? allValues : allValues.subList(1, allValues.size()));

    return selectedValues.stream()
        .map(value -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), value));
}
}; // closes the enclosing declaration, which is opened outside this excerpt
FieldName name = feature.getName(); ContinuousFeature continuousFeature = feature.toContinuousFeature();
DataType dataType = feature.getDataType(); switch(dataType){ case INTEGER: Field<?> field = encoder.toCategorical(feature.getName(), categories); case "keep": Apply setApply = PMMLUtil.createApply("isIn", feature.ref()); Apply apply = PMMLUtil.createApply("if", setApply, feature.ref(), PMMLUtil.createConstant(invalidCategory, dataType));
/**
 * Builds a hinge function expression over a feature.
 *
 * <p>For direction {@code -1} the result is {@code max(cut - feature, 0)}; for
 * direction {@code 1} it is {@code max(feature - cut, 0)}.</p>
 *
 * @param dir the direction of the hinge, either {@code -1} or {@code 1}
 * @param feature the feature whose reference is used as the variable
 * @param cut the cut point of the hinge
 * @return the {@code max} application wrapping the directed difference and zero
 * @throws IllegalArgumentException if {@code dir} is neither {@code -1} nor {@code 1}
 */
static private Apply createHingeFunction(int dir, Feature feature, double cut){
    Expression expression;

    switch(dir){
        case -1:
            expression = PMMLUtil.createApply("-", PMMLUtil.createConstant(cut), feature.ref());
            break;
        case 1:
            expression = PMMLUtil.createApply("-", feature.ref(), PMMLUtil.createConstant(cut));
            break;
        default:
            // Name the offending value so that a bad model definition can be diagnosed
            throw new IllegalArgumentException("Expected -1 or 1 as hinge direction, got " + dir);
    }

    return PMMLUtil.createApply("max", expression, PMMLUtil.createConstant(0d));
}
} // closes the enclosing declaration, which is opened outside this excerpt
/**
 * Expands a categorical feature into binary features, including an extra
 * {@code "missing(NA)"} level.
 *
 * <p>The category values are copied, the {@code "missing(NA)"} value is appended,
 * and {@code ImputerUtil.encodeFeature} is called with that same value and the
 * {@code AS_VALUE} treatment. Features that are not categorical are passed through
 * as a single-element stream.</p>
 *
 * @param feature the feature to expand; its encoder is cast to {@code ModelEncoder}
 * @return a stream of binary features, or a stream holding just {@code feature}
 */
@Override
public Stream<Feature> apply(Feature feature){
    ModelEncoder encoder = (ModelEncoder)feature.getEncoder();

    // Anything other than a categorical feature is forwarded unchanged
    if(!(feature instanceof CategoricalFeature)){
        return Stream.of(feature);
    }

    CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

    String missingLevel = "missing(NA)";

    // Work on a copy so that the value list of the feature itself is not modified
    List<String> levels = new ArrayList<>(categoricalFeature.getValues());
    levels.add(missingLevel);

    ImputerUtil.encodeFeature(categoricalFeature, missingLevel, MissingValueTreatmentMethod.AS_VALUE);

    return levels.stream()
        .map(level -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), level));
}
}; // closes the enclosing declaration, which is opened outside this excerpt
/**
 * Delegates the conversion to {@code encodedFeature}, a variable captured from
 * the enclosing scope (declared outside this excerpt).
 *
 * @return the continuous feature produced by {@code encodedFeature}
 */
@Override
public ContinuousFeature toContinuousFeature(){
    ContinuousFeature delegateResult = encodedFeature.toContinuousFeature();

    return delegateResult;
}
}; // closes the enclosing declaration, which is opened outside this excerpt
/**
 * Associates a column with a list of features.
 *
 * <p>If the column already has a non-empty list of features, the new list must
 * have the same size and the same feature names, position by position. The new
 * list then replaces the existing one.</p>
 *
 * @param column the column to associate the features with
 * @param features the features of the column
 * @throws IllegalArgumentException if the column already has features and the new
 *         list differs from them in size or in the name of any feature
 */
public void putFeatures(String column, List<Feature> features){
    List<Feature> existingFeatures = this.columnFeatures.get(column);

    if(existingFeatures != null && !existingFeatures.isEmpty()){

        if(features.size() != existingFeatures.size()){
            throw new IllegalArgumentException("Expected " + existingFeatures.size() + " features, got " + features.size() + " features");
        }

        for(int i = 0; i < existingFeatures.size(); i++){
            Feature existingFeature = existingFeatures.get(i);
            Feature feature = features.get(i);

            // Report the mismatching names and position, like the size check above does for sizes
            if(!(feature.getName()).equals(existingFeature.getName())){
                throw new IllegalArgumentException("Expected feature " + existingFeature.getName() + " at index " + i + ", got feature " + feature.getName());
            }
        }
    }

    this.columnFeatures.put(column, features);
}
/**
 * Combines several features into one continuous feature by applying the
 * configured function to all of their references.
 *
 * <p>A list with at most one feature is returned unchanged. Otherwise the function
 * name is mapped through {@code translateFunction}, and the resulting application
 * is stored as a continuous, double-typed derived field.</p>
 *
 * @param features the features to combine
 * @param encoder the encoder used for creating the derived field
 * @return the input list when it has fewer than two elements, otherwise a singleton
 *         list holding the combined {@code ContinuousFeature}
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    String function = getFunction();

    // There is nothing to combine with fewer than two features
    if(features.size() < 2){
        return features;
    }

    Apply combined = new Apply(translateFunction(function));

    for(Feature argument : features){
        combined.addExpressions(argument.ref());
    }

    DerivedField combinedField = encoder.createDerivedField(FeatureUtil.createName(function, features), OpType.CONTINUOUS, DataType.DOUBLE, combined);

    Feature combinedFeature = new ContinuousFeature(encoder, combinedField);

    return Collections.singletonList(combinedFeature);
}
/**
 * Expands a feature into an array of {@code degree} features, one per power.
 *
 * <p>For a {@link BinaryFeature} every slot holds the feature itself. For any other
 * feature, slot 0 holds the feature and slot {@code k} (for {@code k >= 1}) holds a
 * {@code PowerFeature} of its continuous form raised to {@code k + 1}. Both
 * {@code degree} and {@code encoder} are captured from the enclosing scope
 * (declared outside this excerpt).</p>
 *
 * @param feature the feature to expand
 * @return an array of length {@code degree}
 */
@Override
public Feature[] apply(Feature feature){
    Feature[] powers = new Feature[degree];

    if(feature instanceof BinaryFeature){
        // Every slot reuses the binary feature itself
        Arrays.fill(powers, (BinaryFeature)feature);

        return powers;
    }

    powers[0] = feature;

    ContinuousFeature base = feature.toContinuousFeature();

    // Slot k holds the base raised to the power k + 1
    for(int slot = 1; slot < degree; slot++){
        powers[slot] = new PowerFeature(encoder, base, slot + 1);
    }

    return powers;
}
}; // closes the enclosing declaration, which is opened outside this excerpt