/**
 * Expands a categorical feature into one {@link BinaryFeature} per category value.
 *
 * <p>When {@code useAllFactorLevels} is {@code false}, the first category value is
 * left out of the expansion. Any feature that is not a {@link CategoricalFeature}
 * is passed through unchanged as a single-element stream.</p>
 *
 * @param feature the feature to expand
 * @return a stream of binary features, or a stream holding just {@code feature}
 */
@Override
public Stream<Feature> apply(Feature feature){
	PMMLEncoder encoder = feature.getEncoder();

	// Guard clause: only categorical features are expanded
	if(!(feature instanceof CategoricalFeature)){
		return Stream.of(feature);
	}

	CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

	List<String> allLevels = categoricalFeature.getValues();

	// Skip the first level unless every factor level was requested
	List<String> levels = (useAllFactorLevels ? allLevels : allLevels.subList(1, allLevels.size()));

	return levels.stream()
		.map(level -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), level));
}
};
/**
 * Expands a categorical feature into one {@link BinaryFeature} per category value,
 * plus one extra binary feature for the {@code "missing(NA)"} pseudo-category.
 *
 * <p>The categorical feature is also passed to {@code ImputerUtil.encodeFeature}
 * with the {@code "missing(NA)"} replacement value and the {@code AS_VALUE}
 * missing value treatment. Any feature that is not a {@link CategoricalFeature}
 * is passed through unchanged as a single-element stream.</p>
 *
 * @param feature the feature to expand
 * @return a stream of binary features, or a stream holding just {@code feature}
 */
@Override
public Stream<Feature> apply(Feature feature){
	ModelEncoder encoder = (ModelEncoder)feature.getEncoder();

	// Guard clause: only categorical features are expanded
	if(!(feature instanceof CategoricalFeature)){
		return Stream.of(feature);
	}

	CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

	String missingLevel = "missing(NA)";

	// Work on a private copy, so that the feature's own value list is not modified here
	List<String> levels = new ArrayList<>(categoricalFeature.getValues());
	levels.add(missingLevel);

	ImputerUtil.encodeFeature(categoricalFeature, missingLevel, MissingValueTreatmentMethod.AS_VALUE);

	return levels.stream()
		.map(level -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), level));
}
};
/**
 * Encodes the transformer's single input column as a list of features, one per
 * retained category value.
 *
 * <p>The last category value is left out when the transformer's {@code dropLast}
 * param is {@code true}, or when that param is not defined at all.</p>
 *
 * @param encoder the encoder that holds the input column's feature
 * @return the features produced by {@code encodeFeature} for the retained category values
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	OneHotEncoder transformer = getTransformer();

	CategoricalFeature categoricalFeature = (CategoricalFeature)encoder.getOnlyFeature(transformer.getInputCol());

	// An undefined param counts as "drop the last category"
	Option<Object> dropLastOption = transformer.get(transformer.dropLast());
	boolean dropLast = (dropLastOption.isDefined() ? (Boolean)dropLastOption.get() : true);

	List<String> categories = categoricalFeature.getValues();

	List<String> retainedCategories = (dropLast ? categories.subList(0, categories.size() - 1) : categories);

	return encodeFeature(encoder, categoricalFeature, retainedCategories);
}
features = OneHotEncoderConverter.encodeFeature(categoricalFeature.getEncoder(), categoricalFeature, categoricalFeature.getValues());
/**
 * Encodes a single input feature as a list of {@link BinaryFeature}s, one per
 * fitted value returned by {@code getValues()}.
 *
 * <ul>
 *   <li>For a {@link CategoricalFeature}, the i-th binary feature is bound to the
 *   feature's own i-th category value; the sizes of the two value lists are
 *   checked against each other first.</li>
 *   <li>For a {@link WildcardFeature}, each fitted value is converted to an integer and
 *   formatted as a category string; the wildcard feature is then converted to a
 *   categorical feature over those category strings.</li>
 * </ul>
 *
 * @param features the input features; checked by {@code ClassDictUtil.checkSize(1, features)}
 * @param encoder the encoder that the new binary features are bound to
 * @return the binary features, in the order of the fitted values
 * @throws IllegalArgumentException if the input feature is neither a categorical
 *         feature nor a wildcard feature
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> result = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		for(int i = 0; i < values.size(); i++){
			result.add(new BinaryFeature(encoder, categoricalFeature, categoricalFeature.getValue(i)));
		}
	} else

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<String> categories = new ArrayList<>();

		for(int i = 0; i < values.size(); i++){
			int value = ValueUtil.asInt(values.get(i));

			String category = ValueUtil.formatValue(value);

			categories.add(category);

			result.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// Performed after the binary features have been created, same as before
		wildcardFeature.toCategoricalFeature(categories);
	} else

	{
		// Name the offending type, so that the failure can be diagnosed from the message alone
		String featureType = (feature != null ? feature.getClass().getName() : "null");

		throw new IllegalArgumentException("Expected a categorical or wildcard feature, got " + featureType);
	}

	return result;
}
@Override public List<Feature> encodeFeatures(SparkMLEncoder encoder){ OneHotEncoderModel transformer = getTransformer(); String[] inputCols = transformer.getInputCols(); boolean dropLast = transformer.getDropLast(); List<Feature> result = new ArrayList<>(); for(int i = 0; i < inputCols.length; i++){ CategoricalFeature categoricalFeature = (CategoricalFeature)encoder.getOnlyFeature(inputCols[i]); List<String> values = categoricalFeature.getValues(); if(dropLast){ values = values.subList(0, values.size() - 1); } // XXX List<BinaryFeature> binaryFeatures = (List)OneHotEncoderConverter.encodeFeature(encoder, categoricalFeature, values); result.add(new BinarizedCategoricalFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), binaryFeatures)); } return result; } }
List<String> values = categoricalFeature.getValues();
List<String> values = categoricalFeature.getValues(); if(values.size() != (leftCategories.length + rightCategories.length)){ throw new IllegalArgumentException();
List<String> values = categoricalFeature.getValues();
List<String> values = categoricalFeature.getValues();
List<String> values = categoricalFeature.getValues();
CategoricalFeature categoricalFeature = (CategoricalFeature)feature; List<String> values = categoricalFeature.getValues(); List<Integer> splitValues = (List<Integer>)splitpoint.getValues();
List<String> values = categoricalFeature.getValues();
List<String> values = categoricalFeature.getValues();
List<String> values = categoricalFeature.getValues();