/**
 * Declares a categorical, string-typed data field for the IndexToString
 * transformer and exposes it as a single categorical feature.
 *
 * @param encoder the encoder that creates the data field and is handed to the new feature
 * @return a one-element list holding the categorical feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	// The transformer's labels are passed along as the values of the new field
	DataField labelField = encoder.createDataField(
		formatName(indexToString),
		OpType.CATEGORICAL,
		DataType.STRING,
		Arrays.asList(indexToString.getLabels())
	);

	Feature labelFeature = new CategoricalFeature(encoder, labelField);

	return Collections.singletonList(labelFeature);
}
}
/**
 * Expands a categorical feature into one binary feature per category value.
 * Any other kind of feature is passed through unchanged.
 *
 * @param feature the feature to expand
 * @return the binary features of a categorical input, or a single-element stream holding the input itself
 */
@Override
public Stream<Feature> apply(Feature feature){
	PMMLEncoder encoder = feature.getEncoder();

	if(!(feature instanceof CategoricalFeature)){
		return Stream.of(feature);
	}

	CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

	List<String> allLevels = categoricalFeature.getValues();

	// Unless all factor levels are requested, the first level is left out
	List<String> levels = (useAllFactorLevels ? allLevels : allLevels.subList(1, allLevels.size()));

	return levels.stream()
		.map(level -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), level));
}
};
// Downcast the feature, then one-hot encode it over all of its values, using the encoder that the feature itself reports.
// NOTE(review): `name` is not used by the statements visible in this excerpt - confirm that it is needed further on.
CategoricalFeature categoricalFeature = (CategoricalFeature)feature; FieldName name = categoricalFeature.getName(); features = OneHotEncoderConverter.encodeFeature(categoricalFeature.getEncoder(), categoricalFeature, categoricalFeature.getValues());
/**
 * Expands the first (and presumably only - see the size check) input feature
 * into one binary feature per category.
 *
 * <ul>
 *   <li>A categorical feature yields one binary feature per position of this
 *   transformer's values, each paired with the feature's own value at that position.</li>
 *   <li>A wildcard feature yields one binary feature per transformer value, where each
 *   value is converted to an int and formatted as the category; afterwards the wildcard
 *   feature is asked to convert itself to a categorical feature over those categories.</li>
 * </ul>
 *
 * @param features the input features; only the first element is read
 * @param encoder the encoder handed to every created binary feature
 * @return the binary features, in the order of this transformer's values
 * @throws IllegalArgumentException if the input feature is neither categorical nor wildcard
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	// NOTE(review): presumably rejects anything but exactly one input feature - confirm against ClassDictUtil
	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> result = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		// NOTE(review): presumably verifies that both lists have the same size - confirm against ClassDictUtil
		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		for(int i = 0; i < values.size(); i++){
			result.add(new BinaryFeature(encoder, categoricalFeature, categoricalFeature.getValue(i)));
		}
	} else

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<String> categories = new ArrayList<>();

		for(int i = 0; i < values.size(); i++){
			int value = ValueUtil.asInt(values.get(i));

			String category = ValueUtil.formatValue(value);

			categories.add(category);

			result.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// The return value is discarded; the call is made for whatever it does to the wildcard feature
		wildcardFeature.toCategoricalFeature(categories);
	} else

	{
		// Name the offending type so that a misconfigured pipeline can be diagnosed from the stack trace alone
		throw new IllegalArgumentException("Expected a categorical or wildcard feature, got " + (feature != null ? feature.getClass().getName() : "null"));
	}

	return result;
}
/**
 * One-hot encodes the categorical feature registered for the transformer's input column.
 *
 * <p>When {@code dropLast} is in effect (which is also the case when the parameter
 * is not defined), the last category value is excluded from the encoding.</p>
 *
 * @param encoder the encoder that supplies the input feature
 * @return the features produced by {@code encodeFeature} for the retained category values
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	OneHotEncoder oneHotEncoder = getTransformer();

	CategoricalFeature inputFeature = (CategoricalFeature)encoder.getOnlyFeature(oneHotEncoder.getInputCol());

	// Fall back to "true" when the parameter has not been defined
	Option<Object> dropLastParam = oneHotEncoder.get(oneHotEncoder.dropLast());
	boolean dropLast = (dropLastParam.isDefined() ? (Boolean)dropLastParam.get() : true);

	List<String> categories = inputFeature.getValues();

	if(!dropLast){
		return encodeFeature(encoder, inputFeature, categories);
	}

	return encodeFeature(encoder, inputFeature, categories.subList(0, categories.size() - 1));
}
// Downcast the feature and read its field as a DataField, then store a new categorical feature under the label column via putOnlyFeature.
// NOTE(review): `field`, `categories` and `labelCol` are defined outside this excerpt, and `dataField` is not used by the visible statements - confirm.
CategoricalFeature categoricalFeature = (CategoricalFeature)feature; DataField dataField = (DataField)categoricalFeature.getField(); encoder.putOnlyFeature(labelCol, new CategoricalFeature(encoder, field, categories));
/**
 * Expands a categorical feature into one binary feature per category value,
 * followed by one more binary feature for the extra "missing(NA)" category.
 * Any other kind of feature is passed through unchanged.
 *
 * <p>For a categorical input, the feature is also passed to {@code ImputerUtil.encodeFeature}
 * with "missing(NA)" and the {@code AS_VALUE} missing value treatment.</p>
 *
 * @param feature the feature to expand; its encoder is cast to {@code ModelEncoder} up front
 * @return the binary features of a categorical input, or a single-element stream holding the input itself
 */
@Override
public Stream<Feature> apply(Feature feature){
	ModelEncoder encoder = (ModelEncoder)feature.getEncoder();

	if(!(feature instanceof CategoricalFeature)){
		return Stream.of(feature);
	}

	CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

	String missingCategory = "missing(NA)";

	// Work on a copy, so that the feature's own value list is not modified
	List<String> categories = new ArrayList<>(categoricalFeature.getValues());
	categories.add(missingCategory);

	ImputerUtil.encodeFeature(categoricalFeature, missingCategory, MissingValueTreatmentMethod.AS_VALUE);

	return categories.stream()
		.map(category -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), category));
}
};
/**
 * Wraps the field into a feature that matches its operational type, and
 * hands that feature over to the feature-accepting {@code addFeature} overload.
 *
 * <p>A categorical field is cast to {@code DataField}, so a categorical field
 * of any other field type fails with a {@code ClassCastException}.</p>
 *
 * @param field the field to wrap
 * @throws IllegalArgumentException if the operational type is neither categorical nor continuous
 */
public void addFeature(Field<?> field){
	Feature feature;

	OpType opType = field.getOpType();
	switch(opType){
		case CATEGORICAL:
			feature = new CategoricalFeature(this, (DataField)field);
			break;
		case CONTINUOUS:
			feature = new ContinuousFeature(this, field);
			break;
		default:
			// Report the rejected operational type instead of throwing a message-less exception
			throw new IllegalArgumentException("Unsupported operational type: " + opType);
	}

	addFeature(feature);
}
// The values reported by the categorical feature
List<String> values = categoricalFeature.getValues();
@Override public List<Feature> encodeFeatures(SparkMLEncoder encoder){ OneHotEncoderModel transformer = getTransformer(); String[] inputCols = transformer.getInputCols(); boolean dropLast = transformer.getDropLast(); List<Feature> result = new ArrayList<>(); for(int i = 0; i < inputCols.length; i++){ CategoricalFeature categoricalFeature = (CategoricalFeature)encoder.getOnlyFeature(inputCols[i]); List<String> values = categoricalFeature.getValues(); if(dropLast){ values = values.subList(0, values.size() - 1); } // XXX List<BinaryFeature> binaryFeatures = (List)OneHotEncoderConverter.encodeFeature(encoder, categoricalFeature, values); result.add(new BinarizedCategoricalFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), binaryFeatures)); } return result; } }
/**
 * Wraps the data field into a feature that matches its operational type, and
 * hands that feature over to the feature-accepting {@code addFeature} overload.
 *
 * @param dataField the data field to wrap
 * @throws IllegalArgumentException if the operational type is neither continuous nor categorical
 */
public void addFeature(DataField dataField){
	OpType opType = dataField.getOpType();

	switch(opType){
		case CONTINUOUS:
			addFeature(new ContinuousFeature(this, dataField));
			break;
		case CATEGORICAL:
			addFeature(new CategoricalFeature(this, dataField));
			break;
		default:
			// Report the rejected operational type instead of throwing a message-less exception
			throw new IllegalArgumentException("Unsupported operational type: " + opType);
	}
}
// Downcast the feature, read its values, and read the values of the split point as a list of integers.
// NOTE(review): the cast to List<Integer> is unchecked; presumably the integers refer to positions in `values` - confirm against the code that follows.
CategoricalFeature categoricalFeature = (CategoricalFeature)feature; List<String> values = categoricalFeature.getValues(); List<Integer> splitValues = (List<Integer>)splitpoint.getValues();
/**
 * Label-encodes the first input feature: every class is mapped to its position
 * in the class list.
 *
 * <p>The mapping is backed by a categorical, integer-typed derived field named
 * after "label_encoder" and the input feature. The returned feature is a categorical
 * feature over the formatted classes, whose continuous form is delegated to the
 * categorical feature of that derived field.</p>
 *
 * @param features the input features; only the first element is read
 * @param encoder the encoder that provides the derived field and is handed to the created features
 * @return a one-element list holding the label-encoded feature
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<?> classes = getClasses();

	ClassDictUtil.checkSize(1, features);

	Feature inputFeature = features.get(0);

	List<String> inputCategories = new ArrayList<>();
	List<String> outputCategories = new ArrayList<>();

	// Pair every formatted class with its formatted position
	int index = 0;
	for(Object classValue : classes){
		inputCategories.add(ValueUtil.formatValue(classValue));
		outputCategories.add(ValueUtil.formatValue(index));

		index++;
	}

	// Invoked at the discretion of ensureDerivedField; also makes the input field categorical
	Supplier<MapValues> mapValuesSupplier = () -> {
		encoder.toCategorical(inputFeature.getName(), inputCategories);

		return PMMLUtil.createMapValues(inputFeature.getName(), inputCategories, outputCategories);
	};

	DerivedField derivedField = encoder.ensureDerivedField(
		FeatureUtil.createName("label_encoder", inputFeature),
		OpType.CATEGORICAL,
		DataType.INTEGER,
		mapValuesSupplier
	);

	Feature encodedFeature = new CategoricalFeature(encoder, derivedField, outputCategories);

	Feature labelFeature = new CategoricalFeature(encoder, inputFeature, inputCategories){

		@Override
		public ContinuousFeature toContinuousFeature(){
			// The continuous form comes from the integer-typed derived field
			return encodedFeature.toContinuousFeature();
		}
	};

	return Collections.singletonList(labelFeature);
}
// The values reported by the categorical feature
List<String> values = categoricalFeature.getValues();