private List<Feature> initFeatures(PyClassDict object, OpType opType, DataType dataType, SkLearnEncoder encoder){ List<String> activeFields = getActiveFields(); if(activeFields == null){ int numberOfFeatures = -1; if(object instanceof HasNumberOfFeatures){ HasNumberOfFeatures hasNumberOfFeatures = (HasNumberOfFeatures)object; numberOfFeatures = hasNumberOfFeatures.getNumberOfFeatures(); } // End if if(numberOfFeatures < 0){ throw new IllegalArgumentException("The first transformer or estimator object (" + ClassDictUtil.formatClass(object) + ") does not specify the number of input features"); } activeFields = new ArrayList<>(numberOfFeatures); for(int i = 0, max = numberOfFeatures; i < max; i++){ activeFields.add("x" + String.valueOf(i + 1)); } logger.warn("Attribute \'" + ClassDictUtil.formatMember(this, "active_fields") + "\' is not set. Assuming {} as the names of active fields", activeFields); } List<Feature> result = new ArrayList<>(); for(String activeField : activeFields){ DataField dataField = encoder.createDataField(FieldName.create(activeField), opType, dataType); result.add(new WildcardFeature(encoder, dataField)); } return result; }
wildcardFeature.toCategoricalFeature(Arrays.asList("0", "1")); BinaryFeature binaryFeature = new BinaryFeature(wildcardFeature.getEncoder(), wildcardFeature, "1");
/**
 * Calls {@code updateType} for the name of every {@code WildcardFeature} in the list,
 * using the operational type and data type reported by the transformer.
 *
 * <p>
 * If the transformer throws {@code UnsupportedOperationException} from either getter,
 * the method returns without touching any feature.
 * </p>
 *
 * @param features The features to inspect; elements that are not wildcard features are skipped.
 * @param transformer The transformer that supplies the operational type and data type.
 */
public void updateFeatures(List<Feature> features, Transformer transformer){
	OpType opType;
	DataType dataType;

	try {
		opType = transformer.getOpType();
		dataType = transformer.getDataType();
	} catch(UnsupportedOperationException ignored){
		// The transformer offers no type information; nothing to update
		return;
	}

	for(Feature candidate : features){

		if(!(candidate instanceof WildcardFeature)){
			continue;
		}

		updateType(((WildcardFeature)candidate).getName(), opType, dataType);
	}
}
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ Boolean withData = getWithData(); Boolean withStatistics = getWithStatistics(); ClassDictUtil.checkSize(1, features); Feature feature = features.get(0); WildcardFeature wildcardFeature = (WildcardFeature)feature; if(withData){ List<?> data = getData(); Function<Object, String> function = new Function<Object, String>(){ @Override public String apply(Object object){ return ValueUtil.formatValue(object); } }; List<String> categories = Lists.transform(data, function); feature = wildcardFeature.toCategoricalFeature(categories); } // End if if(withStatistics){ Map<String, ?> counts = extractMap(getCounts(), 0); Object[] discrStats = getDiscrStats(); UnivariateStats univariateStats = new UnivariateStats() .setField(wildcardFeature.getName()) .setCounts(createCounts(counts)) .setDiscrStats(createDiscrStats(discrStats)); encoder.putUnivariateStats(univariateStats); } return super.encodeFeatures(Collections.singletonList(feature), encoder); }
/**
 * Expands the single input feature into one {@code BinaryFeature} per value of {@code getValues()}.
 *
 * <p>
 * A {@code CategoricalFeature} must have as many categories as there are values; the binary features
 * are then built from its own categories, by position.
 * A {@code WildcardFeature} gets one binary feature per value (formatted as an integer), after which
 * {@code toCategoricalFeature} is invoked on it with the collected categories; the returned
 * feature is not used here.
 * </p>
 *
 * @param features The input features; exactly one element is expected.
 * @param encoder The encoder handed to every created binary feature.
 *
 * @return A new list of binary features, one per value.
 *
 * @throws IllegalArgumentException If the input feature is neither a categorical nor a wildcard feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> result = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		for(int i = 0; i < values.size(); i++){
			result.add(new BinaryFeature(encoder, categoricalFeature, categoricalFeature.getValue(i)));
		}
	} else

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<String> categories = new ArrayList<>();

		for(int i = 0; i < values.size(); i++){
			int value = ValueUtil.asInt(values.get(i));

			String category = ValueUtil.formatValue(value);

			categories.add(category);

			result.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// Invoked for its effect on the wildcard feature; the return value is intentionally unused
		wildcardFeature.toCategoricalFeature(categories);
	} else

	{
		// Name the offending type so that the failure can be diagnosed from the message alone
		String actualType = (feature != null ? feature.getClass().getName() : "null");

		throw new IllegalArgumentException("Expected a categorical or wildcard feature, got " + actualType);
	}

	return result;
}
.setHighValue(highValue); encoder.addDecorator(wildcardFeature.getName(), outlierDecorator); .addIntervals(interval); feature = wildcardFeature.toContinuousFeature(); encoder.addDecorator(wildcardFeature.getName(), validValueDecorator); .setField(wildcardFeature.getName()) .setCounts(createCounts(counts)) .setNumericInfo(createNumericInfo(numericInfo));
/**
 * Returns the features associated with a column.
 *
 * <p>
 * If {@code columnFeatures} holds an entry for the column, that entry is returned as is.
 * Otherwise a single feature is derived from the column's data field (which is created if it does not
 * exist yet), based on the data type of that field. The derived list is not stored in {@code columnFeatures}.
 * </p>
 *
 * @param column The column name.
 *
 * @return The registered features, or a singleton list holding the derived feature.
 *
 * @throws IllegalArgumentException If the data type of the data field is not string, integer, double or boolean.
 */
public List<Feature> getFeatures(String column){
	List<Feature> registered = this.columnFeatures.get(column);

	if(registered != null){
		return registered;
	}

	FieldName name = FieldName.create(column);

	DataField dataField = getDataField(name);
	if(dataField == null){
		dataField = createDataField(name);
	}

	DataType dataType = dataField.getDataType();

	Feature derived;

	switch(dataType){
		case STRING:
			derived = new WildcardFeature(this, dataField);
			break;
		case INTEGER:
		case DOUBLE:
			derived = new ContinuousFeature(this, dataField);
			break;
		case BOOLEAN:
			derived = new BooleanFeature(this, dataField);
			break;
		default:
			throw new IllegalArgumentException("Data type " + dataType + " is not supported");
	}

	return Collections.singletonList(derived);
}
/**
 * Overrides the operational type and data type of the data field behind every input feature,
 * and replaces each feature with an {@code ObjectFeature} for that field.
 *
 * <p>
 * Every input feature is cast to {@code WildcardFeature}; the replacement features are then
 * passed on to the superclass implementation.
 * </p>
 *
 * @param features The input features.
 * @param encoder The encoder used to look up the data fields.
 *
 * @return The result of the superclass implementation for the replacement features.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<Feature> replacements = new ArrayList<>();

	OpType opType = getOpType();
	DataType dataType = getDataType();

	for(Feature original : features){
		WildcardFeature wildcardFeature = (WildcardFeature)original;

		DataField dataField = (DataField)encoder.getField(wildcardFeature.getName());
		dataField.setOpType(opType);
		dataField.setDataType(dataType);

		// The replacement reads the name and data type back from the updated field
		replacements.add(new ObjectFeature(encoder, dataField.getName(), dataField.getDataType()));
	}

	return super.encodeFeatures(replacements, encoder);
}
}
rowFeatures.add(new WildcardFeature(encoder, dataField));
features.add(new WildcardFeature(encoder, dataField));