/**
 * Creates a document feature over the given field.
 *
 * @param encoder the active Spark ML encoder.
 * @param field the backing field; its name and data type identify this feature.
 * @param wordSeparatorRE regular expression used to split the document into words.
 */
public DocumentFeature(SparkMLEncoder encoder, Field<?> field, String wordSeparatorRE){
	super(encoder, field.getName(), field.getDataType());

	setWordSeparatorRE(wordSeparatorRE);
}
/**
 * Returns the operational type of the backing field.
 */
public OpType getOpType(){
	return getField().getOpType();
}
/**
 * Returns the field name that identifies this object in keyed collections.
 */
@Override
public FieldName getKey(){
	return getName();
}
}
label = new CategoricalLabel(field.getName(), field.getDataType(), categories); } else field.setDataType(DataType.DOUBLE); label = new ContinuousLabel(field.getName(), field.getDataType());
DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("imputer", feature), field.getOpType(), field.getDataType(), expression);
/**
 * Returns the data type of the given field.
 *
 * @param field the field to inspect.
 */
static public DataType getDataType(Field<?> field){
	return field.getDataType();
}
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ String function = getFunction(); Boolean trimBlanks = getTrimBlanks(); if(function == null && !trimBlanks){ return features; } List<Feature> result = new ArrayList<>(); for(Feature feature : features){ Expression expression = feature.ref(); if(function != null){ expression = PMMLUtil.createApply(function, expression); } // End if if(trimBlanks){ expression = PMMLUtil.createApply("trimBlanks", expression); } Field<?> field = encoder.toCategorical(feature.getName(), Collections.emptyList()); // XXX: Should have been set by the previous transformer field.setDataType(DataType.STRING); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("normalize", feature), OpType.CATEGORICAL, DataType.STRING, expression); feature = new StringFeature(encoder, derivedField); result.add(feature); } return result; }
/**
 * Returns the field name that identifies this object in keyed collections.
 */
@Override
public FieldName getKey(){
	return getName();
}
}
/**
 * Returns the data type of the backing field.
 */
public DataType getDataType(){
	return getField().getDataType();
}
/**
 * Converts a PMML field into an ML model field descriptor.
 *
 * Fix: the parameter used the raw type {@code Field}; parameterized as
 * {@code Field<?>} for consistency with the rest of the codebase (source-compatible
 * for all existing callers).
 *
 * @param dataField the PMML field; its name and data type must be non-null.
 * @return a model field carrying the field's name and the string form of its data type.
 */
private MLModelField getModelField(Field<?> dataField){
	String name = dataField.getName().getValue();
	String dataType = dataField.getDataType().toString();

	return new MLModelField(name, dataType);
}
/**
 * Returns the name of this object, falling back to the backing field's name
 * when no explicit name has been assigned.
 */
public FieldName getName(){

	if(this.name != null){
		return this.name;
	}

	return getField().getName();
}
static private <F extends Field<F> & HasDiscreteDomain<F>> List<Object> parseValidValues(F field){ List<Object> result = new ArrayList<>(); DataType dataType = field.getDataType(); if(dataType == null){ throw new MissingAttributeException(MissingAttributeException.formatMessage(XPathUtil.formatElement(field.getClass()) + "@dataType"), field); } // End if if(field.hasValues()){ List<Value> pmmlValues = field.getValues(); for(Value pmmlValue : pmmlValues){ String stringValue = pmmlValue.getValue(); if(stringValue == null){ throw new MissingAttributeException(pmmlValue, PMMLAttributes.VALUE_VALUE); } Value.Property property = pmmlValue.getProperty(); switch(property){ case VALID: result.add(TypeUtil.parse(dataType, stringValue)); break; default: break; } } } return result; }
static public OpType getOpType(Field<?> field, MiningField miningField){ OpType opType = field.getOpType(); // "A MiningField overrides a (Data)Field" if(miningField != null){ opType = firstNonNull(miningField.getOpType(), opType); } return opType; }
/**
 * Converts a PMML field into an ML model field descriptor.
 *
 * Fix: the parameter used the raw type {@code Field}; parameterized as
 * {@code Field<?>} for consistency with the rest of the codebase (source-compatible
 * for all existing callers).
 *
 * @param dataField the PMML field; its name and data type must be non-null.
 * @return a model field carrying the field's name and the string form of its data type.
 */
private MLModelField getModelField(Field<?> dataField){
	String name = dataField.getName().getValue();
	String dataType = dataField.getDataType().toString();

	return new MLModelField(name, dataType);
}
/**
 * Indexes fields by name, preserving iteration order.
 *
 * @param fields the fields to index.
 * @return a name-to-field map in encounter order.
 * @throws IllegalArgumentException if two fields share the same name.
 */
static public <F extends Field<?>> Map<FieldName, F> nameMap(Collection<? extends F> fields){
	Map<FieldName, F> result = new LinkedHashMap<>();

	for(F field : fields){
		FieldName name = field.getName();

		F clash = result.put(name, field);
		if(clash != null){
			throw new IllegalArgumentException("Fields " + format(field) + " and " + format(clash) + " have the same name " + name);
		}
	}

	return result;
}
/**
 * Wraps a field as a feature according to its operational type and registers it.
 *
 * @param field the field to register; categorical fields must be {@code DataField}s.
 * @throws IllegalArgumentException if the operational type is neither categorical
 *         nor continuous.
 */
public void addFeature(Field<?> field){
	OpType opType = field.getOpType();

	Feature feature;

	switch(opType){
		case CATEGORICAL:
			feature = new CategoricalFeature(this, (DataField)field);
			break;
		case CONTINUOUS:
			feature = new ContinuousFeature(this, field);
			break;
		default:
			throw new IllegalArgumentException();
	}

	addFeature(feature);
}
/**
 * Registers a categorical field together with one binary feature per category.
 * A boolean-typed field with the canonical boolean value list is registered as a
 * {@code BooleanFeature}; otherwise a plain {@code CategoricalFeature} is used.
 *
 * @param field the field to register.
 * @param categoryNames display names, one per category.
 * @param categoryValues category values, parallel to {@code categoryNames}.
 * @throws IllegalArgumentException if the two lists differ in size.
 */
public void addField(Field<?> field, List<String> categoryNames, List<String> categoryValues){
	RExpEncoder encoder = getEncoder();

	if(categoryNames.size() != categoryValues.size()){
		throw new IllegalArgumentException();
	}

	boolean booleanDomain = (DataType.BOOLEAN).equals(field.getDataType()) && (BooleanFeature.VALUES).equals(categoryValues);

	CategoricalFeature categoricalFeature = booleanDomain ? new BooleanFeature(encoder, field) : new CategoricalFeature(encoder, field, categoryValues);

	putFeature(field.getName(), categoricalFeature);

	for(int index = 0, count = categoryNames.size(); index < count; index++){
		String categoryName = categoryNames.get(index);
		String categoryValue = categoryValues.get(index);

		BinaryFeature binaryFeature = new BinaryFeature(encoder, field, categoryValue);

		putFeature(FieldName.create((field.getName()).getValue() + categoryName), binaryFeature);
	}

	this.fields.add(field);
}
public InputField(Field<?> field, MiningField miningField){ super(field); setMiningField(Objects.requireNonNull(miningField)); if(!Objects.equals(field.getName(), miningField.getName())){ throw new IllegalArgumentException(); } }
/**
 * Resolves the effective operational type of a field.
 *
 * @param field the backing field.
 * @param miningField the overriding mining field, or {@code null}.
 * @param target the overriding target, or {@code null}.
 */
static public OpType getOpType(Field<?> field, MiningField miningField, Target target){
	OpType opType = field.getOpType();

	// "A MiningField overrides a (Data)Field, and a Target overrides a MiningField"
	if(miningField != null){
		opType = firstNonNull(miningField.getOpType(), opType);

		// NOTE(review): the target override is only consulted when a mining field is
		// present; a non-null target combined with a null mining field is ignored.
		// Confirm that this nesting is intentional.
		if(target != null){
			opType = firstNonNull(target.getOpType(), opType);
		}
	}

	return opType;
}
/**
 * Builds an insertion-ordered map from field name to field.
 *
 * @param fields the fields to index.
 * @return a name-to-field map in encounter order.
 * @throws IllegalArgumentException if two fields share the same name.
 */
static public <F extends Field<?>> Map<FieldName, F> nameMap(Collection<? extends F> fields){
	Map<FieldName, F> index = new LinkedHashMap<>();

	for(F current : fields){
		FieldName key = current.getName();

		F displaced = index.put(key, current);
		if(displaced != null){
			throw new IllegalArgumentException("Fields " + format(current) + " and " + format(displaced) + " have the same name " + key);
		}
	}

	return index;
}