/**
 * Reads the value of every active field out of the given tuple.
 *
 * @param tuple the tuple to read from; each value is looked up by the field's name
 * @return the raw inputs for all 'active fields', keyed by field, in the
 *         iteration order of the active fields
 */
@Override
public Map<FieldName, Object> extractRawInputs(Tuple tuple) {
    LOG.debug("Extracting raw inputs from tuple: = [{}]", tuple);

    // LinkedHashMap keeps the entries in the order the active fields are visited.
    final Map<FieldName, Object> valuesByField = new LinkedHashMap<>();
    for (FieldName field : activeFields) {
        final Object rawValue = tuple.getValueByField(field.getValue());
        valuesByField.put(field, rawValue);
    }

    LOG.debug("Raw inputs = [{}]", valuesByField);
    return valuesByField;
}
dataType = null; DataField field = new DataField(FieldName.create(featureName), opType, dataType); if (schema.isCategorical(featureName)) { Objects.requireNonNull(categoricalValueEncodings);
if(!(name).equals(binaryFeature.getName())){ continue; .setDefaultValue(ValueUtil.formatValue(0d)); DerivedField derivedField = encoder.createDerivedField(FieldName.create("lookup(" + name.getValue() + (identifier != null ? (", " + identifier) : "") + ")"), OpType.CONTINUOUS, DataType.DOUBLE, mapValues);
/**
 * Checks how cloning through {@code SerializationUtil.clone} interacts with interning:
 * a name obtained from {@code FieldName.create} is interned and its clone is the very
 * same instance, whereas a name built with the constructor is not interned and its
 * clone is an interned, distinct instance.
 */
@Test
public void readResolve() throws Exception {
    // Factory-created name: interned, and the clone resolves to the same object.
    FieldName factoryName = FieldName.create("x");
    assertTrue(factoryName.isInterned());

    FieldName factoryClone = SerializationUtil.clone(factoryName);
    assertTrue(factoryClone.isInterned());
    assertSame(factoryName, factoryClone);

    // Constructor-created name: not interned, but the clone comes back interned.
    FieldName constructedName = new FieldName("x");
    assertFalse(constructedName.isInterned());

    FieldName constructedClone = SerializationUtil.clone(constructedName);
    assertTrue(constructedClone.isInterned());
    assertNotSame(constructedName, constructedClone);
}
}
List<String> outputFields = getSchemaFieldViaUsageType(schema, FieldUsageType.TARGET); pmmlModel.withTargetFieldName(new FieldName(outputFields.get(0))); table.withTargetCategory(outputFields.get(0)); if(expression instanceof NormContinuous) { NormContinuous norm = (NormContinuous) expression; if(activeFields.contains(norm.getField().getValue())) table.withNumericPredictors(new NumericPredictor(dField.getName(), weights[index++]));
/**
 * Returns the {@code FieldName} for the given value, reusing the instance held in
 * the cache when one is still reachable and otherwise creating and caching a new one.
 *
 * @param value the field name value; must be neither {@code null} nor empty
 * @return the cached or newly created name
 * @throws IllegalArgumentException if {@code value} is {@code null} or empty
 */
@JsonCreator
static
public FieldName create(String value) {
    if (value == null || value.isEmpty()) {
        throw new IllegalArgumentException();
    }

    // The cache holds weak references, so an entry may exist while its referent is gone.
    WeakReference<FieldName> cachedReference = FieldName.cache.get(value);
    FieldName result = (cachedReference != null) ? cachedReference.get() : null;

    if (result == null) {
        result = new FieldName(value);
        FieldName.cache.put(value, new WeakReference<>(result));
    }

    return result;
}
/**
 * Registers a feature under the given name.
 *
 * When {@code RExpUtil.makeName} yields a name different from the one supplied,
 * the mapping from that derived name back to the original is also recorded in
 * {@code validNames}.
 *
 * @param name the name the feature is stored under
 * @param feature the feature to store
 */
private void putFeature(FieldName name, Feature feature) {
    FieldName derivedName = RExpUtil.makeName(name);

    boolean differs = !name.equals(derivedName);
    if (differs) {
        this.validNames.put(derivedName, name);
    }

    this.features.put(name, feature);
}
static private boolean shouldMeasure(Object object){ if(object != null){ Class<?> clazz = object.getClass(); if(clazz.isEnum()){ return false; } // End if if(object instanceof FieldName){ FieldName name = (FieldName)object; return !name.isInterned(); } return !(object instanceof Visitable); } return false; } }
public static PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureList featureList, List<RegressionTree> regTrees, float base_score){ LSBoostEncoder encoder = new LSBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = encodeLabel(targetField, targetCategories, encoder); //todo List<Feature> features = new ArrayList<>(); for (int i=0;i<featureList.size();i++){ FieldName fieldName = new FieldName("feature_"+i); DataField dataField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.FLOAT); Feature feature = new ContinuousFeature(encoder, dataField); features.add(feature); } Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(regTrees, base_score, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }
/**
 * Looks up or creates the {@code FieldName} for a value.
 *
 * A cached instance is returned when the cache has an entry for the value whose
 * weakly referenced name is still available; otherwise a new name is constructed,
 * stored in the cache behind a weak reference, and returned.
 *
 * @param value the field name value; must be neither {@code null} nor empty
 * @return the cached or newly created name
 * @throws IllegalArgumentException if {@code value} is {@code null} or empty
 */
@JsonCreator
static
public FieldName create(String value) {
    boolean missing = (value == null) || value.isEmpty();
    if (missing) {
        throw new IllegalArgumentException();
    }

    WeakReference<FieldName> existing = FieldName.cache.get(value);
    FieldName known = (existing == null) ? null : existing.get();
    if (known != null) {
        return known;
    }

    // No entry, or the referent has been cleared: create and (re-)register.
    FieldName fresh = new FieldName(value);
    FieldName.cache.put(value, new WeakReference<>(fresh));
    return fresh;
}
/**
 * Tells whether the given name appears among the ACTIVE mining fields.
 *
 * @param root the field name to look for
 * @param miningList the mining fields to search; fields whose usage type is not
 *        {@code FieldUsageType.ACTIVE} are ignored
 * @return {@code true} if an active mining field has a name equal to {@code root}
 */
private boolean isRootInMiningList(FieldName root, List<MiningField> miningList) {
    for (MiningField candidate : miningList) {
        boolean active = candidate.getUsageType() == FieldUsageType.ACTIVE;
        if (active && root.equals(candidate.getName())) {
            return true;
        }
    }
    return false;
}
static private boolean shouldMeasure(Object object){ if(object != null){ Class<?> clazz = object.getClass(); if(clazz.isEnum()){ return false; } // End if if(object instanceof FieldName){ FieldName name = (FieldName)object; return !name.isInterned(); } return !(object instanceof Visitable); } return false; } }
/**
 * Builds the model outputs for a PMML model: every given stream is declared with
 * the same fields, namely the evaluator's predicted field names followed by its
 * output field names (duplicates dropped, first occurrence wins).
 *
 * @param pmmlModel the model an evaluator is created for
 * @param streams the stream ids to declare fields for
 * @return a {@code JpmmlModelOutputs} holding the stream-to-fields declaration
 */
private static ModelOutputs create(PMML pmmlModel, List<String> streams) {
    final Evaluator evaluator = JpmmlFactory.newEvaluator(pmmlModel);

    // Insertion-ordered set: predicted fields first, then output fields.
    final Set<String> names = new LinkedHashSet<>();
    for (FieldName predicted : evaluator.getPredictedFields()) {
        names.add(predicted.getValue());
    }
    for (FieldName output : evaluator.getOutputFields()) {
        names.add(output.getValue());
    }

    // Each stream gets its own Fields instance built from a fresh copy of the names.
    final Map<String, Fields> declaredByStream = streams.stream()
            .collect(Collectors.toMap(
                    streamId -> streamId,
                    streamId -> new Fields(new ArrayList<>(names))));

    return new JpmmlModelOutputs(declaredByStream);
}
}
private Predicate buildPredicate(Split split, CategoricalValueEncodings categoricalValueEncodings) { if (split == null) { // Left child always applies, but is evaluated second return new True(); } int featureIndex = inputSchema.predictorToFeatureIndex(split.feature()); FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(featureIndex)); if (split.featureType().equals(FeatureType.Categorical())) { // Note that categories in MLlib model select the *left* child but the // convention here will be that the predicate selects the *right* child // So the predicate will evaluate "not in" this set // More ugly casting @SuppressWarnings("unchecked") Collection<Double> javaCategories = (Collection<Double>) (Collection<?>) JavaConversions.seqAsJavaList(split.categories()); Set<Integer> negativeEncodings = javaCategories.stream().map(Double::intValue).collect(Collectors.toSet()); Map<Integer,String> encodingToValue = categoricalValueEncodings.getEncodingValueMap(featureIndex); List<String> negativeValues = negativeEncodings.stream().map(encodingToValue::get).collect(Collectors.toList()); String joinedValues = TextUtils.joinPMMLDelimited(negativeValues); return new SimpleSetPredicate(fieldName, SimpleSetPredicate.BooleanOperator.IS_NOT_IN, new Array(Array.Type.STRING, joinedValues)); } else { // For MLlib, left means <= threshold, so right means > return new SimplePredicate(fieldName, SimplePredicate.Operator.GREATER_THAN) .setValue(Double.toString(split.threshold())); } }
public static PMML encodePMML(FieldName targetField, List<String> targetCategories, FeatureList featureList, List<List<RegressionTree>> regTrees, float base_score, int numClasses){ LKBoostEncoder encoder = new LKBoostEncoder(); if(targetField == null){ targetField = FieldName.create("_target"); } Label label = encodeLabel(targetField, targetCategories, encoder, numClasses); //todo List<Feature> features = new ArrayList<>(); for (int i=0;i<featureList.size();i++){ FieldName fieldName = new FieldName("feature_"+i); DataField dataField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.FLOAT); Feature feature = new ContinuousFeature(encoder, dataField); features.add(feature); } Schema schema = new Schema(label, features); MiningModel miningModel = encodeMiningModel(regTrees, base_score, schema); PMML pmml = encoder.encodePMML(miningModel); return pmml; }