@Override
public Expression createExpression(FieldRef fieldRef){
	// Builds the PMML test "fieldRef <= 0"
	Expression zero = PMMLUtil.createConstant(0d);

	return PMMLUtil.createApply("lessOrEqual", fieldRef, zero);
} };
static private MapValues createMapValues(FieldName name, Map<String, String> mapping, List<String> categories){ Set<String> inputs = new LinkedHashSet<>(mapping.keySet()); Set<String> outputs = new LinkedHashSet<>(mapping.values()); for(String category : categories){ // Assume disjoint input and output value spaces if(outputs.contains(category)){ continue; } mapping.put(category, category); } return PMMLUtil.createMapValues(name, mapping); }
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ String function = getFunction(); Boolean trimBlanks = getTrimBlanks(); if(function == null && !trimBlanks){ return features; } List<Feature> result = new ArrayList<>(); for(Feature feature : features){ Expression expression = feature.ref(); if(function != null){ expression = PMMLUtil.createApply(function, expression); } // End if if(trimBlanks){ expression = PMMLUtil.createApply("trimBlanks", expression); } Field<?> field = encoder.toCategorical(feature.getName(), Collections.emptyList()); // XXX: Should have been set by the previous transformer field.setDataType(DataType.STRING); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("normalize", feature), OpType.CATEGORICAL, DataType.STRING, expression); feature = new StringFeature(encoder, derivedField); result.add(feature); } return result; }
@Override
public ClusteringModel encodeModel(Schema schema){
	KMeansModel model = getTransformer();

	Vector[] centers = model.clusterCenters();

	// One PMML Cluster per fitted center; the cluster id is its index
	List<Cluster> clusters = new ArrayList<>();

	for(int index = 0; index < centers.length; index++){
		Cluster cluster = new Cluster()
			.setId(String.valueOf(index))
			.setArray(PMMLUtil.createRealArray(VectorUtil.toList(centers[index])));

		clusters.add(cluster);
	}

	// Squared Euclidean distance (ABS_DIFF compare function + SquaredEuclidean measure)
	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE)
		.setCompareFunction(CompareFunction.ABS_DIFF)
		.setMeasure(new SquaredEuclidean());

	return new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, clusters.size(), ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters);
} }
@Override
public Apply createApply(){
	Number weight = getWeight();

	// Extend the parent Apply with the weight as a trailing Constant argument
	Apply apply = super.createApply();
	apply.addExpressions(PMMLUtil.createConstant(weight));

	return apply;
}
// Attach the row data to the MapValues element as a PMML InlineTable
mapValues.setInlineTable(PMMLUtil.createInlineTable(data));
// Register the single value with the DataField's value list
PMMLUtil.addValues(dataField, Collections.singletonList(value));
/**
 * Builds a PMML DiscrStats element from a two-element payload:
 * objects[0] holds the discrete values, objects[1] holds their counts
 * (parallel lists - sizes are verified below).
 *
 * NOTE(review): raw (List) casts - the element type delivered by asArray()
 * is not visible here; confirm before tightening the generics.
 */
static public DiscrStats createDiscrStats(Object[] objects){
	List<Object> values = (List)asArray(objects[0]);
	List<Integer> counts = ValueUtil.asIntegers((List)asArray(objects[1]));

	// Values and counts must be the same length
	ClassDictUtil.checkSize(values, counts);

	DiscrStats discrStats = new DiscrStats()
		.addArrays(PMMLUtil.createStringArray(values), PMMLUtil.createIntArray(counts));

	return discrStats;
} }
// JavaCC-generated production: LBRACKET UnaryExpression() (COMMA UnaryExpression())* RBRACKET.
// Each operand is cast to String and collected; the list is emitted as a PMML string Array.
// NOTE(review): machine-generated parser code - change the grammar and regenerate rather than editing by hand.
final public Array ListMakerExpression() throws ParseException {Object predicate; List<String> values = new ArrayList<String>(); jj_consume_token(LBRACKET); predicate = UnaryExpression(); values.add((String)predicate); label_3: while (true) { switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case COMMA:{ ; break; } default: jj_la1[9] = jj_gen; break label_3; } jj_consume_token(COMMA); predicate = UnaryExpression(); values.add((String)predicate); } jj_consume_token(RBRACKET); return PMMLUtil.createStringArray(values); }
// Register the single interval with the DataField
PMMLUtil.addIntervals(dataField, Arrays.asList(interval));
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Tokenizer transformer = getTransformer();

	Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());

	// Lowercase the input column before tokenization
	Apply lowercaseApply = PMMLUtil.createApply("lowercase", inputFeature.ref());

	DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercaseApply);

	// Split the document on runs of whitespace
	DocumentFeature documentFeature = new DocumentFeature(encoder, derivedField, "\\s+");

	return Collections.singletonList(documentFeature);
} }
@Override
public ClusteringModel encodeModel(Schema schema){
	int[] shape = getClusterCentersShape();

	int numberOfClusters = shape[0];
	int numberOfFeatures = shape[1];

	List<? extends Number> clusterCenters = getClusterCenters();
	List<Integer> labels = getLabels();

	// Per-cluster training row counts, derived from the fitted labels (if available)
	Multiset<Integer> labelCounts = HashMultiset.create();

	if(labels != null){
		labelCounts.addAll(labels);
	}

	List<Cluster> clusters = new ArrayList<>();

	for(int i = 0; i < numberOfClusters; i++){
		// Cluster size is only reported when labels were captured during fitting
		Integer size = (labelCounts.isEmpty() ? null : labelCounts.count(i));

		Cluster cluster = new Cluster()
			.setId(String.valueOf(i))
			.setSize(size)
			.setArray(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, numberOfClusters, numberOfFeatures, i)));

		clusters.add(cluster);
	}

	// Squared Euclidean distance (ABS_DIFF compare function + SquaredEuclidean measure)
	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE)
		.setCompareFunction(CompareFunction.ABS_DIFF)
		.setMeasure(new SquaredEuclidean());

	ClusteringModel clusteringModel = new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, numberOfClusters, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters)
		.setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));

	return clusteringModel;
}
@Override
public Apply encodeApply(String function, Feature feature, int index, String term){
	TfidfTransformer transformer = getTransformer();

	Apply apply = super.encodeApply(function, feature, index, term);

	Boolean useIdf = transformer.getUseIdf();

	// When IDF re-weighting is enabled, append the per-term weight as an extra Constant argument.
	// NOTE(review): auto-unboxing - assumes getUseIdf() never returns null; confirm upstream.
	if(useIdf){
		Number weight = transformer.getWeight(index);

		apply.addExpressions(PMMLUtil.createConstant(weight));
	}

	return apply;
}
// Embed the row data as a PMML InlineTable on the enclosing builder chain
.setInlineTable(PMMLUtil.createInlineTable(data));
@Override
public Expression createExpression(FieldRef fieldRef){
	// 0.5 - 2^(-1 * x), built bottom-up as nested PMML Apply elements
	Expression negated = PMMLUtil.createApply("*", PMMLUtil.createConstant(-1d), fieldRef);
	Expression power = PMMLUtil.createApply("pow", PMMLUtil.createConstant(2d), negated);

	return PMMLUtil.createApply("-", PMMLUtil.createConstant(0.5d), power);
} };
// Wrap the feature reference in a "lowercase" string transformation
Apply apply = PMMLUtil.createApply("lowercase", feature.ref());
// Map inputValues -> outputValues; inputs absent from the mapping fall back to the default value "0"
MapValues mapValues = PMMLUtil.createMapValues(name, inputValues, outputValues) .setDefaultValue(ValueUtil.formatValue(0d));
// Populate the i-th cluster: name, size, and center coordinates taken as row i
// of the centers matrix (FortranMatrixUtil suggests column-major storage - confirm)
.setName(rowNames.getValue(i)) .setSize(size.getValue(i)) .setArray(PMMLUtil.createRealArray(FortranMatrixUtil.getRow(centers.getValues(), rows, columns, i)));
// Carry over case sensitivity from the stop word set and attach the word data as an InlineTable
.setCaseSensitive(stopWordSet.isCaseSensitive()) .setInlineTable(PMMLUtil.createInlineTable(data));