/**
 * Builds a data dictionary from the test schema plus a single set of categorical values
 * registered under index 1, then checks the result: four fields in total, each field
 * validated through {@code checkDataField}, and the field at position 1 carrying the five
 * registered values in their original order.
 */
@Test
public void testBuildDataDictionary() {
  String[] expectedCategories = { "one", "two", "three", "four", "five" };

  // Register the categorical values under index 1.
  Map<Integer,Collection<String>> valuesByIndex = new HashMap<>();
  valuesByIndex.put(1, Arrays.asList(expectedCategories));
  CategoricalValueEncodings encodings = new CategoricalValueEncodings(valuesByIndex);

  DataDictionary dictionary = AppPMMLUtils.buildDataDictionary(buildTestSchema(), encodings);

  assertEquals(4, dictionary.getNumberOfFields().intValue());
  checkDataField(dictionary.getDataFields().get(0), "foo", null);
  checkDataField(dictionary.getDataFields().get(1), "bar", true);
  checkDataField(dictionary.getDataFields().get(2), "baz", null);
  checkDataField(dictionary.getDataFields().get(3), "bing", false);

  // The field at position 1 must list every registered value, in order.
  List<Value> barValues = dictionary.getDataFields().get(1).getValues();
  assertEquals(5, barValues.size());
  int position = 0;
  for (String expected : expectedCategories) {
    assertEquals(expected, barValues.get(position).getValue());
    position++;
  }
}
/**
 * Asserts that a data dictionary is consistent with an input schema.
 *
 * <p>The dictionary must be non-null, and both its declared number of fields and the size
 * of its field list must equal {@code schema.getNumFeatures()}. Each field is then checked
 * by name: fields the schema reports as numeric must be CONTINUOUS / DOUBLE, fields it
 * reports as categorical must be CATEGORICAL / STRING, and any other field must have
 * neither an op type nor a data type.
 *
 * @param schema the schema the dictionary is compared against
 * @param dataDictionary the dictionary under test
 */
protected static void checkDataDictionary(InputSchema schema, DataDictionary dataDictionary) {
  assertNotNull(dataDictionary);
  assertEquals("Wrong number of features",
               schema.getNumFeatures(),
               dataDictionary.getNumberOfFields().intValue());

  List<DataField> fields = dataDictionary.getDataFields();
  assertEquals(schema.getNumFeatures(), fields.size());

  for (DataField field : fields) {
    String name = field.getName().getValue();
    OpType actualOpType = field.getOpType();
    DataType actualDataType = field.getDataType();

    if (schema.isNumeric(name)) {
      assertEquals("Wrong op type for feature " + name, OpType.CONTINUOUS, actualOpType);
      assertEquals("Wrong data type for feature " + name, DataType.DOUBLE, actualDataType);
      continue;
    }

    if (schema.isCategorical(name)) {
      assertEquals("Wrong op type for feature " + name, OpType.CATEGORICAL, actualOpType);
      assertEquals("Wrong data type for feature " + name, DataType.STRING, actualDataType);
      continue;
    }

    // Neither numeric nor categorical: no type information is expected.
    assertNull(actualOpType);
    assertNull(actualDataType);
  }
}
/**
 * Returns the number of fields as reported by the underlying data dictionary.
 *
 * @return the value of {@code dataDictionary.getNumberOfFields()}, passed through unchanged
 */
@Override
public Integer getSize() {
  Integer numberOfFields = dataDictionary.getNumberOfFields();
  return numberOfFields;
}
/**
 * Builds a lookup from each data field's name to its zero-based position in the
 * data dictionary's field list.
 *
 * <p>The positions are derived from the actual {@code getDataFields()} list rather than
 * from {@code getNumberOfFields()}. The latter reflects the optional PMML
 * {@code numberOfFields} attribute: it may be absent (a {@code null} that fails on
 * unboxing) or disagree with the real number of fields (skipping fields or running past
 * the end of the list).
 *
 * <p>If two fields share a name, the position of the later one wins.
 *
 * @param dataDictionary the dictionary whose fields are indexed
 * @return a new mutable map from field name to field position
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int index = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), index);
        index++;
    }
    return fieldNumMap;
}
/**
 * Builds a lookup from each data field's name to its zero-based position in the
 * data dictionary's field list.
 *
 * <p>The positions are derived from the actual {@code getDataFields()} list rather than
 * from {@code getNumberOfFields()}. The latter reflects the optional PMML
 * {@code numberOfFields} attribute: it may be absent (a {@code null} that fails on
 * unboxing) or disagree with the real number of fields (skipping fields or running past
 * the end of the list).
 *
 * <p>If two fields share a name, the position of the later one wins.
 *
 * @param dataDictionary the dictionary whose fields are indexed
 * @return a new mutable map from field name to field position
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int index = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), index);
        index++;
    }
    return fieldNumMap;
}
/**
 * Finds the zero-based position of the first data field whose name equals {@code name}.
 *
 * <p>The search walks the actual {@code getDataFields()} list rather than counting up to
 * {@code getNumberOfFields()}. The latter reflects the optional PMML
 * {@code numberOfFields} attribute: it may be absent (a {@code null} that fails on
 * unboxing) or disagree with the real number of fields (missing a field that exists or
 * running past the end of the list).
 *
 * @param dataDictionary the dictionary whose fields are searched
 * @param name the field name to look for
 * @return the position of the first matching field
 * @throws RuntimeException if no field has the given name
 */
public static Integer getTargetFieldNumByName(DataDictionary dataDictionary, String name) {
    int index = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        if (dataField.getName().getValue().equals(name)) {
            return index;
        }
        index++;
    }
    throw new RuntimeException("Target Field Not Found: " + name);
}
/**
 * Finds the zero-based position of the first data field whose name equals {@code name}.
 *
 * <p>The search walks the actual {@code getDataFields()} list rather than counting up to
 * {@code getNumberOfFields()}. The latter reflects the optional PMML
 * {@code numberOfFields} attribute: it may be absent (a {@code null} that fails on
 * unboxing) or disagree with the real number of fields (missing a field that exists or
 * running past the end of the list).
 *
 * @param dataDictionary the dictionary whose fields are searched
 * @param name the field name to look for
 * @return the position of the first matching field
 * @throws RuntimeException if no field has the given name
 */
public static Integer getTargetFieldNumByName(DataDictionary dataDictionary, String name) {
    int index = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        if (dataField.getName().getValue().equals(name)) {
            return index;
        }
        index++;
    }
    throw new RuntimeException("Target Field Not Found: " + name);
}