/**
 * Creates a fresh dictionary and interns every string of {@code values} into it,
 * visiting the strings in iteration order.
 *
 * @param values the strings to intern
 * @return the newly built dictionary
 */
public static Dictionary fromList(Iterable<String> values) {
  final Dictionary result = new Dictionary();
  for (String entry : values) {
    result.intern(entry);
  }
  return result;
}
}
/**
 * Creates a fresh dictionary and interns every string of {@code values} into it,
 * visiting the strings in iteration order.
 *
 * @param values the strings to intern
 * @return the newly built dictionary
 */
public static Dictionary fromList(Iterable<String> values) {
  final Dictionary result = new Dictionary();
  for (String entry : values) {
    result.intern(entry);
  }
  return result;
}
}
/**
 * Registers the values the target variable may take, which also fixes how those values
 * are encoded. Any target value that is not in this list will be given the value of the
 * last member of the list.
 *
 * @param values The values the target variable can have.
 * @throws IllegalArgumentException if {@code values} holds more entries than the current
 *         maximum number of target categories (raised by the precondition check).
 */
@Override
public void defineTargetCategories(List<String> values) {
  final int categoryCount = values.size();
  Preconditions.checkArgument(
      categoryCount <= maxTargetValue,
      "Must have less than or equal to " + maxTargetValue
          + " categories for target variable, but found " + categoryCount);

  // A limit still at Integer.MAX_VALUE is tightened to the number of categories given here.
  if (maxTargetValue == Integer.MAX_VALUE) {
    maxTargetValue = categoryCount;
  }

  for (String category : values) {
    targetDictionary.intern(category);
  }
}
/**
 * Creates a fresh dictionary and interns every string of {@code values} into it,
 * visiting the strings in iteration order.
 *
 * @param values the strings to intern
 * @return the newly built dictionary
 */
public static Dictionary fromList(Iterable<String> values) {
  final Dictionary result = new Dictionary();
  for (String entry : values) {
    result.intern(entry);
  }
  return result;
}
}
/**
 * Registers the values the target variable may take, which also fixes how those values
 * are encoded. Any target value that is not in this list will be given the value of the
 * last member of the list.
 *
 * @param values The values the target variable can have.
 * @throws IllegalArgumentException if {@code values} holds more entries than the current
 *         maximum number of target categories (raised by the precondition check).
 */
@Override
public void defineTargetCategories(List<String> values) {
  final int categoryCount = values.size();
  Preconditions.checkArgument(
      categoryCount <= maxTargetValue,
      "Must have less than or equal to " + maxTargetValue
          + " categories for target variable, but found " + categoryCount);

  // A limit still at Integer.MAX_VALUE is tightened to the number of categories given here.
  if (maxTargetValue == Integer.MAX_VALUE) {
    maxTargetValue = categoryCount;
  }

  for (String category : values) {
    targetDictionary.intern(category);
  }
}
/**
 * Finds the raw target label that corresponds to an integer code.
 *
 * @param code the integer code encoded during the training process
 * @return the raw target label whose code equals {@code code}, or {@code null} when none
 *         of the dictionary's values maps to it
 */
public String getTargetLabel(int code) {
  String label = null;
  for (String candidate : targetDictionary.values()) {
    // Each candidate is re-interned to obtain its code for comparison.
    if (targetDictionary.intern(candidate) == code) {
      label = candidate;
      break;
    }
  }
  return label;
}
/**
 * Registers the values the target variable may take, which also fixes how those values
 * are encoded. Any target value that is not in this list will be given the value of the
 * last member of the list.
 *
 * @param values The values the target variable can have.
 * @throws IllegalArgumentException if {@code values} holds more entries than the current
 *         maximum number of target categories (raised by the precondition check).
 */
@Override
public void defineTargetCategories(List<String> values) {
  final int categoryCount = values.size();
  Preconditions.checkArgument(
      categoryCount <= maxTargetValue,
      "Must have less than or equal to " + maxTargetValue
          + " categories for target variable, but found " + categoryCount);

  // A limit still at Integer.MAX_VALUE is tightened to the number of categories given here.
  if (maxTargetValue == Integer.MAX_VALUE) {
    maxTargetValue = categoryCount;
  }

  for (String category : values) {
    targetDictionary.intern(category);
  }
}
/**
 * Finds the raw target label that corresponds to an integer code.
 *
 * @param code the integer code encoded during the training process
 * @return the raw target label whose code equals {@code code}, or {@code null} when none
 *         of the dictionary's values maps to it
 */
public String getTargetLabel(int code) {
  String label = null;
  for (String candidate : targetDictionary.values()) {
    // Each candidate is re-interned to obtain its code for comparison.
    if (targetDictionary.intern(candidate) == code) {
      label = candidate;
      break;
    }
  }
  return label;
}
/**
 * Finds the raw target label that corresponds to an integer code.
 *
 * @param code the integer code encoded during the training process
 * @return the raw target label whose code equals {@code code}, or {@code null} when none
 *         of the dictionary's values maps to it
 */
public String getTargetLabel(int code) {
  String label = null;
  for (String candidate : targetDictionary.values()) {
    // Each candidate is re-interned to obtain its code for comparison.
    if (targetDictionary.intern(candidate) == code) {
      label = candidate;
      break;
    }
  }
  return label;
}
/**
 * Decodes a single line of CSV data and records the predictor variables, plus the target
 * when {@code returnTarget} is true. As a side effect, features are added into the
 * featureVector. When classifying production data that carries no target value, call this
 * method with {@code returnTarget = false}.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @param returnTarget whether to process and return the target value; -1 is returned if false.
 * @return The value of the target variable, or -1 when {@code returnTarget} is false.
 */
public int processLine(CharSequence line, Vector featureVector, boolean returnTarget) {
  List<String> fields = parseCsvLine(line);

  int targetCode = -1;
  if (returnTarget) {
    int encoded = targetDictionary.intern(fields.get(target));
    // Codes at or beyond the limit collapse onto the last permitted code.
    targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;
  }

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = null;
    if (column >= 0) {
      raw = fields.get(column);
    }
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Decodes a single line of CSV data and records the predictor variables, plus the target
 * when {@code returnTarget} is true. As a side effect, features are added into the
 * featureVector. When classifying production data that carries no target value, call this
 * method with {@code returnTarget = false}.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @param returnTarget whether to process and return the target value; -1 is returned if false.
 * @return The value of the target variable, or -1 when {@code returnTarget} is false.
 */
public int processLine(CharSequence line, Vector featureVector, boolean returnTarget) {
  List<String> fields = parseCsvLine(line);

  int targetCode = -1;
  if (returnTarget) {
    int encoded = targetDictionary.intern(fields.get(target));
    // Codes at or beyond the limit collapse onto the last permitted code.
    targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;
  }

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = null;
    if (column >= 0) {
      raw = fields.get(column);
    }
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Decodes a single line of CSV data and records the predictor variables, plus the target
 * when {@code returnTarget} is true. As a side effect, features are added into the
 * featureVector. When classifying production data that carries no target value, call this
 * method with {@code returnTarget = false}.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @param returnTarget whether to process and return the target value; -1 is returned if false.
 * @return The value of the target variable, or -1 when {@code returnTarget} is false.
 */
public int processLine(CharSequence line, Vector featureVector, boolean returnTarget) {
  List<String> fields = parseCsvLine(line);

  int targetCode = -1;
  if (returnTarget) {
    int encoded = targetDictionary.intern(fields.get(target));
    // Codes at or beyond the limit collapse onto the last permitted code.
    targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;
  }

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = null;
    if (column >= 0) {
      raw = fields.get(column);
    }
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Decodes one line of CSV data, encoding its target value and feeding each predictor
 * value to that predictor's encoder. As a side effect, features are added into the
 * featureVector.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @return The value of the target variable, capped at the last permitted target code.
 */
@Override
public int processLine(String line, Vector featureVector) {
  List<String> fields = parseCsvLine(line);

  int encoded = targetDictionary.intern(fields.get(target));
  // Codes at or beyond the limit collapse onto the last permitted code.
  int targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = column >= 0 ? fields.get(column) : null;
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Decodes one line of CSV data, encoding its target value and feeding each predictor
 * value to that predictor's encoder. As a side effect, features are added into the
 * featureVector.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @return The value of the target variable, capped at the last permitted target code.
 */
@Override
public int processLine(String line, Vector featureVector) {
  List<String> fields = parseCsvLine(line);

  int encoded = targetDictionary.intern(fields.get(target));
  // Codes at or beyond the limit collapse onto the last permitted code.
  int targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = column >= 0 ? fields.get(column) : null;
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Decodes one line of CSV data, encoding its target value and feeding each predictor
 * value to that predictor's encoder. As a side effect, features are added into the
 * featureVector.
 *
 * @param line The raw data.
 * @param featureVector Where to fill in the features. Should be zeroed before calling
 *                      processLine.
 * @return The value of the target variable, capped at the last permitted target code.
 */
@Override
public int processLine(String line, Vector featureVector) {
  List<String> fields = parseCsvLine(line);

  int encoded = targetDictionary.intern(fields.get(target));
  // Codes at or beyond the limit collapse onto the last permitted code.
  int targetCode = encoded < maxTargetValue ? encoded : maxTargetValue - 1;

  for (Integer column : predictors) {
    // A negative predictor index has no CSV column; its encoder receives null.
    String raw = column >= 0 ? fields.get(column) : null;
    predictorEncoders.get(column).addToVector(raw, featureVector);
  }
  return targetCode;
}
/**
 * Checks that a dictionary lists its values in the order they were interned, and that a
 * dictionary rebuilt from those values via {@code fromList} lists them in the same order.
 */
@Test
public void testDictionaryOrder() {
  final String expected = "[a, d, c, b, qrz]";

  Dictionary original = new Dictionary();
  for (String word : new String[] {"a", "d", "c", "b", "qrz"}) {
    original.intern(word);
  }
  assertEquals(expected, original.values().toString());

  Dictionary rebuilt = Dictionary.fromList(original.values());
  assertEquals(expected, rebuilt.values().toString());
}
}
// Intern this newsgroup's name into newsGroups, then append everything it lists to files.
// NOTE(review): if newsgroup is a java.io.File, listFiles() returns null for a non-directory
// and Arrays.asList would then throw a NullPointerException — confirm callers guarantee a directory.
newsGroups.intern(newsgroup.getName()); files.addAll(Arrays.asList(newsgroup.listFiles()));
// Take the element at index 4 of values, intern it in dict, and append the returned value to target.
target.add(dict.intern(Iterables.get(values, 4)));