/**
 * Creates a copy of this instance by delegating to the copy constructor.
 *
 * @return a new {@code Instance} constructed from this one
 */
@Override
public Object copy() {
    return new Instance(this);
}
public Instance instanceToWeka(net.sf.javaml.core.Instance inst) { double[] values = new double[classSet ? inst.noAttributes() + 1 : inst.noAttributes()]; // System.arraycopy(i.values().t.toArray(), 0, values, 0, classSet ? // values.length - 1 : values.length); for (int i = 0; i < (classSet ? values.length - 1 : values.length); i++) { values[i] = inst.get(i); } // if (classSet) // values[values.length - 1] = inst.classValue(); Instance wI = null; if (inst instanceof net.sf.javaml.core.SparseInstance) wI = new SparseInstance(1, values); else wI = new Instance(1, values); wI.setDataset(wData); if (inst.classValue() != null) { wI.setClassValue(inst.classValue().toString()); } return wI; }
/** * Creates a Weka Instance object * @param values feature vector * @param dataset example weka dataset for feature counts/types * @return */ public static Instance makeCompatible(Vector<String> values, Instances dataset) { Instance retInstance=new Instance(values.size()+1); // dataset knows about possible nominal values retInstance.setDataset(dataset); for (int i = 0; i < values.size(); i++) { // check for existence of nominal values, otherwise convert to "missing": unknown vocabulary as features if (values.get(i)==null) {retInstance.setMissing(i);} else if (dataset.attribute(i).indexOfValue(values.get(i))>=0) {// ((possValues[i]!=null)&&(possValues[i].containsKey(values[i]))) { retInstance.setValue(i, values.get(i)); } else {retInstance.setMissing(i);} } return retInstance; }
// Create a new Instance sized from attributeInfo.size()
// (presumably one slot per attribute -- confirm which Instance class is imported).
Instance inst = new Instance(attributeInfo.size());
// Create a new Instance sized from attributeInfo.size()
// (presumably one slot per attribute -- confirm which Instance class is imported).
Instance inst = new Instance(attributeInfo.size());
/**
 * Classifies the given instance with the trained Weka classifier.
 *
 * <p>A Weka instance with as many attributes as the training data is created and
 * attached to the training data. The histogram percentages of {@code instance} are
 * written to the leading attribute positions, the attribute named {@code "class"} is
 * marked missing, and the classifier's numeric result is mapped back to the
 * corresponding value of the {@code "class"} attribute.
 *
 * @param instance the instance whose histogram percentages are used as features
 * @return the predicted class label
 * @throws Exception if the underlying classifier fails
 */
@Override
public String classify(Instance instance) throws Exception {
    weka.core.Instance row = new weka.core.Instance(wekaTrainingData.numAttributes());
    row.setDataset(wekaTrainingData);

    int column = 0;
    for (double share : instance.getHistogramPercent()) {
        row.setValue(column++, share);
    }
    row.setMissing(wekaTrainingData.attribute("class"));

    // The classifier answers with the index of the predicted class value.
    int predictedIndex = (int) classifier.classifyInstance(row);
    return wekaTrainingData.attribute("class").value(predictedIndex);
}
/**
 * Appends the given tuple as a new row of the backing {@code instances}.
 *
 * <p>The tuple is rejected when, for any of its columns, the column name or column
 * type differs from the column at the same position in this table's schema. Otherwise
 * each tuple column is converted according to the type of the attribute at the same
 * position: dates are parsed with the attribute's date format, numeric attributes are
 * parsed as doubles, and string/nominal attributes are stored as strings.
 *
 * @param tuple the tuple to add
 * @return {@code -1} if the tuple's schema does not match; otherwise the value of
 *         {@code getRowCount()} after the row has been added
 */
@Override
public int addRow(Tuple tuple) {
    final Schema tupleSchema = tuple.getSchema();
    final int tupleColumns = tupleSchema.getColumnCount();

    for (int i = 0; i < tupleColumns; i++) {
        if (!tupleSchema.getColumnName(i).equals(schema.getColumnName(i))
                || !tupleSchema.getColumnType(i).equals(schema.getColumnType(i))) {
            return -1;
        }
    }

    Instance inst = new Instance(schema.getColumnCount());
    for (int i = 0; i < tupleColumns; i++) {
        Attribute att = instances.attribute(i);
        // The date check must come before the numeric check: Weka's Attribute.isNumeric()
        // also reports true for date attributes, so testing isNumeric() first would send
        // dates through Double.parseDouble and never reach the date branch.
        if (att.isDate()) {
            try {
                // NOTE(review): this relies on the date's toString() output matching the
                // attribute's date format -- confirm against the attribute definition.
                inst.setValue(att, att.parseDate(tuple.getDate(i).toString()));
            } catch (ParseException e) {
                // Best effort: report the problem and leave this value as the new
                // Instance initialised it.
                e.printStackTrace();
            }
        } else if (att.isNumeric()) {
            inst.setValue(att, Double.parseDouble(tuple.get(i).toString()));
        } else if (att.isString() || att.isNominal()) {
            inst.setValue(att, tuple.getString(i));
        }
    }

    instances.add(inst);
    return getRowCount();
}
private void trainModel(Map<Long, Double> metricData) throws Exception { //Model has a single metric_value attribute Attribute value = new Attribute("metric_value"); FastVector attributes = new FastVector(); attributes.addElement(value); trainingData = new Instances("metric_value_data", attributes, 0); for (Double val : metricData.values()) { double[] valArray = new double[] { val }; Instance instance = new Instance(1.0, valArray); trainingData.add(instance); } //Create and train the model model = new SimpleKMeans(); model.setNumClusters(k); model.setMaxIterations(20); model.setPreserveInstancesOrder(true); model.buildClusterer(trainingData); clusterCentroids = model.getClusterCentroids(); centroidAssignments = model.getAssignments(); setMeanDistancesToCentroids(); }
private void trainModel(Map<Long, Double> metricData) throws Exception { //Model has a single metric_value attribute Attribute value = new Attribute("metric_value"); FastVector attributes = new FastVector(); attributes.addElement(value); trainingData = new Instances("metric_value_data", attributes, 0); for (Double val : metricData.values()) { double[] valArray = new double[] { val }; Instance instance = new Instance(1.0, valArray); trainingData.add(instance); } //Create and train the model model = new SimpleKMeans(); model.setNumClusters(k); model.setMaxIterations(20); model.setPreserveInstancesOrder(true); model.buildClusterer(trainingData); clusterCentroids = model.getClusterCentroids(); centroidAssignments = model.getAssignments(); setMeanDistancesToCentroids(); }
// Append a new Instance built from the values array; 1.0 is passed as the first
// constructor argument (presumably the instance weight -- confirm the imported class).
instances.add(new Instance(1.0, values));
// Slot 1 receives whatever addStringValue(text) returns for attribute 1, slot 2 is set
// to the missing-value marker, and the resulting array is appended to data as a new
// Instance with 1.0 as its first constructor argument (presumably the weight).
newInst[1] = data.attribute(1).addStringValue(text); newInst[2] = Instance.missingValue(); data.add(new Instance(1.0, newInst));
// Append a new Instance built from the newInst array; 1.0 is passed as the first
// constructor argument (presumably the instance weight -- confirm the imported class).
data.add(new Instance(1.0, newInst));
// Slot 1 receives whatever addStringValue(text) returns for attribute 1, slot 2 is set
// to the missing-value marker, and the resulting array is appended to data as a new
// Instance with 1.0 as its first constructor argument (presumably the weight).
newInst[1] = data.attribute(1).addStringValue(text); newInst[2] = Instance.missingValue(); data.add(new Instance(1.0, newInst));
// Bump the countPos counter, then build a new Instance from vals that carries the
// weight of the current instance.
countPos++; Instance inst = new Instance(current.weight(), vals);
/** * Converts an instance. A phrase boundary is inserted where a number is * found. */ private void convertInstance(Instance instance) throws Exception { double[] instVals = new double[instance.numAttributes()]; for (int i = 0; i < instance.numAttributes(); i++) { if ((!instance.attribute(i).isString()) || instance.isMissing(i)) { instVals[i] = instance.value(i); } else { String str = instance.stringValue(i); // if it is the document string only! if (i == 1) { str = filterNumbers(str); } int index = getOutputFormat().attribute(i).addStringValue(str); instVals[i] = (double) index; } } Instance inst = new Instance(instance.weight(), instVals); inst.setDataset(getOutputFormat()); push(inst); }
// Append a new Instance built from the newInst array; 1.0 is passed as the first
// constructor argument (presumably the instance weight -- confirm the imported class).
data.add(new Instance(1.0, newInst));
/**
 * Converts an instance and pushes the result onto the output queue.
 *
 * <p>Values of non-string attributes and missing values are copied unchanged. A string
 * value whose attribute index lies within {@code m_SelectCols} is passed through
 * {@code tokenize} first; string values outside that range are kept as they are. In
 * both cases the resulting string is registered with the matching attribute of the
 * output format and replaced by the index returned for it.
 *
 * @param instance the instance to convert
 * @throws Exception if the conversion fails
 */
private void convertInstance(Instance instance) throws Exception {
    final int attributeCount = instance.numAttributes();
    final double[] converted = new double[attributeCount];

    for (int pos = 0; pos < attributeCount; pos++) {
        final boolean presentString =
                instance.attribute(pos).isString() && !instance.isMissing(pos);
        if (!presentString) {
            converted[pos] = instance.value(pos);
            continue;
        }

        String text = instance.stringValue(pos);
        // Only the selected columns are tokenized.
        if (m_SelectCols.isInRange(pos)) {
            text = tokenize(text);
        }
        converted[pos] = getOutputFormat().attribute(pos).addStringValue(text);
    }

    Instance result = new Instance(instance.weight(), converted);
    result.setDataset(getOutputFormat());
    push(result);
}