Refine search
/**
 * Returns the value held in the given cell of the backing dataset.
 *
 * <p>Numeric attributes are returned as a boxed double, nominal and string
 * attributes as their string value, and date attributes as the text produced
 * by {@code Attribute.formatDate}. The cell is read directly from the row
 * instead of materialising the whole column as an array first.
 *
 * @param rowId index of the row (instance) to read
 * @param col index of the column (attribute) to read
 * @return the cell value, or {@code null} if {@code isValueValid} rejects the
 *         coordinates or the attribute type is not handled
 */
@Override
public Object getValue(int rowId, int col) {
  if (!isValueValid(rowId, col)) {
    return null;
  }

  Attribute att = instances.attribute(col);
  if (att.isNumeric()) {
    return instances.instance(rowId).value(col);
  } else if (att.isNominal() || att.isString()) {
    return instances.instance(rowId).stringValue(col);
  } else if (att.isDate()) {
    // NOTE(review): if isNumeric() also reports true for date attributes,
    // this branch is never reached and dates come back as raw doubles - confirm.
    double dateValue = instances.instance(rowId).value(col);
    return att.formatDate(dateValue);
  }
  return null;
}
protected void init(Instances structure) { List<SortRule> sortRules = new ArrayList<SortRule>(); if (m_sortDetails != null && m_sortDetails.length() > 0) { String[] sortParts = m_sortDetails.split("@@sort-rule@@"); for (String s : sortParts) { SortRule r = new SortRule(s.trim()); r.init(m_env, structure); sortRules.add(r); } m_sortComparator = new SortComparator(sortRules); } // check for string attributes m_stringAttIndexes = new HashMap<String, Integer>(); for (int i = 0; i < structure.numAttributes(); i++) { if (structure.attribute(i).isString()) { m_stringAttIndexes.put(structure.attribute(i).name(), new Integer(i)); } } if (m_stringAttIndexes.size() == 0) { m_stringAttIndexes = null; } }
protected void init(Instances structure) { List<SortRule> sortRules = new ArrayList<SortRule>(); if (m_sortDetails != null && m_sortDetails.length() > 0) { String[] sortParts = m_sortDetails.split("@@sort-rule@@"); for (String s : sortParts) { SortRule r = new SortRule(s.trim()); r.init(m_env, structure); sortRules.add(r); } m_sortComparator = new SortComparator(sortRules); } // check for string attributes m_stringAttIndexes = new HashMap<String, Integer>(); for (int i = 0; i < structure.numAttributes(); i++) { if (structure.attribute(i).isString()) { m_stringAttIndexes.put(structure.attribute(i).name(), new Integer(i)); } } if (m_stringAttIndexes.size() == 0) { m_stringAttIndexes = null; } }
/**
 * Validates the input dataset.
 *
 * <p>The texts location must be a directory, and the first two attributes of
 * the dataset must be a string attribute and the class attribute, in either
 * order. Datasets with fewer than two attributes are rejected up front so
 * that the attribute look-ups below cannot fail with an index error.
 *
 * @param data the input dataset
 * @throws InvalidInputDataException if validation is unsuccessful
 */
public void validate(Instances data) throws InvalidInputDataException {
  if (!getTextsLocation().isDirectory()) {
    throw new InvalidInputDataException("Directory not valid: "
      + getTextsLocation());
  }

  // attribute(0) and attribute(1) are both inspected below; bail out with the
  // validation exception rather than an index error when they do not exist.
  if (data.numAttributes() < 2) {
    throw new InvalidInputDataException(
      "An ARFF is required with a string attribute and a class attribute");
  }

  if (!((data.attribute(0).isString() && data.classIndex() == 1)
    || (data.attribute(1).isString() && data.classIndex() == 0))) {
    throw new InvalidInputDataException(
      "An ARFF is required with a string attribute and a class attribute");
  }
}
/**
 * Allocates the value and index buffers used when reading sparse instances
 * and records the indices of all string attributes of {@code m_Data}.
 *
 * @see #m_ValueBuffer
 * @see #m_IndicesBuffer
 */
protected void initBuffers() {
  m_ValueBuffer = new double[m_Data.numAttributes()];
  m_IndicesBuffer = new int[m_Data.numAttributes()];
  m_stringAttIndices = new ArrayList<Integer>();

  // nothing to record when the header reports no string attributes
  if (!m_Data.checkForStringAttributes()) {
    return;
  }

  final int attCount = m_Data.numAttributes();
  for (int attIdx = 0; attIdx < attCount; attIdx++) {
    if (m_Data.attribute(attIdx).isString()) {
      m_stringAttIndices.add(attIdx);
    }
  }
}
/**
 * Validates the input dataset.
 *
 * <p>The dataset must consist of exactly two attributes: a string attribute
 * and the class attribute, in either order. The attribute-type check is only
 * performed when two attributes are actually present, so a dataset with fewer
 * attributes is reported through the attribute-count message instead of
 * failing with an index error.
 *
 * @param data the input dataset
 * @throws InvalidInputDataException if validation is unsuccessful
 */
@Override
public void validate(Instances data) throws InvalidInputDataException {
  int numAtts = data.numAttributes();

  // attribute(0)/attribute(1) may only be inspected when both exist
  if (numAtts >= 2
    && !((data.attribute(0).isString() && data.classIndex() == 1)
      || (data.attribute(1).isString() && data.classIndex() == 0))) {
    throw new InvalidInputDataException(
      "An ARFF is required with a string attribute and a class attribute");
  }

  if (numAtts != 2) {
    throw new InvalidInputDataException(
      "There must be exactly two attributes: 1) Text 2) Label. "
        + "The given data consists of " + numAtts + " attributes.");
  }
}
/**
 * Creates fresh buffers for reading sparse instances, sized to the number of
 * attributes in {@code m_Data}, and rebuilds the list of string-attribute
 * indices.
 *
 * @see #m_ValueBuffer
 * @see #m_IndicesBuffer
 */
protected void initBuffers() {
  m_ValueBuffer = new double[m_Data.numAttributes()];
  m_IndicesBuffer = new int[m_Data.numAttributes()];
  m_stringAttIndices = new ArrayList<Integer>();

  boolean hasStringAtts = m_Data.checkForStringAttributes();
  if (hasStringAtts) {
    int position = 0;
    while (position < m_Data.numAttributes()) {
      if (m_Data.attribute(position).isString()) {
        m_stringAttIndices.add(position);
      }
      position++;
    }
  }
}
@Override public String graph() throws Exception { if (numberOfClusters() == 0) { return "Newick:(no,clusters)"; } int attIndex = m_instances.classIndex(); if (attIndex < 0) { // try find a string, or last attribute otherwise attIndex = 0; while (attIndex < m_instances.numAttributes() - 1) { if (m_instances.attribute(attIndex).isString()) { break; } attIndex++; } } String sNewick = null; if (m_instances.attribute(attIndex).isString()) { sNewick = m_clusters[0].toString(attIndex); } else { sNewick = m_clusters[0].toString2(attIndex); } return "Newick:" + sNewick; }
@Override public String graph() throws Exception { if (numberOfClusters() == 0) { return "Newick:(no,clusters)"; } int attIndex = m_instances.classIndex(); if (attIndex < 0) { // try find a string, or last attribute otherwise attIndex = 0; while (attIndex < m_instances.numAttributes() - 1) { if (m_instances.attribute(attIndex).isString()) { break; } attIndex++; } } String sNewick = null; if (m_instances.attribute(attIndex).isString()) { sNewick = m_clusters[0].toString(attIndex); } else { sNewick = m_clusters[0].toString2(attIndex); } return "Newick:" + sNewick; }
/**
 * Tries to fetch a variable of an instance value.
 *
 * <p>The same invariant of {@link VariableDeclarations} applies here too.
 *
 * <p>Numeric attributes yield a {@code DoubleValue} node; string and nominal
 * attributes yield a {@code StringValue} node.
 *
 * @param name name of the variable
 * @return node representing the instance value
 * @throws RuntimeException if the name does not resolve to an attribute of
 *           the dataset or the attribute type is not supported
 */
@Override
public Node getVariable(String name) {
  int index = getIndex(name);

  if (index < 0 || index >= dataset.numAttributes()) {
    throw new RuntimeException("Variable '" + name + "' undefined!");
  }

  if (dataset.attribute(index).isNumeric()) {
    return new DoubleValue(index);
  }

  boolean textual = dataset.attribute(index).isString()
    || dataset.attribute(index).isNominal();
  if (!textual) {
    throw new RuntimeException("Attributes of type '"
      + dataset.attribute(index).toString() + "' not supported!");
  }
  return new StringValue(index);
}
/**
 * Resolves a variable name to a node that reads the matching attribute value
 * of an instance.
 *
 * <p>The same invariant of {@link VariableDeclarations} applies here too.
 *
 * @param name name of the variable
 * @return a {@code DoubleValue} for numeric attributes or a
 *         {@code StringValue} for string and nominal attributes
 * @throws RuntimeException if the index found for the name lies outside the
 *           dataset's attributes, or the attribute is of another type
 */
@Override
public Node getVariable(String name) {
  final int index = getIndex(name);

  final boolean known = index >= 0 && index < dataset.numAttributes();
  if (!known) {
    throw new RuntimeException("Variable '" + name + "' undefined!");
  }

  if (dataset.attribute(index).isNumeric()) {
    return new DoubleValue(index);
  } else if (dataset.attribute(index).isString()
    || dataset.attribute(index).isNominal()) {
    return new StringValue(index);
  } else {
    throw new RuntimeException("Attributes of type '"
      + dataset.attribute(index).toString() + "' not supported!");
  }
}
/** * Validates the input dataset * * @param data the input dataset * @throws InvalidInputDataException if validation is unsuccessful */ public void validate(Instances data) throws InvalidInputDataException { Environment env = Environment.getSystemWide(); String resolved = getImagesLocation().toString(); try { resolved = env.substitute(getImagesLocation().toString()); } catch (Exception ex) { // ignore } File imagesLoc = new File(resolved); if (!imagesLoc.isDirectory()) { throw new InvalidInputDataException("Directory not valid: " + resolved); } if (!(data.attribute(0).isString() && data.classIndex() == 1)) { throw new InvalidInputDataException( "An ARFF is required with a string attribute and a class attribute"); } }
/**
 * Writes a value into the given cell of the backing dataset.
 *
 * <p>The value's {@code toString()} text is parsed as a double for numeric
 * attributes, stored as-is for string and nominal attributes, and parsed with
 * {@code Attribute.parseDate} for date attributes. Nothing happens when
 * {@code isValueValid} rejects the coordinates or the attribute type is not
 * handled.
 *
 * @param rowId index of the row (instance) to modify
 * @param col index of the column (attribute) to modify
 * @param val the new value; its string form is what gets stored or parsed
 */
@Override
public void set(int rowId, int col, Object val) {
  if (!isValueValid(rowId, col)) {
    return;
  }

  Attribute att = instances.attribute(col);

  if (att.isNumeric()) {
    double parsed = Double.parseDouble(val.toString());
    instances.instance(rowId).setValue(att, parsed);
    return;
  }

  if (att.isString() || att.isNominal()) {
    instances.instance(rowId).setValue(att, val.toString());
    return;
  }

  if (att.isDate()) {
    try {
      instances.instance(rowId).setValue(att, att.parseDate(val.toString()));
    } catch (ParseException e) {
      // NOTE(review): an unparsable date leaves the cell unchanged and is
      // only reported on stderr - confirm this is intended.
      e.printStackTrace();
    }
  }
}
Attribute summary = headerWithSummary .attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX + headerNoSummary.attribute(i).name()); if (summary != null) { if (headerNoSummary.attribute(i).isNumeric()) { stats[i] = NumericStats.attributeToStats(summary); } else if (headerNoSummary.attribute(i).isString()) { stats[i] = StringStats.attributeToStats(summary); } else if (headerNoSummary.attribute(i).isNominal()) {
/**
 * Checks whether the given instance is compatible with this dataset.
 *
 * <p>Only the number of attributes and, for non-missing nominal and string
 * attributes, the stored value are examined: the value must be a whole number
 * between 0 and {@code numValues() - 1} of the attribute.
 *
 * @param instance the instance to check
 * @return true if the instance is compatible with the dataset
 */
public/* @pure@ */boolean checkInstance(Instance instance) {
  if (instance.numAttributes() != numAttributes()) {
    return false;
  }

  for (int i = 0; i < numAttributes(); i++) {
    // missing values and other attribute types are not range-checked
    if (instance.isMissing(i)
      || !(attribute(i).isNominal() || attribute(i).isString())) {
      continue;
    }

    double stored = instance.value(i);
    if (stored != (int) stored) {
      return false;
    }
    if ((stored < 0) || (stored > attribute(i).numValues() - 1)) {
      return false;
    }
  }
  return true;
}
/**
 * Tests an instance for compatibility with this dataset.
 *
 * <p>The instance must have the same number of attributes as the dataset.
 * Each non-missing value of a nominal or string attribute must additionally
 * be integral and lie within {@code [0, numValues() - 1]} for that attribute.
 * Values of other attribute types are not inspected.
 *
 * @param instance the instance to check
 * @return true if the instance is compatible with the dataset
 */
public/* @pure@ */boolean checkInstance(Instance instance) {
  if (instance.numAttributes() != numAttributes()) {
    return false;
  }

  int attIdx = 0;
  while (attIdx < numAttributes()) {
    if (!instance.isMissing(attIdx)
      && (attribute(attIdx).isNominal() || attribute(attIdx).isString())) {
      double raw = instance.value(attIdx);
      if (raw != (int) raw || raw < 0
        || raw > attribute(attIdx).numValues() - 1) {
        return false;
      }
    }
    attIdx++;
  }
  return true;
}
/**
 * Makes an output instance from an input one.
 *
 * <p>Non-missing string values of the input are collected separately, run
 * through every rule in {@code m_matchRules}, and then written to the
 * corresponding string attributes of {@code m_outputStructure}. All other
 * values are copied as their internal double representation.
 *
 * @param inputI the input instance to process
 * @return a new dense instance carrying the input's weight, attached to the
 *         output structure
 */
public Instance makeOutputInstance(Instance inputI) {
  double[] numericVals = new double[m_outputStructure.numAttributes()];
  String[] textVals = new String[m_outputStructure.numAttributes()];

  // split the input into string values and everything else
  for (int in = 0; in < inputI.numAttributes(); in++) {
    boolean presentString =
      inputI.attribute(in).isString() && !inputI.isMissing(in);
    if (presentString) {
      textVals[in] = inputI.stringValue(in);
    } else {
      numericVals[in] = inputI.value(in);
    }
  }

  for (SubstringReplacerMatchRule rule : m_matchRules) {
    rule.apply(textVals);
  }

  // push the (possibly rewritten) strings into the output header
  for (int out = 0; out < m_outputStructure.numAttributes(); out++) {
    if (!m_outputStructure.attribute(out).isString()) {
      continue;
    }
    if (textVals[out] != null) {
      m_outputStructure.attribute(out).setStringValue(textVals[out]);
    }
  }

  Instance outputI = new DenseInstance(inputI.weight(), numericVals);
  outputI.setDataset(m_outputStructure);
  return outputI;
}