/**
 * Turns normalization of a document's (instance's) word frequencies on or
 * off. The value is handed straight to the underlying vectorizer.
 *
 * @param normalize true if word frequencies should be normalized, false
 *          otherwise.
 */
@OptionMetadata(
  displayName = "Normalize word frequencies",
  description = "Whether to normalize to average length of documents seen "
    + "during dictionary construction",
  commandLineParamName = "N",
  commandLineParamSynopsis = "-N",
  commandLineParamIsFlag = true,
  displayOrder = 9)
public void setNormalizeDocLength(boolean normalize) {
  // Pure delegation: this object keeps no normalization state of its own.
  m_vectorizer.setNormalize(normalize);
}
/**
 * Turns normalization of a document's (instance's) word frequencies on or
 * off. The value is handed straight to the underlying vectorizer.
 *
 * @param normalize true if word frequencies should be normalized, false
 *          otherwise.
 */
@OptionMetadata(
  displayName = "Normalize word frequencies",
  description = "Whether to normalize to average length of documents seen "
    + "during dictionary construction",
  commandLineParamName = "N",
  commandLineParamSynopsis = "-N",
  commandLineParamIsFlag = true,
  displayOrder = 9)
public void setNormalizeDocLength(boolean normalize) {
  // Pure delegation: this object keeps no normalization state of its own.
  m_vectorizer.setNormalize(normalize);
}
/**
 * Registers the structure of the incoming instances and re-initialises the
 * dictionary builder for it.
 *
 * @param instanceInfo an Instances object describing the input structure
 *          (any instances it contains are ignored - only the structure is
 *          required).
 * @return true if the outputFormat may be collected immediately; this
 *         implementation always returns false.
 * @throws Exception if the input format can't be set successfully
 */
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // Start from a clean builder before configuring it.
  m_dictionaryBuilder.reset();

  // Fixed configuration applied at this stage: sorted dictionary, no
  // normalization.
  m_dictionaryBuilder.setSortDictionary(true);
  m_dictionaryBuilder.setNormalize(false);

  // Bind the builder to the supplied input structure.
  m_dictionaryBuilder.setup(instanceInfo);

  return false;
}
/**
 * Registers the structure of the incoming instances and re-initialises the
 * dictionary builder for it.
 *
 * @param instanceInfo an Instances object describing the input structure
 *          (any instances it contains are ignored - only the structure is
 *          required).
 * @return true if the outputFormat may be collected immediately; this
 *         implementation always returns false.
 * @throws Exception if the input format can't be set successfully
 */
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // Start from a clean builder before configuring it.
  m_dictionaryBuilder.reset();

  // Fixed configuration applied at this stage: sorted dictionary, no
  // normalization.
  m_dictionaryBuilder.setSortDictionary(true);
  m_dictionaryBuilder.setNormalize(false);

  // Bind the builder to the supplied input structure.
  m_dictionaryBuilder.setup(instanceInfo);

  return false;
}
// Normalization is enabled exactly when a filter type other than FILTER_NONE
// is selected. The same condition is also passed as the second argument of
// vectorizeBatch, applied to the current input format.
// NOTE(review): the meaning of that second argument is not visible here -
// confirm against DictionaryBuilder.vectorizeBatch.
m_dictionaryBuilder.setNormalize(m_filterType != FILTER_NONE); Instances converted = m_dictionaryBuilder.vectorizeBatch( getInputFormat(), m_filterType != FILTER_NONE);
// Normalization is enabled exactly when a filter type other than FILTER_NONE
// is selected. The same condition is also passed as the second argument of
// vectorizeBatch, applied to the current input format.
// NOTE(review): the meaning of that second argument is not visible here -
// confirm against DictionaryBuilder.vectorizeBatch.
m_dictionaryBuilder.setNormalize(m_filterType != FILTER_NONE); Instances converted = m_dictionaryBuilder.vectorizeBatch( getInputFormat(), m_filterType != FILTER_NONE);
// Look up the 'N' flag in the options array and forward the resulting
// boolean to setNormalize.
setNormalize(Utils.getFlag('N', options));
// Look up the 'N' flag in the options array and forward the resulting
// boolean to setNormalize.
setNormalize(Utils.getFlag('N', options));
public void testSaveLoadDictionaryPlainTextNormalize() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setNormalize(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); StringWriter sw = new StringWriter(); builder.saveDictionary(sw); String dictText = sw.toString(); assertTrue(dictText.startsWith("@@@3.39036")); StringReader sr = new StringReader(dictText); DictionaryBuilder builder2 = new DictionaryBuilder(); builder2.setup(structure); builder2.loadDictionary(sr); // just returns the loaded dictionary Map<String, int[]> consolidated = builder2.finalizeDictionary(); assertEquals(2, consolidated.size()); }
public void testSaveLoadDictionaryPlainTextNormalize() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setNormalize(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); StringWriter sw = new StringWriter(); builder.saveDictionary(sw); String dictText = sw.toString(); assertTrue(dictText.startsWith("@@@3.39036")); StringReader sr = new StringReader(dictText); DictionaryBuilder builder2 = new DictionaryBuilder(); builder2.setup(structure); builder2.loadDictionary(sr); // just returns the loaded dictionary Map<String, int[]> consolidated = builder2.finalizeDictionary(); assertEquals(2, consolidated.size()); }