public FTBDataset() { super(); //Need to use a MemoryTreebank so that we can compute gross corpus //stats for MWE pre-processing // The treebank may be reset if setOptions changes CC_TAGSET treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CC_TAGSET), FrenchTreebankLanguagePack.FTB_ENCODING); treeFileExtension = "xml"; }
/**
 * Chooses the reader factory that matches the configured input format.
 *
 * @return a {@code FrenchTreeReaderFactory} when {@code readPennFormat} is set;
 *         otherwise a {@code FrenchXMLTreeReaderFactory} constructed with {@code false}
 */
public TreeReaderFactory treeReaderFactory() {
  if (readPennFormat) {
    return new FrenchTreeReaderFactory();
  }
  return new FrenchXMLTreeReaderFactory(false);
}
static Map<String, Tree> readTrees(String[] filenames) throws IOException { // TODO: perhaps we can just pass in CC_TAGSET and get rid of replacePOSTags // need to test that final TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false); Map<String, Tree> treeMap = Generics.newHashMap(); for (String filename : filenames) { File file = new File(filename); String canonicalFilename = file.getName().substring(0, file.getName().lastIndexOf('.')); FrenchXMLTreeReader tr = (FrenchXMLTreeReader) trf.newTreeReader(new BufferedReader (new InputStreamReader (new FileInputStream(file),"ISO8859_1"))); Tree t = null; int numTrees; for (numTrees = 0; (t = tr.readTree()) != null; numTrees++) { String id = canonicalFilename + "-" + ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class); treeMap.put(id, t); } tr.close(); System.err.printf("%s: %d trees%n", file.getName(), numTrees); } return treeMap; }
// Queue every argument as an input file.
for (String inputPath : args) {
  fileList.add(new File(inputPath));
}

// Reader factory for the XML treebank format, plus the running tree count
// and the (initially empty) set of morphological analysis strings.
TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
int totalTrees = 0;
Set<String> morphAnalyses = Generics.newHashSet();
/**
 * Applies configuration options to this dataset after the superclass has
 * processed them.
 *
 * <p>In order: loads the split set when {@code ConfigParser.paramSplit} is
 * present, reads the {@code ConfigParser.paramCCTagset} flag (default
 * {@code false}) into {@code CC_TAGSET}, rebuilds the in-memory treebank with
 * a reader factory for that flag, creates the lexical mapper if none exists,
 * and feeds every configured mapping file to the POS mapper.
 *
 * @param opts the options to apply
 * @return whatever the superclass {@code setOptions} returned
 */
@Override
public boolean setOptions(Properties opts) {
  final boolean superResult = super.setOptions(opts);

  if (opts.containsKey(ConfigParser.paramSplit)) {
    splitSet = makeSplitSet(opts.getProperty(ConfigParser.paramSplit));
  }

  // The treebank is always rebuilt so its reader factory matches CC_TAGSET.
  CC_TAGSET = PropertiesUtils.getBool(opts, ConfigParser.paramCCTagset, false);
  final FrenchXMLTreeReaderFactory xmlReaderFactory = new FrenchXMLTreeReaderFactory(CC_TAGSET);
  treebank = new MemoryTreebank(xmlReaderFactory, FrenchTreebankLanguagePack.FTB_ENCODING);

  if (lexMapper == null) {
    lexMapper = new DefaultMapper();
    lexMapper.setup(null, lexMapOptions.split(","));
  }

  if (pathsToMappings.size() != 0) {
    if (posMapper == null) {
      posMapper = new DefaultMapper();
    }
    for (File mappingFile : pathsToMappings) {
      posMapper.setup(mappingFile);
    }
  }

  return superResult;
}
public FTBDataset() { super(); //Need to use a MemoryTreebank so that we can compute gross corpus //stats for MWE pre-processing // The treebank may be reset if setOptions changes CC_TAGSET treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CC_TAGSET), FrenchTreebankLanguagePack.FTB_ENCODING); treeFileExtension = "xml"; }
/**
 * Chooses the reader factory that matches the configured input format.
 *
 * @return a {@code FrenchTreeReaderFactory} when {@code readPennFormat} is set;
 *         otherwise a {@code FrenchXMLTreeReaderFactory} constructed with {@code false}
 */
public TreeReaderFactory treeReaderFactory() {
  if (readPennFormat) {
    return new FrenchTreeReaderFactory();
  }
  return new FrenchXMLTreeReaderFactory(false);
}
/**
 * Chooses the reader factory that matches the configured input format.
 *
 * @return a {@code FrenchTreeReaderFactory} when {@code readPennFormat} is set;
 *         otherwise a {@code FrenchXMLTreeReaderFactory} constructed with {@code false}
 */
public TreeReaderFactory treeReaderFactory() {
  if (readPennFormat) {
    return new FrenchTreeReaderFactory();
  }
  return new FrenchXMLTreeReaderFactory(false);
}
/**
 * Chooses the reader factory that matches the configured input format.
 *
 * @return a {@code FrenchTreeReaderFactory} when {@code readPennFormat} is set;
 *         otherwise a {@code FrenchXMLTreeReaderFactory} constructed with {@code false}
 */
public TreeReaderFactory treeReaderFactory() {
  if (readPennFormat) {
    return new FrenchTreeReaderFactory();
  }
  return new FrenchXMLTreeReaderFactory(false);
}
// Queue every argument as an input file.
for (String inputPath : args) {
  fileList.add(new File(inputPath));
}

// Reader factory for the XML treebank format, plus the running tree count
// and the (initially empty) set of morphological analysis strings.
TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
int totalTrees = 0;
Set<String> morphAnalyses = Generics.newHashSet();
// Queue every argument as an input file.
for (String inputPath : args) {
  fileList.add(new File(inputPath));
}

// Reader factory for the XML treebank format, plus the running tree count
// and the (initially empty) set of morphological analysis strings.
TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
int totalTrees = 0;
Set<String> morphAnalyses = Generics.newHashSet();
// Add the file named by args[i] to fileList, then create the XML tree reader
// factory (constructed with false), a tree counter starting at zero, and an
// initially empty set of morphological analysis strings.
fileList.add(new File(args[i])); TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false); int totalTrees = 0; Set<String> morphAnalyses = Generics.newHashSet();
/**
 * Applies configuration options to this dataset after the superclass has
 * processed them.
 *
 * <p>In order: loads the split set when {@code ConfigParser.paramSplit} is
 * present, reads the {@code ConfigParser.paramCCTagset} flag (default
 * {@code false}) into {@code CC_TAGSET}, rebuilds the in-memory treebank with
 * a reader factory for that flag, creates the lexical mapper if none exists,
 * and feeds every configured mapping file to the POS mapper.
 *
 * @param opts the options to apply
 * @return whatever the superclass {@code setOptions} returned
 */
@Override
public boolean setOptions(Properties opts) {
  final boolean superResult = super.setOptions(opts);

  if (opts.containsKey(ConfigParser.paramSplit)) {
    splitSet = makeSplitSet(opts.getProperty(ConfigParser.paramSplit));
  }

  // The treebank is always rebuilt so its reader factory matches CC_TAGSET.
  CC_TAGSET = PropertiesUtils.getBool(opts, ConfigParser.paramCCTagset, false);
  final FrenchXMLTreeReaderFactory xmlReaderFactory = new FrenchXMLTreeReaderFactory(CC_TAGSET);
  treebank = new MemoryTreebank(xmlReaderFactory, FrenchTreebankLanguagePack.FTB_ENCODING);

  if (lexMapper == null) {
    lexMapper = new DefaultMapper();
    lexMapper.setup(null, lexMapOptions.split(","));
  }

  if (pathsToMappings.size() != 0) {
    if (posMapper == null) {
      posMapper = new DefaultMapper();
    }
    for (File mappingFile : pathsToMappings) {
      posMapper.setup(mappingFile);
    }
  }

  return superResult;
}