/**
 * Builds a single identifier for a list of parse trees by concatenating the
 * per-subtree identifiers of its members.
 * <p>
 * Each tree is mapped to {@link AbstractNodeStringUtils#uniqueSubtreeId}, and the
 * resulting strings are joined with the separator {@code ". "}. The result is
 * intended to (hopefully) distinguish one list of subtrees from another, which is
 * useful, for example, for differentiating between assignments to variables.
 * <p>
 * Note: the plain node id is not sufficient for this purpose, because node ids are
 * not guaranteed to be unique.
 *
 * @param parseTrees the trees to identify; may be {@code null}.
 * @return the joined identifier, or {@code null} if {@code parseTrees} is {@code null}.
 */
public static <T extends Info,S extends AbstractNode<T,S>> String uniqueTreesId(List<S> parseTrees) {
    // A null list has no identifier; callers receive null rather than an exception.
    if (parseTrees == null) {
        return null;
    }

    List<String> subtreeIds = new ArrayList<String>(parseTrees.size());
    for (S tree : parseTrees) {
        String subtreeId = AbstractNodeStringUtils.uniqueSubtreeId(tree);
        subtreeIds.add(subtreeId);
    }
    return StringUtil.join(subtreeIds, ". ");
}
/**
 * Renders a parse tree as an approximately equivalent English sentence.
 * <p>
 * The words are collected by {@code addWordsOfEnglishSentence}, joined with single
 * spaces, converted to lower case, and finally the space preceding any of the
 * characters {@code , . ' ; : )} is removed so that punctuation attaches to the
 * preceding word.
 * <p>
 * Note: currently handles only subject/object issues.
 *
 * @param parseTree the root of the parse tree; must not be {@code null}.
 * @return the lower-cased sentence text.
 * @throws NullPointerException if {@code parseTree} is {@code null}.
 */
@StandardSpecific(value = { "Stanford" })
public static <T extends Info,S extends AbstractNode<T,S>> String toEnglishSentence(S parseTree) {
    if (parseTree == null) {
        throw new NullPointerException("parseTree is null");
    }

    List<String> words = new ArrayList<String>();
    addWordsOfEnglishSentence(parseTree, words);

    String sentence = StringUtil.join(words, " ");
    // Lower-case with an explicit English locale: the no-argument toLowerCase()
    // uses the JVM default locale, which under e.g. Turkish maps 'I' to a dotless
    // 'i' and would corrupt the English output.
    sentence = sentence.toLowerCase(java.util.Locale.ENGLISH);
    // Remove the space that the join put before closing punctuation.
    sentence = sentence.replaceAll(" ([,.';:\\)])", "$1");
    return sentence;
}
/** * Convert a dependency parse tree with a single chain of nodes, to a string in DIRT-like format. * <p>Examples of unary paths: n<nsubj<v:convict:v , n<nsubj<v:use:v>dobj>n:keyboard:n * <p>Examples of binary paths: n<dobj<v:convict:v>nsubj>n , n<pobj<p:against:p<prep<n:charge:n<dobj<v:drop:v>nsubj>n * * @param parseTree the root of the parse tree. * @param maxChildCount the maximum number of children that this tree can have (1 for unary, 2 for binary). * @param parentToChildDirection [INPUT] if RIGHT_TO_LEFT, the first child (child 0) will be inserted to the left of the parent. Otherwise, it will be inserted to the right of the parent. * @param writeRootLemma [INPUT] if true, the root node will be written with both POS and lemma (e.g. v:convict:v). Otherwise, only POS will be written (v). * @param writeLeftLeafLemma [INPUT] if true, the left leaf node will be written with both POS and lemma (e.g. a:wise:a). Otherwise, only POS will be written (a). * @param writeRightLeafLemma [INPUT] if true, the right leaf node will be written with both POS and lemma (e.g. a:wise:a). Otherwise, only POS will be written (a). * @return a DIRT-like dependency path that represents the given parse tree. */ public static <T extends Info,S extends AbstractNode<T,S>> String toDependencyPath(S parseTree, int maxChildCount, Direction parentToChildDirection, boolean writeRootLemma, boolean writeLeftLeafLemma, boolean writeRightLeafLemma) { if (maxChildCount<1 || maxChildCount>2) //AS Why do you throw a runtime exception? throw new IllegalArgumentException("maxChildCount can only be 1 (unary) or 2 (binary)"); List<String> words = new ArrayList<String> (); AbstractNodeDependencyPathsUtils.addComponentsOfDependencyPath(parseTree, maxChildCount, parentToChildDirection, writeRootLemma, writeLeftLeafLemma, writeRightLeafLemma, words); return StringUtil.join(words, ""); }
/** * Run specific steps in the BIUTEE flow, according to specific values specified as a comma-separated list in parameter flowList: * <tt>lap_train, train, lap_test, test</tt>, or <tt>full</tt> for all steps.<BR> * @throws Throwable */ public static void runBiuteeCustomFlow(String configPath, String flowList) throws Throwable { // (Reminder: the first command-line parameter is the configuration file name). // Read the second command-line parameter. This might be something like "lap_train,train" Set<String> flow = new LinkedHashSet<String>(Arrays.asList(flowList.split(","))); if (flow.size()==0) { throw new BiuteeMainException("At least one flow step must be provided, got none."); } // Validate correctness of second command-line parameter Set<String> diff = new LinkedHashSet<String>(flow); diff.removeAll(ALLOWED_STEPS); if (diff.size() != 0) { throw new BiuteeMainException("Disallowed flow steps: " + StringUtil.join(diff, ",")); } if (flow.contains("full") && flow.size()!=1) { throw new BiuteeMainException("Flow step \"full\" must not be provided with other steps."); } boolean hasFull = flow.contains("full"); // Run the appropriate action, according to the second command-line parameter if (hasFull || flow.contains("lap_train")) doLAP(configPath, RTEPairsPreProcessor.TrainTestEnum.TRAIN.name()); if (hasFull || flow.contains("train")) doTraining(configPath); if (hasFull || flow.contains("lap_test")) doLAP(configPath, RTEPairsPreProcessor.TrainTestEnum.TEST.name()); if (hasFull || flow.contains("test")) doTesting(configPath); }