/**
 * Applies the per-word {@code setLevel1AggregationFeatures} overload to every word of
 * every sentence of every document in {@code data}, logging at debug level before and
 * after the pass.
 *
 * @param data the data set whose documents are traversed
 * @param useGoldData passed through unchanged to the per-word overload
 */
public static void setLevel1AggregationFeatures(Data data, boolean useGoldData) {
    logger.debug("Extracting features for level 2 inference");
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            int wordIndex = 0;
            while (wordIndex < sentence.size()) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                setLevel1AggregationFeatures(word, useGoldData);
                wordIndex++;
            }
        }
    }
    logger.debug("Done - Extracting features for level 2 inference");
}
/**
 * Applies the per-word {@code setLevel1AggregationFeatures} overload to every word of
 * every sentence of every document in {@code data}, logging at debug level before and
 * after the pass.
 *
 * @param data the data set whose documents are traversed
 * @param useGoldData passed through unchanged to the per-word overload
 */
public static void setLevel1AggregationFeatures(Data data, boolean useGoldData) {
    logger.debug("Extracting features for level 2 inference");
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            int wordIndex = 0;
            while (wordIndex < sentence.size()) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                setLevel1AggregationFeatures(word, useGoldData);
                wordIndex++;
            }
        }
    }
    logger.debug("Done - Extracting features for level 2 inference");
}
/**
 * Applies the per-word {@code setLevel1AggregationFeatures} overload to every word of
 * every sentence of every document in {@code data}, logging at debug level before and
 * after the pass.
 *
 * @param data the data set whose documents are traversed
 * @param useGoldData passed through unchanged to the per-word overload
 */
public static void setLevel1AggregationFeatures(Data data, boolean useGoldData) {
    logger.debug("Extracting features for level 2 inference");
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            int wordIndex = 0;
            while (wordIndex < sentence.size()) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                setLevel1AggregationFeatures(word, useGoldData);
                wordIndex++;
            }
        }
    }
    logger.debug("Done - Extracting features for level 2 inference");
}
/**
 * Sets the {@code neTypeLevel1} and {@code neTypeLevel2} fields of every word in every
 * sentence of every document in {@code data} to {@code null}.
 *
 * @param data the data set whose words are reset
 */
public static void clearPredictions(Data data) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector words : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < words.size(); wordIndex++) {
                NEWord word = (NEWord) words.get(wordIndex);
                word.neTypeLevel1 = null;
                word.neTypeLevel2 = null;
            }
        }
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Sets the {@code neTypeLevel1} and {@code neTypeLevel2} fields of every word in every
 * sentence of every document in {@code data} to {@code null}.
 *
 * @param data the data set whose words are reset
 */
public static void clearPredictions(Data data) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector words : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < words.size(); wordIndex++) {
                NEWord word = (NEWord) words.get(wordIndex);
                word.neTypeLevel1 = null;
                word.neTypeLevel2 = null;
            }
        }
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Sets the {@code neTypeLevel1} and {@code neTypeLevel2} fields of every word in every
 * sentence of every document in {@code data} to {@code null}.
 *
 * @param data the data set whose words are reset
 */
public static void clearPredictions(Data data) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector words : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < words.size(); wordIndex++) {
                NEWord word = (NEWord) words.get(wordIndex);
                word.neTypeLevel1 = null;
                word.neTypeLevel2 = null;
            }
        }
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Writes every sentence of every document in {@code data} to {@code outFile}, one
 * sentence per line. Each word's {@code form} is followed by a single space, so every
 * non-empty line ends with a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the data set whose sentences are written
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Data data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int did = 0; did < data.documents.size(); did++) {
            for (int i = 0; i < data.documents.get(did).sentences.size(); i++) {
                StringBuilder buf = new StringBuilder(2000);
                for (int j = 0; j < data.documents.get(did).sentences.get(i).size(); j++) {
                    buf.append(((NEWord) data.documents.get(did).sentences.get(i).get(j)).form)
                            .append(" ");
                }
                out.println(buf.toString());
            }
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
/**
 * Writes every sentence of every document in {@code data} to {@code outFile}, one
 * sentence per line. Each word's {@code form} is followed by a single space, so every
 * non-empty line ends with a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the data set whose sentences are written
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Data data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int did = 0; did < data.documents.size(); did++) {
            for (int i = 0; i < data.documents.get(did).sentences.size(); i++) {
                StringBuilder buf = new StringBuilder(2000);
                for (int j = 0; j < data.documents.get(did).sentences.get(i).size(); j++) {
                    buf.append(((NEWord) data.documents.get(did).sentences.get(i).get(j)).form)
                            .append(" ");
                }
                out.println(buf.toString());
            }
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
/**
 * Writes every sentence of every document in {@code data} to {@code outFile}, one
 * sentence per line. Each word's {@code form} is followed by a single space, so every
 * non-empty line ends with a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the data set whose sentences are written
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Data data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int did = 0; did < data.documents.size(); did++) {
            for (int i = 0; i < data.documents.get(did).sentences.size(); i++) {
                StringBuilder buf = new StringBuilder(2000);
                for (int j = 0; j < data.documents.get(did).sentences.get(i).size(); j++) {
                    buf.append(((NEWord) data.documents.get(did).sentences.get(i).get(j)).form)
                            .append(" ");
                }
                out.println(buf.toString());
            }
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
/**
 * Writes each sentence in {@code data} to {@code outFile}, one sentence per line. Each
 * word's {@code form} is followed by a single space, so every non-empty line ends with
 * a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the sentences to write; each element is read as a sequence of {@code NEWord}
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Vector<LinkedVector> data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int i = 0; i < data.size(); i++) {
            LinkedVector sentence = data.elementAt(i);
            StringBuilder buf = new StringBuilder(2000);
            for (int j = 0; j < sentence.size(); j++) {
                buf.append(((NEWord) sentence.get(j)).form).append(" ");
            }
            out.println(buf.toString());
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Reports whether the sentence, taken as a whole, contains at least one lowercase and
 * at least one uppercase character across the {@code originalForm} of its words.
 *
 * <p>Calls {@code init()} first when {@code lowercasedToNormalizedTokensMap} is still
 * {@code null}.
 *
 * @param sentence the sentence whose elements are read as {@code NEWord}
 * @return {@code true} only when both a lowercase and an uppercase character were seen
 */
public static boolean mixedCase(LinkedVector sentence) {
    if (lowercasedToNormalizedTokensMap == null) {
        init();
    }
    boolean sawLower = false;
    boolean sawUpper = false;
    for (int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) {
        String text = ((NEWord) sentence.get(wordIndex)).originalForm;
        for (char c : text.toCharArray()) {
            sawLower |= Character.isLowerCase(c);
            sawUpper |= Character.isUpperCase(c);
        }
    }
    return sawLower && sawUpper;
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Writes each sentence in {@code data} to {@code outFile}, one sentence per line. Each
 * word's {@code form} is followed by a single space, so every non-empty line ends with
 * a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the sentences to write; each element is read as a sequence of {@code NEWord}
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Vector<LinkedVector> data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int i = 0; i < data.size(); i++) {
            LinkedVector sentence = data.elementAt(i);
            StringBuilder buf = new StringBuilder(2000);
            for (int j = 0; j < sentence.size(); j++) {
                buf.append(((NEWord) sentence.get(j)).form).append(" ");
            }
            out.println(buf.toString());
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Writes each sentence in {@code data} to {@code outFile}, one sentence per line. Each
 * word's {@code form} is followed by a single space, so every non-empty line ends with
 * a trailing space.
 *
 * <p>The output file is closed even if writing fails part-way through.
 *
 * @param data the sentences to write; each element is read as a sequence of {@code NEWord}
 * @param outFile path handed to the {@code OutFile} constructor
 */
public static void write(Vector<LinkedVector> data, String outFile) {
    OutFile out = new OutFile(outFile);
    try {
        for (int i = 0; i < data.size(); i++) {
            LinkedVector sentence = data.elementAt(i);
            StringBuilder buf = new StringBuilder(2000);
            for (int j = 0; j < sentence.size(); j++) {
                buf.append(((NEWord) sentence.get(j)).form).append(" ");
            }
            out.println(buf.toString());
        }
    } finally {
        // Release the file handle even when a cast or append above throws.
        out.close();
    }
}
} // extra closing brace carried over from the original line; presumably ends the enclosing type
/**
 * Builds a single string by concatenating, in order and with no separator, the
 * {@code toString()} of every {@code NEWord} in every sentence.
 *
 * @param sentences the sentences to render; each element is read as a sequence of
 *        {@code NEWord}
 * @return the concatenated text, or the empty string when there are no words
 */
public static String showSentenceVector(Vector<LinkedVector> sentences) {
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder display = new StringBuilder();
    for (LinkedVector v : sentences) {
        for (int i = 0; i < v.size(); ++i) {
            NEWord s = (NEWord) (v.get(i));
            display.append(s.toString());
        }
    }
    return display.toString();
}
/**
 * Builds a single string by concatenating, in order and with no separator, the
 * {@code toString()} of every {@code NEWord} in every sentence.
 *
 * @param sentences the sentences to render; each element is read as a sequence of
 *        {@code NEWord}
 * @return the concatenated text, or the empty string when there are no words
 */
public static String showSentenceVector(Vector<LinkedVector> sentences) {
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder display = new StringBuilder();
    for (LinkedVector v : sentences) {
        for (int i = 0; i < v.size(); ++i) {
            NEWord s = (NEWord) (v.get(i));
            display.append(s.toString());
        }
    }
    return display.toString();
}
/**
 * Builds a single string by concatenating, in order and with no separator, the
 * {@code toString()} of every {@code NEWord} in every sentence.
 *
 * @param sentences the sentences to render; each element is read as a sequence of
 *        {@code NEWord}
 * @return the concatenated text, or the empty string when there are no words
 */
public static String showSentenceVector(Vector<LinkedVector> sentences) {
    // StringBuilder avoids re-copying the accumulated text on every append.
    StringBuilder display = new StringBuilder();
    for (LinkedVector v : sentences) {
        for (int i = 0; i < v.size(); ++i) {
            NEWord s = (NEWord) (v.get(i));
            display.append(s.toString());
        }
    }
    return display.toString();
}
/**
 * Rewrites the predictions of every word in {@code data} for the given label type:
 * a prediction starting with {@code "U-"} gets the prefix {@code "B-"} instead, and one
 * starting with {@code "L-"} gets the prefix {@code "I-"}. Predictions equal to
 * {@code "O"} (ignoring case) and all other predictions are left untouched.
 *
 * @param data the data set whose word predictions are rewritten in place
 * @param labelType selects which prediction is read and written on each word
 */
private static void Bilou2Bio(Data data, LabelToLookAt labelType) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                String label = word.getPrediction(labelType);
                if (label.equalsIgnoreCase("O")) {
                    continue;
                }
                // The prediction is re-read before each check, so the second test sees
                // whatever the first branch may have just written.
                if (word.getPrediction(labelType).startsWith("U-")) {
                    word.setPrediction("B-" + label.substring(2), labelType);
                }
                if (word.getPrediction(labelType).startsWith("L-")) {
                    word.setPrediction("I-" + label.substring(2), labelType);
                }
            }
        }
    }
}
/**
 * Rewrites the predictions of every word in {@code data} for the given label type:
 * a prediction starting with {@code "U-"} gets the prefix {@code "B-"} instead, and one
 * starting with {@code "L-"} gets the prefix {@code "I-"}. Predictions equal to
 * {@code "O"} (ignoring case) and all other predictions are left untouched.
 *
 * @param data the data set whose word predictions are rewritten in place
 * @param labelType selects which prediction is read and written on each word
 */
private static void Bilou2Bio(Data data, LabelToLookAt labelType) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                String label = word.getPrediction(labelType);
                if (label.equalsIgnoreCase("O")) {
                    continue;
                }
                // The prediction is re-read before each check, so the second test sees
                // whatever the first branch may have just written.
                if (word.getPrediction(labelType).startsWith("U-")) {
                    word.setPrediction("B-" + label.substring(2), labelType);
                }
                if (word.getPrediction(labelType).startsWith("L-")) {
                    word.setPrediction("I-" + label.substring(2), labelType);
                }
            }
        }
    }
}
/**
 * Rewrites the predictions of every word in {@code data} for the given label type:
 * a prediction starting with {@code "U-"} gets the prefix {@code "B-"} instead, and one
 * starting with {@code "L-"} gets the prefix {@code "I-"}. Predictions equal to
 * {@code "O"} (ignoring case) and all other predictions are left untouched.
 *
 * @param data the data set whose word predictions are rewritten in place
 * @param labelType selects which prediction is read and written on each word
 */
private static void Bilou2Bio(Data data, LabelToLookAt labelType) {
    for (int docIndex = 0; docIndex < data.documents.size(); docIndex++) {
        for (LinkedVector sentence : data.documents.get(docIndex).sentences) {
            for (int wordIndex = 0; wordIndex < sentence.size(); wordIndex++) {
                NEWord word = (NEWord) sentence.get(wordIndex);
                String label = word.getPrediction(labelType);
                if (label.equalsIgnoreCase("O")) {
                    continue;
                }
                // The prediction is re-read before each check, so the second test sees
                // whatever the first branch may have just written.
                if (word.getPrediction(labelType).startsWith("U-")) {
                    word.setPrediction("B-" + label.substring(2), labelType);
                }
                if (word.getPrediction(labelType).startsWith("L-")) {
                    word.setPrediction("I-" + label.substring(2), labelType);
                }
            }
        }
    }
}
/**
 * Re-tokenizes a single word by running its {@code form} (with one space appended)
 * through a {@code WordSplitter} over a {@code SentenceSplitter}, and returns one new
 * {@code NEWord} per resulting token.
 *
 * <p>Every piece receives the original word's {@code neLabel}, except that once the
 * label contains {@code "B-"}, pieces after the first get {@code "I-"} followed by the
 * label with its first two characters removed. When the parser yields nothing, the
 * original word is returned as the only element.
 *
 * @param word the word to split
 * @return the pieces in token order; never empty when the parser returns {@code null}
 */
private static Vector<NEWord> splitWord(NEWord word) {
    Vector<NEWord> pieces = new Vector<>();
    String[] input = {word.form + " "};
    Parser tokenizer = new WordSplitter(new SentenceSplitter(input));
    LinkedVector tokens = (LinkedVector) tokenizer.next();
    if (tokens == null) {
        pieces.add(word);
        return pieces;
    }

    String label = word.neLabel;
    for (int t = 0; t < tokens.size(); t++) {
        // The label is tested before the index, so it is dereferenced on the first token too.
        if (label.contains("B-") && t > 0) {
            label = "I-" + label.substring(2);
        }
        Word token = (Word) tokens.get(t);
        pieces.add(new NEWord(new Word(token.form), null, label));
    }
    return pieces;
}