/**
 * Provides an iterator over the string pairs of this reader.
 *
 * @return a newly created {@code TabReader}; every call constructs a fresh instance
 */
@Override
public Iterator<StringPair> iterator() {
  final TabReader pairIterator = new TabReader();
  return pairIterator;
}
/**
 * Provides an iterator over the string pairs of this reader.
 *
 * @return a newly created {@code TabReader}; every call constructs a fresh instance
 */
@Override
public Iterator<StringPair> iterator() {
  final TabReader pairIterator = new TabReader();
  return pairIterator;
}
@Test public void testReader() throws Exception { // Create a simple WordFast text memory. File input = File.createTempFile("input", ".txt"); input.deleteOnExit(); // Populate the file with data. try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"))) { writer.println("%20100801~111517\t%UserID,AHLJat,AHLJat\t%TU=00008580\t%EN-US\t%Wordfast TM v.546/00\t%PL-PL\t%\t."); writer.println("20100727~145333\tAHLJat\t2\tEN-US\tObjection:\tPL-PL\tZarzut: "); writer.println("20100727~051350\tAHLJat\t2\tEN-US\tWhy not?&tA;\tPL-PL\tDlaczego nie?&tA; "); } WordFastTMReader reader = new WordFastTMReader(input.getAbsolutePath(), "UTF-8"); int i = 1; for (StringPair srcAndTrg : reader) { assertTrue(srcAndTrg.getSource() != null); assertTrue(srcAndTrg.getTarget() != null); if (i == 1) { assertEquals("Objection:", srcAndTrg.getSource()); } else if (i == 2) { assertEquals("Why not?&tA;", srcAndTrg.getSource()); } i++; } } }
@Test public void testReader() throws Exception { // Create a simple plain text file. File input = File.createTempFile("input", "txt"); input.deleteOnExit(); // Populate the file with data. PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); writer.println("This is not actual.\tTo nie jest aktualne."); writer.println("Test\tTest"); writer.println("ab\tVery strange data indeed, much longer than input"); writer.close(); TabBitextReader reader = new TabBitextReader(input.getAbsolutePath(), "UTF-8"); int i = 1; for (StringPair srcAndTrg : reader) { assertTrue(srcAndTrg.getSource() != null); assertTrue(srcAndTrg.getTarget() != null); if (i == 1) { assertEquals("This is not actual.", srcAndTrg.getSource()); } else if (i == 2) { assertEquals("Test", srcAndTrg.getSource()); } else if (i == 3) { assertEquals("Very strange data indeed, much longer than input", srcAndTrg.getTarget()); } i++; } } }
/**
 * Converts one tab-separated line into a source/target pair. The source is taken
 * from the fifth column and the target from the seventh; as a side effect
 * {@code sentencePos} is set to the source length plus one.
 *
 * @param line the line to convert, may be {@code null}
 * @return the pair built from the line, or {@code null} if {@code line} is {@code null}
 * @throws RuntimeException if the line has fewer than seven tab-separated columns
 */
@Nullable
@Override
public final StringPair tab2StringPair(String line) {
  if (line == null) {
    return null;
  }
  String[] fields = line.split("\t");
  // String.split drops trailing empty columns, so a short or truncated line would
  // otherwise fail below with a bare ArrayIndexOutOfBoundsException.
  if (fields.length < 7) {
    throw new RuntimeException("Unexpected format, expected at least seven tab-separated columns: " + line);
  }
  String source = fields[4];
  sentencePos = source.length() + 1;
  return new StringPair(source, fields[6]);
}
/**
 * Returns the pair that was read ahead and advances to the following line.
 * {@code sentencePos} is set to the length of the returned pair's source plus one.
 * If a further line was pending, it is remembered in {@code prevLine}, the next
 * line is read and converted, and {@code lineCount} is incremented; the reader is
 * closed once the end of the input is reached.
 *
 * @return the current pair
 * @throws java.util.NoSuchElementException if there is no pair left to return
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or closing
 */
@Override
public StringPair next() {
  // Follow the Iterator contract instead of failing with a NullPointerException
  // when the iteration is already exhausted.
  if (nextPair == null) {
    throw new java.util.NoSuchElementException("No more lines to read");
  }
  try {
    StringPair result = nextPair;
    sentencePos = result.getSource().length() + 1;
    if (nextLine != null) {
      prevLine = nextLine;
      nextLine = in.readLine();
      nextPair = tab2StringPair(nextLine);
      lineCount++;
      if (nextLine == null) {
        // End of input reached: release the underlying reader.
        in.close();
      }
    }
    return result;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Opens the given file through the superclass constructor and then skips the
 * header, i.e. the first line, so that iteration starts at the second line.
 *
 * @param filename passed on to the superclass constructor
 * @param encoding passed on to the superclass constructor
 * @throws IOException if reading the line after the header, or closing the reader, fails
 */
public WordFastTMReader(String filename, String encoding) throws IOException {
  super(filename, encoding);
  //skip the header (first line)
  if (nextLine != null) {
    nextLine = in.readLine();
    nextPair = tab2StringPair(nextLine);
    if (nextLine == null) {
      // Header-only file: there is nothing left to iterate, so release the reader
      // here rather than leaving it open.
      in.close();
    }
  }
}
/**
 * Opens the given file, reads its first line and converts it into the first pair.
 * The underlying stream is closed again if initialization fails or if the file
 * turns out to be empty.
 *
 * @param filename path of the file to read
 * @param encoding input encoding or {@code null} to use the platform default
 * @throws RuntimeException wrapping any {@link IOException} raised while opening or reading the file
 */
public TabBitextReader(String filename, String encoding) {
  try {
    FileInputStream stream = new FileInputStream(filename);
    try {
      InputStreamReader decoder;
      if (encoding == null) {
        decoder = new InputStreamReader(stream);
      } else {
        decoder = new InputStreamReader(stream, encoding);
      }
      in = new BufferedReader(decoder);
      nextLine = in.readLine();
      prevLine = "";
      nextPair = tab2StringPair(nextLine);
      if (nextLine == null) {
        // Empty file: nothing will be iterated, so release the reader right away.
        in.close();
      }
    } catch (IOException | RuntimeException e) {
      // Do not leak the file handle when the encoding is unsupported, the first
      // read fails, or the first line cannot be converted.
      try {
        stream.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
List<StringPair> goodSentences = rule.getCorrectBitextExamples(); for (StringPair goodSentence : goodSentences) { assertTrue("Got good sentence: '" + goodSentence.getSource() + "'", cleanSentence(goodSentence.getSource()).trim().length() > 0); assertTrue("Got good sentence: '" + goodSentence.getTarget() + "'", cleanSentence(goodSentence.getTarget()).trim().length() > 0); assertFalse(lang + ": Did not expect error in: " + goodSentence + " (Rule: " + rule + ")", match(rule, goodSentence.getSource(), goodSentence.getTarget(), srcTool, languageTool)); String origBadSrcSentence = example.getSource().replaceAll("[\\n\\t]+", ""); String origBadTrgSentence = example.getTarget().replaceAll("[\\n\\t]+", ""); List<String> suggestedCorrection = origBadExample.getCorrections(); int expectedSrcMatchStart = origBadSrcSentence.indexOf("<marker>");
/**
 * Splits a line at tab characters and builds a pair from its first two columns.
 *
 * @param line the line to convert, may be {@code null}
 * @return the pair made of the first and second column, or {@code null} if
 *         {@code line} is {@code null}
 * @throws RuntimeException if the line yields fewer than two columns
 */
@Nullable
protected StringPair tab2StringPair(String line) {
  StringPair pair = null;
  if (line != null) {
    final String[] columns = line.split("\t");
    if (columns.length < 2) {
      throw new RuntimeException("Unexpected format, expected two tab-separated columns: " + line);
    }
    final String source = columns[0];
    final String target = columns[1];
    pair = new StringPair(source, target);
  }
  return pair;
}
/**
 * Returns the pair that was read ahead and, if a further line was pending,
 * reads and converts the following line. The reader is closed as soon as the
 * end of the input is reached.
 *
 * @return the pair that was current before this call
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or closing
 */
@Override
public StringPair next() {
  final StringPair current = nextPair;
  if (nextLine == null) {
    // Nothing pending: hand back whatever was stored without touching the reader.
    return current;
  }
  try {
    nextLine = in.readLine();
    nextPair = tab2StringPair(nextLine);
    if (nextLine == null) {
      in.close();
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return current;
}
/**
 * Provides an iterator over the string pairs of this reader.
 *
 * @return a newly created {@code TabReader}; every call constructs a fresh instance
 */
@Override
public Iterator<StringPair> iterator() {
  final TabReader pairIterator = new TabReader();
  return pairIterator;
}
/**
 * Provides an iterator over the string pairs of this reader.
 *
 * @return a newly created {@code TabReader}; every call constructs a fresh instance
 */
@Override
public Iterator<StringPair> iterator() {
  final TabReader pairIterator = new TabReader();
  return pairIterator;
}
case EXAMPLE: if (inCorrectExample) { correctExamples.add(new StringPair(srcExample.getExample(), trgExample.getExample())); } else if (inIncorrectExample) { StringPair examplePair = new StringPair(srcExample.getExample(), trgExample.getExample()); if (trgExample.getCorrections().isEmpty()) { incorrectExamples.add(new IncorrectBitextExample(examplePair));
/**
 * Opens the given file through the superclass constructor and then skips the
 * header, i.e. the first line, so that iteration starts at the second line.
 *
 * @param filename passed on to the superclass constructor
 * @param encoding passed on to the superclass constructor
 * @throws IOException if reading the line after the header, or closing the reader, fails
 */
public WordFastTMReader(String filename, String encoding) throws IOException {
  super(filename, encoding);
  //skip the header (first line)
  if (nextLine != null) {
    nextLine = in.readLine();
    nextPair = tab2StringPair(nextLine);
    if (nextLine == null) {
      // Header-only file: there is nothing left to iterate, so release the reader
      // here rather than leaving it open.
      in.close();
    }
  }
}
builderStringPairs.add(instance.new StringPair(a, b));
cache.get(new StringPair("AB", "AB")).addAndGet(123)
/**
 * Converts one tab-separated line into a source/target pair. The source is taken
 * from the fifth column and the target from the seventh; as a side effect
 * {@code sentencePos} is set to the source length plus one.
 *
 * @param line the line to convert, may be {@code null}
 * @return the pair built from the line, or {@code null} if {@code line} is {@code null}
 * @throws RuntimeException if the line has fewer than seven tab-separated columns
 */
@Nullable
@Override
public final StringPair tab2StringPair(String line) {
  if (line == null) {
    return null;
  }
  String[] fields = line.split("\t");
  // String.split drops trailing empty columns, so a short or truncated line would
  // otherwise fail below with a bare ArrayIndexOutOfBoundsException.
  if (fields.length < 7) {
    throw new RuntimeException("Unexpected format, expected at least seven tab-separated columns: " + line);
  }
  String source = fields[4];
  sentencePos = source.length() + 1;
  return new StringPair(source, fields[6]);
}