/**
 * Builds the feature-extraction pipeline and pushes the gzip-compressed
 * training file through it; the pipeline includes a SequencePrintingPipe
 * that is given a writer on the file "test.out".
 *
 * Both the output writer and the training-file input are released even
 * when building the pipeline or reading the input fails part-way.
 *
 * @param trainingFilename path of a gzip-compressed training file; instances
 *        are delimited by lines matching {@code ^\s*$}
 * @throws IOException if "test.out" cannot be opened, or the training file
 *         cannot be opened, decompressed, read, or closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    PrintWriter out = new PrintWriter("test.out");
    try {
        ArrayList<Pipe> pipes = new ArrayList<Pipe>();

        // Offsets combined by OffsetConjunctions: -1 alone, +1 alone,
        // and the pair (-2, -1).
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the raw file first so its handle can be released even if the
        // gzip header check in the GZIPInputStream constructor throws.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(fileIn)));
            try {
                // NOTE(review): assumes addThruPipe consumes the iterator
                // completely before returning (the original code closed
                // "out" right after this call) -- confirm.
                trainingInstances.addThruPipe(
                        new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            } finally {
                reader.close();
            }
        } finally {
            // Harmless if the reader already closed the underlying stream.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
if (iters.length == currIndex) { // base case: add feature for current conjunction of iters if (redundant (conjunctions, j, iterIndices)) { return newfs; iters[currIndex].next(); iterIndices[currIndex]++; newfs = makeConjunctions (iters, currIndex+1, conjunctions, j, tsSize, newfs, tsi, oldfs, iterIndices); iters[currIndex] = getOffsetIter (conjunctions, j, currIndex, tsSize, tsi, oldfs); iterIndices[currIndex] = -1;
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size1, size2; int version = in.readInt (); size1 = in.readInt(); // Deserialization doesn't call the unnamed class initializer, so do it here if (startfs[0] == null) initStartEndFs (); if (size1 == NULL_INTEGER) { conjunctions = null; } else { conjunctions = new int[size1][]; for (int i = 0; i < size1; i++) { size2 = in.readInt(); if (size2 == NULL_INTEGER) { conjunctions[i] = null; } else { conjunctions[i] = new int[size2]; for (int j = 0; j < size2; j++) { conjunctions[i][j] = in.readInt(); } } } } includeOriginalSingletons = in.readBoolean(); featureRegex = (Pattern) in.readObject();//add by fuchun } }
for (int j = 0; j < conjunctions.length; j++) { PropertyList.Iterator[] iters = getOffsetIters (conjunctions, j, tsSize, i, oldfs); if (iters == null) continue; for (int ii=0; ii < iterIndices.length; ii++) iterIndices[ii] = -1; newfs[i] = makeConjunctions (iters, 0, conjunctions, j, tsSize, newfs[i], i, oldfs, iterIndices);
/** Get iterators for each token in this offset */ private PropertyList.Iterator[] getOffsetIters (int [][] conjunctions, int j, int tsSize, int tsi, PropertyList[] oldfs) { PropertyList.Iterator[] iters = new PropertyList.Iterator[conjunctions[j].length]; // get iterators for offsets for (int iteri=0; iteri < iters.length; iteri++) { iters[iteri] = getOffsetIter (conjunctions, j, iteri, tsSize, tsi, oldfs); if (iters[iteri]==null) return null; } return iters; }
for (int j = 0; j < conjunctions.length; j++) { PropertyList.Iterator[] iters = getOffsetIters (conjunctions, j, tsSize, i, oldfs); if (iters == null) continue; for (int ii=0; ii < iterIndices.length; ii++) iterIndices[ii] = -1; newfs[i] = makeConjunctions (iters, 0, conjunctions, j, tsSize, newfs[i], i, oldfs, iterIndices);
/** Get iterators for each token in this offset */ private PropertyList.Iterator[] getOffsetIters (int [][] conjunctions, int j, int tsSize, int tsi, PropertyList[] oldfs) { PropertyList.Iterator[] iters = new PropertyList.Iterator[conjunctions[j].length]; // get iterators for offsets for (int iteri=0; iteri < iters.length; iteri++) { iters[iteri] = getOffsetIter (conjunctions, j, iteri, tsSize, tsi, oldfs); if (iters[iteri]==null) return null; } return iters; }
/**
 * Builds the feature-extraction pipeline and pushes the gzip-compressed
 * training file through it; the pipeline includes a SequencePrintingPipe
 * that is given a writer on the file "test.out".
 *
 * Both the output writer and the training-file input are released even
 * when building the pipeline or reading the input fails part-way.
 *
 * @param trainingFilename path of a gzip-compressed training file; instances
 *        are delimited by lines matching {@code ^\s*$}
 * @throws IOException if "test.out" cannot be opened, or the training file
 *         cannot be opened, decompressed, read, or closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    PrintWriter out = new PrintWriter("test.out");
    try {
        ArrayList<Pipe> pipes = new ArrayList<Pipe>();

        // Offsets combined by OffsetConjunctions: -1 alone, +1 alone,
        // and the pair (-2, -1).
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the raw file first so its handle can be released even if the
        // gzip header check in the GZIPInputStream constructor throws.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(fileIn)));
            try {
                // NOTE(review): assumes addThruPipe consumes the iterator
                // completely before returning (the original code closed
                // "out" right after this call) -- confirm.
                trainingInstances.addThruPipe(
                        new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            } finally {
                reader.close();
            }
        } finally {
            // Harmless if the reader already closed the underlying stream.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
if (iters.length == currIndex) { // base case: add feature for current conjunction of iters if (redundant (conjunctions, j, iterIndices)) { return newfs; iters[currIndex].next(); iterIndices[currIndex]++; newfs = makeConjunctions (iters, currIndex+1, conjunctions, j, tsSize, newfs, tsi, oldfs, iterIndices); iters[currIndex] = getOffsetIter (conjunctions, j, currIndex, tsSize, tsi, oldfs); iterIndices[currIndex] = -1;
for (int j = 0; j < conjunctions.length; j++) { PropertyList.Iterator[] iters = getOffsetIters (conjunctions, j, tsSize, i, oldfs); if (iters == null) continue; for (int ii=0; ii < iterIndices.length; ii++) iterIndices[ii] = -1; newfs[i] = makeConjunctions (iters, 0, conjunctions, j, tsSize, newfs[i], i, oldfs, iterIndices);
/** Get iterators for each token in this offset */ private PropertyList.Iterator[] getOffsetIters (int [][] conjunctions, int j, int tsSize, int tsi, PropertyList[] oldfs) { PropertyList.Iterator[] iters = new PropertyList.Iterator[conjunctions[j].length]; // get iterators for offsets for (int iteri=0; iteri < iters.length; iteri++) { iters[iteri] = getOffsetIter (conjunctions, j, iteri, tsSize, tsi, oldfs); if (iters[iteri]==null) return null; } return iters; }
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size1, size2; int version = in.readInt (); size1 = in.readInt(); // Deserialization doesn't call the unnamed class initializer, so do it here if (startfs[0] == null) initStartEndFs (); if (size1 == NULL_INTEGER) { conjunctions = null; } else { conjunctions = new int[size1][]; for (int i = 0; i < size1; i++) { size2 = in.readInt(); if (size2 == NULL_INTEGER) { conjunctions[i] = null; } else { conjunctions[i] = new int[size2]; for (int j = 0; j < size2; j++) { conjunctions[i][j] = in.readInt(); } } } } includeOriginalSingletons = in.readBoolean(); featureRegex = (Pattern) in.readObject();//add by fuchun } }
/**
 * Builds the feature-extraction pipeline and pushes the gzip-compressed
 * training file through it; the pipeline includes a SequencePrintingPipe
 * that is given a writer on the file "test.out".
 *
 * Both the output writer and the training-file input are released even
 * when building the pipeline or reading the input fails part-way.
 *
 * @param trainingFilename path of a gzip-compressed training file; instances
 *        are delimited by lines matching {@code ^\s*$}
 * @throws IOException if "test.out" cannot be opened, or the training file
 *         cannot be opened, decompressed, read, or closed
 */
public TestCRFPipe(String trainingFilename) throws IOException {
    PrintWriter out = new PrintWriter("test.out");
    try {
        ArrayList<Pipe> pipes = new ArrayList<Pipe>();

        // Offsets combined by OffsetConjunctions: -1 alone, +1 alone,
        // and the pair (-2, -1).
        int[][] conjunctions = new int[3][];
        conjunctions[0] = new int[] { -1 };
        conjunctions[1] = new int[] { 1 };
        conjunctions[2] = new int[] { -2, -1 };

        pipes.add(new SimpleTaggerSentence2TokenSequence());
        //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
        //pipes.add(new FeaturesInWindow("NEXT-", 1, 2));
        pipes.add(new OffsetConjunctions(conjunctions));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile("\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        pipes.add(new SequencePrintingPipe(out));

        Pipe pipe = new SerialPipes(pipes);
        InstanceList trainingInstances = new InstanceList(pipe);

        // Open the raw file first so its handle can be released even if the
        // gzip header check in the GZIPInputStream constructor throws.
        FileInputStream fileIn = new FileInputStream(trainingFilename);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(fileIn)));
            try {
                // NOTE(review): assumes addThruPipe consumes the iterator
                // completely before returning (the original code closed
                // "out" right after this call) -- confirm.
                trainingInstances.addThruPipe(
                        new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
            } finally {
                reader.close();
            }
        } finally {
            // Harmless if the reader already closed the underlying stream.
            fileIn.close();
        }
    } finally {
        out.close();
    }
}
if (iters.length == currIndex) { // base case: add feature for current conjunction of iters if (redundant (conjunctions, j, iterIndices)) { return newfs; iters[currIndex].next(); iterIndices[currIndex]++; newfs = makeConjunctions (iters, currIndex+1, conjunctions, j, tsSize, newfs, tsi, oldfs, iterIndices); iters[currIndex] = getOffsetIter (conjunctions, j, currIndex, tsSize, tsi, oldfs); iterIndices[currIndex] = -1;
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size1, size2; int version = in.readInt (); size1 = in.readInt(); // Deserialization doesn't call the unnamed class initializer, so do it here if (startfs[0] == null) initStartEndFs (); if (size1 == NULL_INTEGER) { conjunctions = null; } else { conjunctions = new int[size1][]; for (int i = 0; i < size1; i++) { size2 = in.readInt(); if (size2 == NULL_INTEGER) { conjunctions[i] = null; } else { conjunctions[i] = new int[size2]; for (int j = 0; j < size2; j++) { conjunctions[i][j] = in.readInt(); } } } } includeOriginalSingletons = in.readBoolean(); featureRegex = (Pattern) in.readObject();//add by fuchun } }
pipes.add(new OffsetConjunctions(conjunctions));
pipes.add(new OffsetConjunctions(conjunctions));
new OffsetConjunctions(new int[][] { { -1 }, { 1 } }),
pipes.add(new OffsetConjunctions(conjunctions));
pipes.add(new RegexMatches("EXCLAMATION_QUESTION_MARK_REGEX", Pattern .compile(TextUtil.EXCLAMATION_QUESTION_MARK_REGEX))); pipes.add(new OffsetConjunctions(new int[][] { { -1 }, { 1 } })); pipes.add(new TokenSequence2FeatureVectorSequence(targetAlphabet)); SerialPipes serialPipes = new SerialPipes(pipes);