SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost, boolean captureLeadMatch) { super(approximation(postings), matchCost); this.slop = slop; this.numPostings = postings.length; this.captureLeadMatch = captureLeadMatch; pq = new PhraseQueue(postings.length); phrasePositions = new PhrasePositions[postings.length]; for (int i = 0; i < postings.length; ++i) { phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); } }
/**
 * Calls {@code firstPosition()} on every entry of {@code phrasePositions}, in array order.
 *
 * @throws IOException if positioning any entry fails
 */
private void placeFirstPositions() throws IOException {
  final int total = phrasePositions.length;
  for (int idx = 0; idx < total; idx++) {
    phrasePositions[idx].firstPosition();
  }
}
/**
 * Moves {@code pp} to its next position and raises {@code end} if the new position exceeds it.
 *
 * @param pp the phrase position to advance
 * @return {@code false} if {@code pp} has no further position, {@code true} otherwise
 * @throws IOException if advancing fails
 */
private boolean advancePP(PhrasePositions pp) throws IOException {
  final boolean advanced = pp.nextPosition();
  // 'end' is only touched when the advance succeeded
  if (advanced && pp.position > end) {
    end = pp.position;
  }
  return advanced;
}
protected final float phraseFreq() throws IOException { // sort list with pq for (PhrasePositions pp = first; pp != null; pp = pp.next) { pp.firstPosition(); pq.put(pp); // build pq from list } pqToList(); // rebuild list from pq int freq = 0; do { // find position w/ all terms while (first.position < last.position) { // scan forward in first do { if (!first.nextPosition()) return (float)freq; } while (first.position < last.position); firstToLast(); } freq++; // all equal: a match } while (last.nextPosition()); return (float)freq; } }
private boolean doNext() throws IOException { while (more) { while (more && first.doc < last.doc) { // find doc w/ all the terms more = first.skipTo(last.doc); // skip first upto last firstToLast(); // and move it to the end } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0f) // no match more = last.next(); // trigger further scanning else return true; // found a match } } return false; // no more matches }
/**
 * Advances each {@code PhrasePositions} in the list with {@code next()}, stopping at the first
 * one that reports no more documents, then sorts the list if all of them succeeded.
 *
 * @throws IOException if advancing any entry fails
 */
private void init() throws IOException {
  PhrasePositions cursor = first;
  while (more && cursor != null) {
    more = cursor.next();
    cursor = cursor.next;
  }
  if (more) {
    sort();
  }
}
public boolean skipTo(int target) throws IOException { for (PhrasePositions pp = first; more && pp != null; pp = pp.next) { more = pp.skipTo(target); } if (more) sort(); // re-sort return doNext(); }
protected final float phraseFreq() throws IOException { // sort list with pq pq.clear(); for (PhrasePositions pp = first; pp != null; pp = pp.next) { pp.firstPosition(); pq.put(pp); // build pq from list } pqToList(); // rebuild list from pq // for counting how many times the exact phrase is found in current document, // just count how many times all PhrasePosition's have exactly the same position. int freq = 0; do { // find position w/ all terms while (first.position < last.position) { // scan forward in first do { if (!first.nextPosition()) return (float)freq; } while (first.position < last.position); firstToLast(); } freq++; // all equal: a match } while (last.nextPosition()); return (float)freq; } }
private boolean doNext() throws IOException { while (more) { while (more && first.doc < last.doc) { // find doc w/ all the terms more = first.skipTo(last.doc); // skip first upto last firstToLast(); // and move it to the end } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0f) // no match more = last.next(); // trigger further scanning else return true; // found a match } } return false; // no more matches }
/**
 * Advances each {@code PhrasePositions} in the list with {@code next()}, stopping at the first
 * one that reports no more documents, then sorts the list if all of them succeeded.
 *
 * @throws IOException if advancing any entry fails
 */
private void init() throws IOException {
  PhrasePositions cursor = first;
  while (more && cursor != null) {
    more = cursor.next();
    cursor = cursor.next;
  }
  if (more) {
    sort();
  }
}
public boolean skipTo(int target) throws IOException { firstTime = false; for (PhrasePositions pp = first; more && pp != null; pp = pp.next) { more = pp.skipTo(target); } if (more) sort(); // re-sort return doNext(); }
protected final float phraseFreq() throws IOException { // sort list with pq pq.clear(); for (PhrasePositions pp = first; pp != null; pp = pp.next) { pp.firstPosition(); pq.put(pp); // build pq from list } pqToList(); // rebuild list from pq // for counting how many times the exact phrase is found in current document, // just count how many times all PhrasePosition's have exactly the same position. int freq = 0; do { // find position w/ all terms while (first.position < last.position) { // scan forward in first do { if (!first.nextPosition()) return (float)freq; } while (first.position < last.position); firstToLast(); } freq++; // all equal: a match } while (last.nextPosition()); return (float)freq; } }
final void firstPosition() throws IOException { count = postings.freq(); // read first pos nextPosition(); }
/** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */ private void initSimple() throws IOException { //System.err.println("initSimple: doc: "+min.doc); pq.clear(); // position pps and build queue from list for (PhrasePositions pp : phrasePositions) { pp.firstPosition(); if (pp.position > end) { end = pp.position; } pq.add(pp); } }
SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost, boolean captureLeadMatch) { super(approximation(postings), matchCost); this.slop = slop; this.numPostings = postings.length; this.captureLeadMatch = captureLeadMatch; pq = new PhraseQueue(postings.length); phrasePositions = new PhrasePositions[postings.length]; for (int i = 0; i < postings.length; ++i) { phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); } }
private boolean doNext() throws IOException { while (more) { while (more && first.doc < last.doc) { // find doc w/ all the terms more = first.skipTo(last.doc); // skip first upto last firstToLast(); // and move it to the end } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0f) // no match more = last.next(); // trigger further scanning else return true; // found a match } } return false; // no more matches }
/**
 * Advances each {@code PhrasePositions} in the list with {@code next()}, stopping at the first
 * one that reports no more documents, then sorts the list if all of them succeeded.
 *
 * @throws IOException if advancing any entry fails
 */
private void init() throws IOException {
  PhrasePositions cursor = first;
  while (more && cursor != null) {
    more = cursor.next();
    cursor = cursor.next;
  }
  if (more) {
    sort();
  }
}
public boolean skipTo(int target) throws IOException { firstTime = false; for (PhrasePositions pp = first; more && pp != null; pp = pp.next) { more = pp.skipTo(target); } if (more) sort(); // re-sort return doNext(); }
pp.firstPosition(); if (pp.position > end) end = pp.position; pp.firstPosition(); PhrasePositions pp2; while ((pp2 = termPositionsDiffer(pp)) != null) { if (!pp2.nextPosition()) // out of pps that do not differ, advance the pp with higher offset return -1; // ran out of a term -- done
if (!rg[j].nextPosition()) { return false; // PPs exhausted