/**
 * Copies this attribute's position length onto {@code target}.
 *
 * @param target destination attribute; it is cast to {@code PositionLengthAttribute}, so a
 *     target of any other type fails with a {@link ClassCastException}
 */
@Override
public void copyTo(AttributeImpl target) {
  ((PositionLengthAttribute) target).setPositionLength(positionLength);
}
/**
 * Copies this token's state onto {@code target}.
 *
 * <p>When {@code target} is itself a {@code PackedTokenAttributeImpl}, the term buffer is copied
 * and the remaining fields are assigned directly. For any other target the superclass
 * implementation runs first, and the remaining values are then pushed through the individual
 * attribute interfaces, each of which {@code target} is cast to.
 *
 * @param target destination attribute; in the generic case it must implement the offset,
 *     position increment, position length, type and term frequency attribute interfaces,
 *     otherwise a {@link ClassCastException} is thrown
 */
@Override
public void copyTo(AttributeImpl target) {
  if (!(target instanceof PackedTokenAttributeImpl)) {
    // Generic path: go through the public attribute interfaces.
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PositionLengthAttribute) target).setPositionLength(positionLength);
    ((TypeAttribute) target).setType(type);
    ((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
    return;
  }

  // Same implementation class: write the fields directly.
  final PackedTokenAttributeImpl packed = (PackedTokenAttributeImpl) target;
  packed.copyBuffer(buffer(), 0, length());
  packed.positionIncrement = positionIncrement;
  packed.positionLength = positionLength;
  packed.startOffset = startOffset;
  packed.endOffset = endOffset;
  packed.type = type;
  packed.termFrequency = termFrequency;
}
/** * Flushes a bigram token to output from our buffer * This is the normal case, e.g. ABC -> AB BC */ private void flushBigram() { clearAttributes(); char termBuffer[] = termAtt.resizeBuffer(4); // maximum bigram length in code units (2 supplementaries) int len1 = Character.toChars(buffer[index], termBuffer, 0); int len2 = len1 + Character.toChars(buffer[index+1], termBuffer, len1); termAtt.setLength(len2); offsetAtt.setOffset(startOffset[index], endOffset[index+1]); typeAtt.setType(DOUBLE_TYPE); // when outputting unigrams, all bigrams are synonyms that span two unigrams if (outputUnigrams) { posIncAtt.setPositionIncrement(0); posLengthAtt.setPositionLength(2); } index++; }
/** * Constructs a compound token. */ private void gramToken() { buffer.append(termAttribute.buffer(), 0, termAttribute.length()); int endOffset = offsetAttribute.endOffset(); clearAttributes(); int length = buffer.length(); char termText[] = termAttribute.buffer(); if (length > termText.length) { termText = termAttribute.resizeBuffer(length); } buffer.getChars(0, length, termText, 0); termAttribute.setLength(length); posIncAttribute.setPositionIncrement(0); posLenAttribute.setPositionLength(2); // bigram offsetAttribute.setOffset(lastStartOffset, endOffset); typeAttribute.setType(GRAM_TYPE); buffer.setLength(0); } }
posLenAtt.setPositionLength(toInputNode.outputNode - outputFrom); lastOutputFrom = outputFrom; inputNode.nextOut++;
posIncAttribute.setPositionIncrement(1); posLengthAttribute.setPositionLength(1); posIncAttribute.setPositionIncrement(1); posLengthAttribute.setPositionLength(1);
posLenAtt.setPositionLength(builtGramSize); } else { posLenAtt.setPositionLength(Math.max(1, (builtGramSize - minShingleSize) + 1));
/**
 * Copies this attribute's position length onto {@code target}.
 *
 * @param target destination attribute; it is cast to {@code PositionLengthAttribute}, so a
 *     target of any other type fails with a {@link ClassCastException}
 */
@Override
public void copyTo(AttributeImpl target) {
  ((PositionLengthAttribute) target).setPositionLength(positionLength);
}
private void releaseBufferedToken() throws IOException { //System.out.println(" releaseBufferedToken"); BufferedOutputToken token = outputBuffer.pollFirst(); if (token.state != null) { // This is an original input token (keepOrig=true case): //System.out.println(" hasState"); restoreState(token.state); //System.out.println(" startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset()); } else { clearAttributes(); //System.out.println(" no state"); termAtt.append(token.term); // We better have a match already: assert matchStartOffset != -1; offsetAtt.setOffset(matchStartOffset, matchEndOffset); //System.out.println(" startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset); typeAtt.setType(TYPE_SYNONYM); } //System.out.println(" lastNodeOut=" + lastNodeOut); //System.out.println(" term=" + termAtt); posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut); lastNodeOut = token.startNode; posLenAtt.setPositionLength(token.endNode - token.startNode); }
/**
 * Copies this attribute's position length onto {@code target}.
 *
 * @param target destination attribute; it is cast to {@code PositionLengthAttribute}, so a
 *     target of any other type fails with a {@link ClassCastException}
 */
@Override
public void copyTo(AttributeImpl target) {
  ((PositionLengthAttribute) target).setPositionLength(positionLength);
}
/**
 * Copies this attribute's position length onto {@code target}.
 *
 * @param target destination attribute; it is cast to {@code PositionLengthAttribute}, so a
 *     target of any other type fails with a {@link ClassCastException}
 */
@Override
public void copyTo(AttributeImpl target) {
  ((PositionLengthAttribute) target).setPositionLength(positionLength);
}
}
posLenAtt.setPositionLength(1); posIncrAtt.setPositionIncrement(1); typeAtt.setType("fingerprint");
offsetAttribute.setOffset(0, endOffset); typeAttribute.setType(MIN_HASH_TYPE); posLenAttribute.setPositionLength(1); return true; } else {
posLenAttribute.setPositionLength(endPos - startPos); wordPos = startPos; return true;
/**
 * Sets the position attributes for a morpheme and then fills in its remaining attributes.
 *
 * <p>The position length is taken from {@code aUnitSize} if it is non-zero, otherwise from
 * {@code oovSize} if that is non-zero, otherwise it is 1. Whichever counter was used is reset
 * to 0 after being applied. The position increment is always 1.
 *
 * @param morpheme the morpheme passed on to {@code setMorphemeAttributes}
 * @throws IOException declared by the signature; presumably propagated from
 *     {@code setMorphemeAttributes}
 */
private void setAttribute(Morpheme morpheme) throws IOException {
  if (aUnitSize == 0 && oovSize == 0) {
    // No pending size recorded: the token covers a single position.
    posLengthAtt.setPositionLength(1);
  } else if (aUnitSize != 0) {
    // aUnitSize takes precedence over oovSize when both are set.
    posLengthAtt.setPositionLength(aUnitSize);
    aUnitSize = 0;
  } else {
    posLengthAtt.setPositionLength(oovSize);
    oovSize = 0;
  }

  posIncAtt.setPositionIncrement(1);
  setMorphemeAttributes(morpheme);
}
termAtt.setLength(length); posIncAtt.setPositionIncrement(1); posLenAtt.setPositionLength(1); offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length)); ++gramSize;
/**
 * Sets the position attributes for one A-unit morpheme and then fills in its remaining
 * attributes.
 *
 * <p>The position length is always 1. The position increment is 0 when
 * {@code aUnitIterator.previousIndex()} is 0 and 1 otherwise.
 *
 * @param morpheme the morpheme passed on to {@code setMorphemeAttributes}
 * @throws IOException declared by the signature; presumably propagated from
 *     {@code setMorphemeAttributes}
 */
private void setAUnitAttribute(Morpheme morpheme) throws IOException {
  posLengthAtt.setPositionLength(1);
  // NOTE(review): previousIndex() == 0 presumably means this is the first unit returned by the
  // iterator, which therefore shares its position with the preceding token - confirm.
  posIncAtt.setPositionIncrement(aUnitIterator.previousIndex() == 0 ? 0 : 1);
  setMorphemeAttributes(morpheme);
}
/**
 * Sets the attributes for one piece of an out-of-vocabulary token.
 *
 * <p>The offsets cover exactly one unit starting at {@code baseOffset + oovBegin}, and
 * {@code oovBegin} is then advanced by one. The position length is always 1. The position
 * increment is 0 when {@code oovIterator.previousIndex()} is 0 and 1 otherwise.
 *
 * @param str the text passed on to {@code setTermAttribute}
 * @throws IOException declared by the signature; presumably propagated from
 *     {@code setTermAttribute}
 */
private void setOOVAttribute(String str) throws IOException {
  final int start = baseOffset + oovBegin;
  offsetAtt.setOffset(start, start + 1);
  oovBegin++;

  posLengthAtt.setPositionLength(1);
  posIncAtt.setPositionIncrement(oovIterator.previousIndex() == 0 ? 0 : 1);

  setTermAttribute(str);
}
posLenAtt.setPositionLength(outputs.getLastPosLength()); if (outputs.count == 0) {
/**
 * Publishes the values of {@code token} through this stream's attributes.
 *
 * <p>Sets the position increment, position length, corrected start and end offsets, term text,
 * type, part of speech and semantic class, in that order.
 *
 * @param token source of all attribute values; its start and end offsets are passed through
 *     {@code correctOffset} before being applied
 */
private void setAttributes(Pos token) {
  posIncrAtt.setPositionIncrement(token.getPositionIncr());
  posLenAtt.setPositionLength(token.getPositionLength());

  final int correctedStart = correctOffset(token.getStartOffset());
  final int correctedEnd = correctOffset(token.getEndOffset());
  offsetAtt.setOffset(correctedStart, correctedEnd);

  // Only the first getSurfaceLength() chars of the surface form are copied into the term.
  final char[] surface = token.getSurface().toCharArray();
  charTermAtt.copyBuffer(surface, 0, token.getSurfaceLength());

  // The token type is the string form of the part-of-speech id.
  typeAtt.setType(token.getPosId().toString());
  posAtt.setPartOfSpeech(token.getMophemes());
  semanticClassAtt.setSemanticClass(token.getSemanticClass());
}