This block outputs:
Term: quick, posInc: 2, startOffset: 4, endOffset: 9, type: <ALPHANUM>
Term: fox, posInc: 1, startOffset: 10, endOffset: 13, type: <ALPHANUM>
Term: jumped, posInc: 1, startOffset: 14, endOffset: 20, type: <ALPHANUM>
Term: over, posInc: 1, startOffset: 21, endOffset: 25, type: <ALPHANUM>
Term: lazy, posInc: 2, startOffset: 30, endOffset: 34, type: <ALPHANUM>
Term: brown, posInc: 1, startOffset: 36, endOffset: 41, type: <ALPHANUM>
Term: dog, posInc: 1, startOffset: 42, endOffset: 45, type: <ALPHANUM>
Term: 867, posInc: 1, startOffset: 47, endOffset: 50, type: <NUM>
Term: 5309, posInc: 1, startOffset: 51, endOffset: 55, type: <NUM>
Term: マイクル, posInc: 1, startOffset: 57, endOffset: 61, type: <KATAKANA>
Term: と, posInc: 1, startOffset: 61, endOffset: 62, type: <HIRAGANA>
Term: 言, posInc: 1, startOffset: 62, endOffset: 63, type: <IDEOGRAPHIC>
Term: い, posInc: 1, startOffset: 63, endOffset: 64, type: <HIRAGANA>
Term: ま, posInc: 1, startOffset: 64, endOffset: 65, type: <HIRAGANA>
Term: す, posInc: 1, startOffset: 65, endOffset: 66, type: <HIRAGANA>
Term: ☺, posInc: 1, startOffset: 67, endOffset: 68, type: <EMOJI>
CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
TypeAttribute typeAttribute = tokenStream.getAttribute(TypeAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
System.out.println("Term: " + new String(charTermAttribute.buffer(), 0, charTermAttribute.length()) +
", posInc: " + posIncAttribute.getPositionIncrement() +
", startOffset: " + offsetAttribute.startOffset() +
", endOffset: " + offsetAttribute.endOffset() +
", type: " + typeAttribute.type());
}
}
}
}
}