/**
 * Wraps the incoming token stream in a {@link URINormalisationFilter}.
 *
 * @param input the upstream token stream to decorate
 * @return the normalising filter consuming {@code input}
 */
@Override
public TokenStream create(final TokenStream input) {
  return new URINormalisationFilter(input);
}
/** * Given the type of URI normalisation, apply the right sequence of operations * and filters to the token stream. */ private TokenStream applyURINormalisation(TokenStream in) { switch (normalisationType) { case NONE: return new URITrailingSlashFilter(in); // here, trailing slash filter is after localname filtering, in order to // avoid filtering subdirectory instead of localname case LOCALNAME: in = new URILocalnameFilter(in); return new URITrailingSlashFilter(in); // here, trailing slash filter is before localname filtering, in order to // avoid trailing slash checking on every tokens generated by the // URI normalisation filter case FULL: in = new URITrailingSlashFilter(in); return new URINormalisationFilter(in); default: throw new EnumConstantNotPresentException(URINormalisation.class, normalisationType.toString()); } }
/**
 * Asserts that the tokenizer, wrapped in a {@link URINormalisationFilter},
 * yields exactly the expected token images (and, optionally, token types)
 * for the given input, then reaches end of stream.
 *
 * @param t              the tokenizer under test; must expose a
 *                       {@link CharTermAttribute}
 * @param input          the raw text to tokenize
 * @param expectedImages expected term text, one entry per token
 * @param expectedTypes  expected token types, parallel to
 *                       {@code expectedImages}; may be {@code null} to skip
 *                       type checking
 * @throws Exception if the stream cannot be consumed
 */
public void assertNormalisesTo(final Tokenizer t, final String input,
                               final String[] expectedImages,
                               final String[] expectedTypes)
throws Exception {
  assertTrue("has CharTermAttribute", t.hasAttribute(CharTermAttribute.class));
  final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

  TypeAttribute typeAtt = null;
  if (expectedTypes != null) {
    assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
    typeAtt = t.getAttribute(TypeAttribute.class);
  }

  t.setReader(new StringReader(input));
  // BUGFIX: build the full chain first and reset the OUTERMOST stream.
  // Lucene's TokenStream contract requires reset() on the consumer-facing
  // stream so the filter's own per-stream state is initialised; the original
  // code reset only the tokenizer before the filter even existed.
  final TokenStream filter = new URINormalisationFilter(t);
  filter.reset();

  for (int i = 0; i < expectedImages.length; i++) {
    assertTrue("token " + i + " exists", filter.incrementToken());
    assertEquals(expectedImages[i], termAtt.toString());
    if (expectedTypes != null) {
      assertEquals(expectedTypes[i], typeAtt.type());
    }
  }
  assertFalse("end of stream", filter.incrementToken());
  filter.end();
  filter.close();
}