public boolean start(BoilerpipeHTMLContentHandler instance, final String localName, final String qName, final Attributes atts) throws SAXException { if (instance.inAnchor++ > 0) { // as nested A elements are not allowed per specification, we // are probably reaching this branch due to a bug in the XML // parser System.err.println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."); end(instance, localName, qName); } if (instance.inIgnorableElement == 0) { instance.addWhitespaceIfNecessary(); instance.tokenBuffer .append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_START); instance.tokenBuffer.append(' '); instance.sbLastWasWhitespace = true; } return false; }
public boolean start(BoilerpipeHTMLContentHandler instance, final String localName, final String qName, final Attributes atts) throws SAXException { if (instance.inAnchor++ > 0) { // as nested A elements are not allowed per specification, we // are probably reaching this branch due to a bug in the XML // parser System.err.println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."); end(instance, localName, qName); } if (instance.inIgnorableElement == 0) { instance.addWhitespaceIfNecessary(); instance.tokenBuffer .append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_START); instance.tokenBuffer.append(' '); instance.sbLastWasWhitespace = true; } return false; }
public boolean start(BoilerpipeHTMLContentHandler instance, final String localName, final String qName, final Attributes atts) throws SAXException { if (instance.inAnchor++ > 0) { // as nested A elements are not allowed per specification, we // are probably reaching this branch due to a bug in the XML // parser System.err.println("Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."); end(instance, localName, qName); } if (instance.inIgnorableElement == 0) { instance.addWhitespaceIfNecessary(); instance.tokenBuffer .append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_START); instance.tokenBuffer.append(' '); instance.sbLastWasWhitespace = true; } return false; }