// Configure the cleaner: drop HTML comments and enable the special-entity and
// reserved-character (NCR) translation options before parsing.
CleanerProperties props = new CleanerProperties();
props.setOmitComments(true);
props.setTransResCharsToNCR(true);
props.setTranslateSpecialEntities(true);

// Parse the HTML file from disk into a cleaned node tree.
File htmlFile = new File("C:\\Users\\MyComputer\\Desktop\\aspose.html");
HtmlCleaner htmlCleaner = new HtmlCleaner(props);
TagNode tagNode = htmlCleaner.clean(htmlFile);
/**
 * Creates the function with a pre-configured {@link HtmlCleaner}.
 *
 * <p>The cleaner omits comments, has special-entity and reserved-character (NCR)
 * translation enabled, and prunes a fixed list of tags.
 */
public CleanHtmlFunction() {
    this.cleaner = new HtmlCleaner();
    CleanerProperties p = cleaner.getProperties();
    p.setOmitComments(true);
    p.setTranslateSpecialEntities(true);
    p.setTransResCharsToNCR(true);
    // remove all tags that contain uninteresting content
    p.setPruneTags("style,script,form,object,audio,video");
}
/** * Cleans the relevant file and generates a valid XML file ready for processing to Sel 2 java File. * * @param absoluteFilename - name of the file to convert. * @return String - location of the converted file. */ public String convertToXML(String absoluteFilename) throws Exception { FileHandler fromSelIDE = new FileHandler(absoluteFilename); FileHandler toXML = new FileHandler(System.getProperty("java.io.tmpdir") + File.separator + fromSelIDE.getFileName() + ".xml", true); if (fromSelIDE.getFile().isDirectory()) { LOGGER.error("Cannot convert directory {} into a Selenium Test!", fromSelIDE.getFileName()); return null; } //Clean up html so that we can read it as XML properly HtmlCleaner cleaner = new HtmlCleaner(); CleanerProperties XMLPrefs = cleaner.getProperties(); XMLPrefs.setUseEmptyElementTags(true); XMLPrefs.setTranslateSpecialEntities(true); XMLPrefs.setTransResCharsToNCR(true); XMLPrefs.setOmitComments(true); XMLPrefs.setOmitComments(true); XMLPrefs.setOmitDoctypeDeclaration(true); XMLPrefs.setNamespacesAware(false); TagNode tagNode = new HtmlCleaner(XMLPrefs).clean(fromSelIDE.getFile()); new PrettyXmlSerializer(XMLPrefs).writeToStream(tagNode, toXML.getWritableFileOutputStream(), "utf-8"); toXML.close(); return toXML.getAbsoluteFile(); }