/**
 * Builds a script-driven spider from the given parameters and runs it.
 *
 * <p>The start URLs are validated first: if none were supplied, an error and a
 * usage line are printed and the JVM exits with status {@code -1} before the
 * script file is loaded or any spider object is created.
 *
 * <p>Otherwise a {@code ScriptProcessor} is built from the configured language,
 * script file and thread count; its site is configured with the requested sleep
 * time, 3 retries and the accepted status codes 200, 404, 403, 500 and 502.
 * The spider's pipelines are replaced by a single pipeline whose
 * {@code process} method does nothing, every start URL is added, and
 * {@code run()} is invoked.
 *
 * @param params parsed command-line parameters (language, script file name,
 *               thread count, sleep time and start URLs)
 */
private static void startSpider(Params params) {
    // Fail fast: reject an invocation without URLs before doing any real work,
    // so the user gets the usage message rather than an unrelated script error.
    if (params.getUrls() == null || params.getUrls().isEmpty()) {
        System.err.println("Need at least one argument");
        System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
        System.exit(-1);
    }

    ScriptProcessor pageProcessor = ScriptProcessorBuilder.custom()
            .language(params.getLanguage())
            .scriptFromFile(params.getScriptFileName())
            .thread(params.getThread())
            .build();

    pageProcessor.getSite().setSleepTime(params.getSleepTime());
    pageProcessor.getSite().setRetryTimes(3);
    // Error statuses are accepted as well as 200, presumably so the script can
    // still inspect such responses -- NOTE(review): confirm against Site semantics.
    pageProcessor.getSite().setAcceptStatCode(WMCollections.<Integer>newHashSet(200, 404, 403, 500, 502));

    Spider spider = Spider.create(pageProcessor).thread(params.getThread());

    // Swap whatever pipelines are configured for one that ignores all results.
    spider.clearPipeline().addPipeline(new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            // Intentionally empty: results are not handled by this pipeline.
        }
    });

    for (String url : params.getUrls()) {
        spider.addUrl(url);
    }
    spider.run();
}