/**
 * Fetches the MD5 checksum listing referenced by the given links and parses it
 * into a lookup table.
 *
 * <p>The first link accepted by {@code FileMatcher.MD5} is appended to
 * {@code BASEURL_STRING} and downloaded. Each line of the listing is split on
 * the first run of two non-word characters: the part before it is taken as the
 * MD5 sum, the part after it as the file name.
 *
 * @param links link paths to search for the checksum listing
 * @return map from file name to MD5 sum; empty when {@code links} is empty or
 *         contains no checksum link
 * @throws IOException if the checksum listing cannot be opened or read
 */
protected Map<String, String> getMd5Sums(List<String> links) throws IOException {
    HashMap<String, String> md5s = new HashMap<String, String>();
    if (links.isEmpty()) {
        return md5s;
    }

    // A listing without a checksum link yields no checksums rather than an
    // IndexOutOfBoundsException from get(0).
    List<String> md5Links = FileMatcher.MD5.match(links);
    if (md5Links.isEmpty()) {
        return md5s;
    }

    URL md5Url = new URL(BASEURL_STRING + md5Links.get(0));

    // IOUtils.readLines does not close the stream it is given, so close it here.
    java.io.InputStream in = md5Url.openStream();
    List<String> lines;
    try {
        lines = IOUtils.readLines(in, "UTF-8");
    } finally {
        in.close();
    }

    for (String line : lines) {
        // Limit of 2 keeps the file name intact even if it happens to contain
        // two consecutive non-word characters itself.
        String[] parsedInfo = line.split("\\W{2}", 2);
        if (parsedInfo.length < 2) {
            // Blank or malformed line: nothing to record.
            continue;
        }
        String md5 = parsedInfo[0];
        String fileName = parsedInfo[1];
        md5s.put(fileName, md5);
    }
    return md5s;
}
/**
 * Collects every link that is accepted by one of the configured matchers.
 *
 * <p>For each matcher in {@code matchers}, every matching link is appended to
 * {@code BASEURL_STRING} to form its URL and wrapped in a {@link DumpLinkInfo}
 * built from this object's {@code lang} and {@code dumpDate}, the matcher, the
 * URL and the number the matcher extracts from the link. The MD5 sum looked up
 * by the info's download name is attached; it is {@code null} when the
 * checksum table has no entry for that name.
 *
 * @param links link paths to classify
 * @return multimap from matcher to the link infos that matcher accepted
 * @throws IOException if the checksum listing cannot be retrieved
 */
public Multimap<FileMatcher, DumpLinkInfo> getDumpFiles(List<String> links) throws IOException {
    Multimap<FileMatcher, DumpLinkInfo> found = HashMultimap.create();
    Map<String, String> checksums = getMd5Sums(links);

    for (FileMatcher matcher : matchers) {
        // An empty match list simply contributes nothing for this matcher.
        for (String path : matcher.match(links)) {
            URL target = new URL(BASEURL_STRING + path);
            int number = matcher.getNumber(path);
            DumpLinkInfo info = new DumpLinkInfo(lang, dumpDate, matcher, target, number);
            String md5 = checksums.get(info.getDownloadName());
            info.setMd5(md5);
            found.put(matcher, info);
        }
    }
    return found;
}