/**
 * Looks for an already stored dictionary whose content makes {@code dict} redundant.
 *
 * <p>Every resource listed under {@code dictInfo.getResourceDir()} is resolved through
 * {@code getDictionaryInfo}. A stored dictionary matches when dictionary reuse is enabled
 * in the config and the stored dictionary contains {@code dict}, or when {@code dict}
 * equals the stored dictionary.
 *
 * @param dictInfo descriptor whose resource directory is scanned
 * @param dict     the dictionary to compare against the stored ones
 * @return the resource path of the first match, or {@code null} if the listing is
 *         {@code null} or nothing matches
 * @throws IOException if the resource store cannot be read
 */
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<String> dict) throws IOException {
    ResourceStore store = getStore();
    NavigableSet<String> existings = store.listResources(dictInfo.getResourceDir());
    if (existings == null) {
        return null;
    }

    logger.info("{} existing dictionaries of the same column", existings.size());
    // Purely diagnostic: a large number of dictionaries makes this scan expensive.
    if (existings.size() > 100) {
        logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(),
                existings.size());
    }

    for (String path : existings) {
        DictionaryInfo candidate = getDictionaryInfo(path);
        if (candidate == null) {
            continue;
        }
        // The containment check is only attempted when reuse is enabled.
        if (config.isDictResuable() && candidate.getDictionaryObject().contains(dict)) {
            return path;
        }
        if (dict.equals(candidate.getDictionaryObject())) {
            return path;
        }
    }
    return null;
}
/**
 * Returns the stored dictionary info with the highest cardinality among all resources
 * under {@code dictInfo.getResourceDir()}.
 *
 * <p>When several entries share the highest cardinality, the one encountered first wins.
 *
 * @param dictInfo descriptor whose resource directory is scanned
 * @return the entry with the largest cardinality, or {@code null} if there are no entries
 * @throws IOException if the resource store cannot be read
 */
private DictionaryInfo findLargestDictInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final List<DictionaryInfo> candidates = store.getAllResources(dictInfo.getResourceDir(),
            DictionaryInfoSerializer.INFO_SERIALIZER);

    DictionaryInfo winner = null;
    for (DictionaryInfo candidate : candidates) {
        // Strict comparison keeps the earliest entry on ties.
        if (winner == null || winner.getCardinality() < candidate.getCardinality()) {
            winner = candidate;
        }
    }
    return winner;
}
@Override public void init(DictionaryInfo dictInfo, int baseId, String hdfsDir) throws IOException { sourceColumn = dictInfo.getSourceTable() + "." + dictInfo.getSourceColumn(); KylinConfig config = KylinConfig.getInstanceFromEnv(); int maxEntriesPerSlice = config.getAppendDictEntrySize(); if (hdfsDir == null) { //build in Kylin job server hdfsDir = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory(); } //use UUID to make each segment dict in different HDFS dir and support concurrent build //use timestamp to make the segment dict easily to delete String baseDir = hdfsDir + "resources/SegmentDict" + dictInfo.getResourceDir() + "/" + RandomUtil.randomUUID().toString() + "_" + System.currentTimeMillis() + "/"; this.builder = new AppendTrieDictionaryBuilder(baseDir, maxEntriesPerSlice, false); this.baseId = baseId; }
/**
 * Removes every dictionary stored for the given source table and column.
 *
 * <p>A throwaway {@code DictionaryInfo} is populated with the table and column only to
 * derive the resource directory; each resource listed there is passed to
 * {@code removeDictionary}.
 *
 * @param srcTable source table name
 * @param srcCol   source column name
 * @throws IOException if listing or removing resources fails
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    NavigableSet<String> paths = getStore().listResources(probe.getResourceDir());
    if (paths == null) {
        // Nothing is stored under that directory.
        return;
    }
    for (String path : paths) {
        removeDictionary(path);
    }
}
/**
 * Looks for a stored dictionary that was built from the same input as {@code dictInfo}.
 *
 * <p>All dictionary infos under {@code dictInfo.getResourceDir()} are read with the
 * info-only serializer and compared by their {@code TableSignature}.
 *
 * @param dictInfo descriptor providing the resource directory and the input signature
 * @return the resource path of the first entry with an equal input signature, or
 *         {@code null} if none matches
 * @throws IOException if the resource store cannot be read
 */
private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final List<DictionaryInfo> stored = store.getAllResources(dictInfo.getResourceDir(),
            DictionaryInfoSerializer.INFO_SERIALIZER);

    final TableSignature wanted = dictInfo.getInput();
    for (DictionaryInfo candidate : stored) {
        if (!wanted.equals(candidate.getInput())) {
            continue;
        }
        return candidate.getResourcePath();
    }
    return null;
}
// Classifies basedir by its prefix under the HDFS working directory: a path under
// ".../resources/GlobalDict" registers this column's GlobalDict directory in
// activeResources, a path under ".../resources/SegmentDict" registers its SegmentDict one.
// NOTE(review): the prefix checks concatenate "/resources/..." while the registered paths
// concatenate "resources/..." (no leading slash) onto the same getHdfsWorkingDirectory()
// value - confirm whether that value ends with "/" and whether both forms are intended.
if (basedir.startsWith(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "/resources/GlobalDict")) { activeResources.add(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "resources/GlobalDict" + dictInfo.getResourceDir()); } else if (basedir.startsWith(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "/resources/SegmentDict")) { activeResources.add(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory() + "resources/SegmentDict" + dictInfo.getResourceDir());
@Override public void init(DictionaryInfo dictInfo, int baseId, String hdfsDir) throws IOException { sourceColumn = dictInfo.getSourceTable() + "_" + dictInfo.getSourceColumn(); lock = KylinConfig.getInstanceFromEnv().getDistributedLockFactory().lockForCurrentThread(); lock.lock(getLockPath(sourceColumn), Long.MAX_VALUE); int maxEntriesPerSlice = KylinConfig.getInstanceFromEnv().getAppendDictEntrySize(); if (hdfsDir == null) { //build in Kylin job server hdfsDir = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory(); } String baseDir = hdfsDir + "resources/GlobalDict" + dictInfo.getResourceDir() + "/"; try { this.builder = new AppendTrieDictionaryBuilder(baseDir, maxEntriesPerSlice, true); } catch (Throwable e) { lock.unlock(getLockPath(sourceColumn)); throw new RuntimeException( String.format(Locale.ROOT, "Failed to create global dictionary on %s ", sourceColumn), e); } this.baseId = baseId; }
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> dict) throws IOException { ResourceStore store = MetadataManager.getInstance(config).getStore(); ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); if (existings == null) return null; for (String existing : existings) { logger.info("Checking dup dict :" + existing); DictionaryInfo existingInfo = load(existing, true); // skip cache, // direct load // from store if (existingInfo == null) logger.info("existingInfo is null"); if (existingInfo != null && dict.equals(existingInfo.getDictionaryObject())) return existing; } return null; }
/**
 * Removes every dictionary stored for the given source table and column.
 *
 * <p>A throwaway {@code DictionaryInfo} is populated with the table and column only to
 * derive the resource directory; each resource listed there is passed to
 * {@code removeDictionary}.
 *
 * @param srcTable source table name
 * @param srcCol   source column name
 * @throws IOException if listing or removing resources fails
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    ResourceStore store = MetadataManager.getInstance(config).getStore();
    ArrayList<String> paths = store.listResources(probe.getResourceDir());
    if (paths == null) {
        // Nothing is stored under that directory.
        return;
    }
    for (String path : paths) {
        removeDictionary(path);
    }
}
private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException { ResourceStore store = MetadataManager.getInstance(config).getStore(); ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); if (existings == null) return null; TableSignature input = dictInfo.getInput(); for (String existing : existings) { DictionaryInfo existingInfo = load(existing, false); // skip cache, // direct // load from // store if (input.equals(existingInfo.getInput())) return existing; } return null; }
/**
 * Looks for an already stored dictionary whose content makes {@code dict} redundant.
 *
 * <p>Every resource listed under {@code dictInfo.getResourceDir()} is resolved through
 * {@code getDictionaryInfo}. A stored dictionary matches when dictionary reuse is enabled
 * in the config and the stored dictionary contains {@code dict}, or when {@code dict}
 * equals the stored dictionary.
 *
 * @param dictInfo descriptor whose resource directory is scanned
 * @param dict     the dictionary to compare against the stored ones
 * @return the resource path of the first match, or {@code null} if the listing is
 *         {@code null} or nothing matches
 * @throws IOException if the resource store cannot be read
 */
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<String> dict) throws IOException {
    ResourceStore store = getStore();
    NavigableSet<String> existings = store.listResources(dictInfo.getResourceDir());
    if (existings == null) {
        return null;
    }

    logger.info("{} existing dictionaries of the same column", existings.size());
    // Purely diagnostic: a large number of dictionaries makes this scan expensive.
    if (existings.size() > 100) {
        logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(),
                existings.size());
    }

    for (String path : existings) {
        DictionaryInfo candidate = getDictionaryInfo(path);
        if (candidate == null) {
            continue;
        }
        // The containment check is only attempted when reuse is enabled.
        if (config.isDictResuable() && candidate.getDictionaryObject().contains(dict)) {
            return path;
        }
        if (dict.equals(candidate.getDictionaryObject())) {
            return path;
        }
    }
    return null;
}
private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> dict) throws IOException { ResourceStore store = MetadataManager.getInstance(config).getStore(); ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); if (existings == null) return null; for (String existing : existings) { logger.info("Checking dup dict :" + existing); DictionaryInfo existingInfo = load(existing, true); // skip cache, direct load from store if (existingInfo == null) logger.info("existingInfo is null"); if (existingInfo != null && dict.equals(existingInfo.getDictionaryObject())) return existing; } return null; }
/**
 * Returns the stored dictionary info with the highest cardinality among all resources
 * under {@code dictInfo.getResourceDir()}.
 *
 * <p>When several entries share the highest cardinality, the one encountered first wins.
 *
 * @param dictInfo descriptor whose resource directory is scanned
 * @return the entry with the largest cardinality, or {@code null} if there are no entries
 * @throws IOException if the resource store cannot be read
 */
private DictionaryInfo findLargestDictInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final List<DictionaryInfo> candidates = store.getAllResources(dictInfo.getResourceDir(),
            DictionaryInfoSerializer.INFO_SERIALIZER);

    DictionaryInfo winner = null;
    for (DictionaryInfo candidate : candidates) {
        // Strict comparison keeps the earliest entry on ties.
        if (winner == null || winner.getCardinality() < candidate.getCardinality()) {
            winner = candidate;
        }
    }
    return winner;
}
@Override public void init(DictionaryInfo dictInfo, int baseId, String hdfsDir) throws IOException { sourceColumn = dictInfo.getSourceTable() + "." + dictInfo.getSourceColumn(); KylinConfig config = KylinConfig.getInstanceFromEnv(); int maxEntriesPerSlice = config.getAppendDictEntrySize(); if (hdfsDir == null) { //build in Kylin job server hdfsDir = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory(); } //use UUID to make each segment dict in different HDFS dir and support concurrent build //use timestamp to make the segment dict easily to delete String baseDir = hdfsDir + "resources/SegmentDict" + dictInfo.getResourceDir() + "/" + RandomUtil.randomUUID().toString() + "_" + System.currentTimeMillis() + "/"; this.builder = new AppendTrieDictionaryBuilder(baseDir, maxEntriesPerSlice, false); this.baseId = baseId; }
/**
 * Looks for a stored dictionary that was built from the same input as {@code dictInfo}.
 *
 * <p>All dictionary infos under {@code dictInfo.getResourceDir()} are read with the
 * info-only serializer and compared by their {@code TableSignature}.
 *
 * @param dictInfo descriptor providing the resource directory and the input signature
 * @return the resource path of the first entry with an equal input signature, or
 *         {@code null} if none matches
 * @throws IOException if the resource store cannot be read
 */
private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final List<DictionaryInfo> stored = store.getAllResources(dictInfo.getResourceDir(),
            DictionaryInfoSerializer.INFO_SERIALIZER);

    final TableSignature wanted = dictInfo.getInput();
    for (DictionaryInfo candidate : stored) {
        if (!wanted.equals(candidate.getInput())) {
            continue;
        }
        return candidate.getResourcePath();
    }
    return null;
}
/**
 * Removes every dictionary stored for the given source table and column.
 *
 * <p>A throwaway {@code DictionaryInfo} is populated with the table and column only to
 * derive the resource directory; each resource listed there is passed to
 * {@code removeDictionary}.
 *
 * @param srcTable source table name
 * @param srcCol   source column name
 * @throws IOException if listing or removing resources fails
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    NavigableSet<String> paths = getStore().listResources(probe.getResourceDir());
    if (paths == null) {
        // Nothing is stored under that directory.
        return;
    }
    for (String path : paths) {
        removeDictionary(path);
    }
}
/**
 * Removes every dictionary stored for the given source table and column.
 *
 * <p>A throwaway {@code DictionaryInfo} is populated with the table and column only to
 * derive the resource directory; each resource listed there is passed to
 * {@code removeDictionary}.
 *
 * @param srcTable source table name
 * @param srcCol   source column name
 * @throws IOException if listing or removing resources fails
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    ResourceStore store = MetadataManager.getInstance(config).getStore();
    ArrayList<String> paths = store.listResources(probe.getResourceDir());
    if (paths == null) {
        // Nothing is stored under that directory.
        return;
    }
    for (String path : paths) {
        removeDictionary(path);
    }
}
private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException { ResourceStore store = MetadataManager.getInstance(config).getStore(); ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); if (existings == null) return null; TableSignature input = dictInfo.getInput(); for (String existing : existings) { DictionaryInfo existingInfo = load(existing, false); // skip cache, direct load from store if (input.equals(existingInfo.getInput())) return existing; } return null; }
@Override public void init(DictionaryInfo dictInfo, int baseId, String hdfsDir) throws IOException { sourceColumn = dictInfo.getSourceTable() + "_" + dictInfo.getSourceColumn(); lock = KylinConfig.getInstanceFromEnv().getDistributedLockFactory().lockForCurrentThread(); lock.lock(getLockPath(sourceColumn), Long.MAX_VALUE); int maxEntriesPerSlice = KylinConfig.getInstanceFromEnv().getAppendDictEntrySize(); if (hdfsDir == null) { //build in Kylin job server hdfsDir = KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory(); } String baseDir = hdfsDir + "resources/GlobalDict" + dictInfo.getResourceDir() + "/"; try { this.builder = new AppendTrieDictionaryBuilder(baseDir, maxEntriesPerSlice, true); } catch (Throwable e) { lock.unlock(getLockPath(sourceColumn)); throw new RuntimeException( String.format(Locale.ROOT, "Failed to create global dictionary on %s ", sourceColumn), e); } this.baseId = baseId; }