public DictionaryInfo saveDictionary(TblColRef col, IReadableTable inpTable, Dictionary<String> dictionary) throws IOException { DictionaryInfo dictInfo = createDictionaryInfo(col, inpTable); String dupInfo = checkDupByInfo(dictInfo); if (dupInfo != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); return getDictionaryInfo(dupInfo); } return trySaveNewDict(dictionary, dictInfo); }
public DictionaryInfo buildDictionary(TblColRef col, IReadableTable inpTable, String builderClass) throws IOException { if (inpTable.exists() == false) return null; logger.info("building dictionary for " + col); DictionaryInfo dictInfo = createDictionaryInfo(col, inpTable); String dupInfo = checkDupByInfo(dictInfo); if (dupInfo != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); return getDictionaryInfo(dupInfo); } logger.info("Building dictionary object " + JsonUtil.writeValueAsString(dictInfo)); Dictionary<String> dictionary; dictionary = buildDictFromReadableTable(inpTable, dictInfo, builderClass, col); return trySaveNewDict(dictionary, dictInfo); }
@SuppressWarnings("unchecked") public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment, Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) { Map<TblColRef, Dictionary<String>> realDictMap = Maps.newHashMap(); for (Map.Entry<TblColRef, Dictionary<String>> entry : dictionaryMap.entrySet()) { final TblColRef tblColRef = entry.getKey(); final Dictionary<String> dictionary = entry.getValue(); IReadableTable.TableSignature signature = new IReadableTable.TableSignature(); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset)); signature.setSize(endOffset - startOffset); DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature); logger.info("writing dictionary for TblColRef:" + tblColRef.toString()); DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig()); try { DictionaryInfo realDict = dictionaryManager.trySaveNewDict(dictionary, dictInfo); cubeSegment.putDictResPath(tblColRef, realDict.getResourcePath()); realDictMap.put(tblColRef, (Dictionary<String>) realDict.getDictionaryObject()); } catch (IOException e) { throw new RuntimeException("error save dictionary for column:" + tblColRef, e); } } return realDictMap; }
} else { Dictionary<String> newDict = DictionaryGenerator.mergeDictionaries(DataType.getType(newDictInfo.getDataType()), dicts); return trySaveNewDict(newDict, newDictInfo);
DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);
dictSrc.setLastModified(0);//to avoid resource store write conflict Dictionary dictObj = dictSrc.getDictionaryObject().copyToAnotherMeta(srcConfig, dstConfig); DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictObj, dictSrc); dictSrc.setLastModified(ts);
dictSrc.setLastModified(0);//to avoid resource store write conflict Dictionary dictObj = dictSrc.getDictionaryObject().copyToAnotherMeta(srcConfig, dstConfig); DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictObj, dictSrc); dictSrc.setLastModified(ts);
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { logger.info("building dictionary for " + col); Object[] tmp = decideSourceData(model, dict, col, factColumnsPath); String srcTable = (String) tmp[0]; String srcCol = (String) tmp[1]; int srcColIdx = (Integer) tmp[2]; ReadableTable inpTable = (ReadableTable) tmp[3]; DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcCol, srcColIdx, col.getDatatype(), inpTable.getSignature(), inpTable.getColumnDelimeter()); String dupDict = checkDupByInfo(dictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> dictionary = DictionaryGenerator.buildDictionary(dictInfo, inpTable); return trySaveNewDict(dictionary, dictInfo); }
public DictionaryInfo mergeDictionary(List<DictionaryInfo> dicts) throws IOException { DictionaryInfo firstDictInfo = null; int totalSize = 0; for (DictionaryInfo info : dicts) { // check if (firstDictInfo == null) { firstDictInfo = info; } else { if (!firstDictInfo.isDictOnSameColumn(info)) { throw new IllegalArgumentException("Merging dictionaries are not structurally equal(regardless of signature)."); } } totalSize += info.getInput().getSize(); } if (firstDictInfo == null) { throw new IllegalArgumentException("DictionaryManager.mergeDictionary input cannot be null"); } DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath("merged_with_no_original_path"); String dupDict = checkDupByInfo(newDictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + newDictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> newDict = DictionaryGenerator.mergeDictionaries(newDictInfo, dicts); return trySaveNewDict(newDict, newDictInfo); }
private DictionaryInfo makeSharedDict() throws IOException { TableSignature signature = new TableSignature(); signature.setSize(100); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath("fake_common_dict"); DictionaryInfo newDictInfo = new DictionaryInfo("", "", 0, "string", signature, ""); List<byte[]> values = new ArrayList<byte[]>(); values.add(new byte[] { 101, 101, 101 }); values.add(new byte[] { 102, 102, 102 }); Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(newDictInfo, values); dictionaryManager.trySaveNewDict(dict, newDictInfo); ((TrieDictionary) dict).dump(System.out); return newDictInfo; }
DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictSrc.getDictionaryObject(), dictSrc); dictSrc.setLastModified(ts);
values.add(new byte[] { 98, 98, 98 }); Dictionary<?> dict = DictionaryGenerator.buildDictionaryFromValueList(newDictInfo, values); dictionaryManager.trySaveNewDict(dict, newDictInfo); ((TrieDictionary) dict).dump(System.out);
public DictionaryInfo saveDictionary(TblColRef col, IReadableTable inpTable, Dictionary<String> dictionary) throws IOException { DictionaryInfo dictInfo = createDictionaryInfo(col, inpTable); String dupInfo = checkDupByInfo(dictInfo); if (dupInfo != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); return getDictionaryInfo(dupInfo); } return trySaveNewDict(dictionary, dictInfo); }
public DictionaryInfo buildDictionary(TblColRef col, IReadableTable inpTable, String builderClass) throws IOException { if (inpTable.exists() == false) return null; logger.info("building dictionary for " + col); DictionaryInfo dictInfo = createDictionaryInfo(col, inpTable); String dupInfo = checkDupByInfo(dictInfo); if (dupInfo != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); return getDictionaryInfo(dupInfo); } logger.info("Building dictionary object " + JsonUtil.writeValueAsString(dictInfo)); Dictionary<String> dictionary; dictionary = buildDictFromReadableTable(inpTable, dictInfo, builderClass, col); return trySaveNewDict(dictionary, dictInfo); }
@SuppressWarnings("unchecked") public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment, Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) { Map<TblColRef, Dictionary<String>> realDictMap = Maps.newHashMap(); for (Map.Entry<TblColRef, Dictionary<String>> entry : dictionaryMap.entrySet()) { final TblColRef tblColRef = entry.getKey(); final Dictionary<String> dictionary = entry.getValue(); IReadableTable.TableSignature signature = new IReadableTable.TableSignature(); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset)); signature.setSize(endOffset - startOffset); DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature); logger.info("writing dictionary for TblColRef:" + tblColRef.toString()); DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig()); try { DictionaryInfo realDict = dictionaryManager.trySaveNewDict(dictionary, dictInfo); cubeSegment.putDictResPath(tblColRef, realDict.getResourcePath()); realDictMap.put(tblColRef, (Dictionary<String>) realDict.getDictionaryObject()); } catch (IOException e) { throw new RuntimeException("error save dictionary for column:" + tblColRef, e); } } return realDictMap; }
return trySaveNewDict(newDict, newDictInfo);
} else { Dictionary<String> newDict = DictionaryGenerator.mergeDictionaries(DataType.getType(newDictInfo.getDataType()), dicts); return trySaveNewDict(newDict, newDictInfo);
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { logger.info("building dictionary for " + col); Object[] tmp = decideSourceData(model, dict, col, factColumnsPath); String srcTable = (String) tmp[0]; String srcCol = (String) tmp[1]; int srcColIdx = (Integer) tmp[2]; ReadableTable inpTable = (ReadableTable) tmp[3]; if (!inpTable.exists()) return null; DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcCol, srcColIdx, col.getDatatype(), inpTable.getSignature()); String dupDict = checkDupByInfo(dictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> dictionary = DictionaryGenerator.buildDictionary(dictInfo, inpTable); return trySaveNewDict(dictionary, dictInfo); }
DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);
dictSrc.setLastModified(0);//to avoid resource store write conflict Dictionary dictObj = dictSrc.getDictionaryObject().copyToAnotherMeta(srcConfig, dstConfig); DictionaryInfo dictSaved = dstDictMgr.trySaveNewDict(dictObj, dictSrc); dictSrc.setLastModified(ts);