/**
 * Converts a serialized format value into the format string to expose.
 *
 * <p>The value is parsed with {@code DcsFormat.parseDcsFormat}. When the parsed
 * format's scheme URI is non-null and equal to {@code PRONOM_URI}, the format
 * string is returned with {@code PRONOM_FORMAT_PREFIX} prepended; otherwise the
 * bare format string is returned.
 *
 * @param value the serialized format to parse
 * @return the (possibly prefixed) format string, or {@code null} when an
 *         {@link IllegalArgumentException} is raised while handling the value
 */
protected String extract_format(String value) {
    try {
        DcsFormat parsed = DcsFormat.parseDcsFormat(value);
        String schemeUri = parsed.getSchemeUri();
        boolean pronomScheme = schemeUri != null && schemeUri.equals(PRONOM_URI);

        return pronomScheme
                ? PRONOM_FORMAT_PREFIX + parsed.getFormat()
                : parsed.getFormat();
    } catch (IllegalArgumentException ignored) {
        // A value that cannot be handled as a DcsFormat yields no format.
        return null;
    }
}
private Map<String, Integer> getFileCountByFormat(Set<AttributeSet> fileAttributeSets) { //TODO: map keyed by format's name. keyed by DcsFormat object would be more reliable. Report consumer can //decide what field of DcsFormat to use. But IngestReport doesn't current support mapping keyed on DcsFormat type. Map<String, Integer> fileCountByFormat = new HashMap<String, Integer>(); for (AttributeSet fileAttributeSet : fileAttributeSets) { //extract file-formats java.util.Collection<Attribute> matchingAttributes = fileAttributeSet.getAttributesByName(Metadata.FILE_FORMAT); for (Attribute attribute : matchingAttributes) { if (attribute.getType().equals(AttributeValueType.DCS_FORMAT)) { DcsFormat format = DcsFormat.parseDcsFormat(attribute.getValue()); // Only add the Pronom format to the report to be displayed or the unknown mime-type // (application/octet-stream) if (format.getSchemeUri().contains("PRONOM") || format.getName().equalsIgnoreCase("application/octet-stream")) { // if an entry for this format does not exist yet if (!fileCountByFormat.containsKey(format.getName())) { // create the new entry fileCountByFormat.put(format.getName(), 0); } // increase count for the format entry. fileCountByFormat.put(format.getName(), fileCountByFormat.get(format.getName()) + 1); } } } } return fileCountByFormat; } /**
if (formatAttribute.getType().equals(AttributeValueType.DCS_FORMAT)) { try { detectedFormatUriString = createFormatURIString(DcsFormat.parseDcsFormat(detectedFormatAttributeValue)); if(detectedFormatUriString != null) { detectedFormatURIs.add(detectedFormatUriString);