/**
 * Creates a copy of this object by constructing a new {@code DocumentURI}
 * from the current value of the {@code uri} field.
 *
 * @return a newly constructed {@code DocumentURI}
 */
@Override
public Object clone() {
    DocumentURI copy = new DocumentURI(uri);
    return copy;
}
/**
 * Orders this object relative to another {@code DocumentURI} by comparing
 * the two underlying uri values.
 *
 * @param o the other document URI
 * @return the result of comparing this uri with {@code o.getUri()}
 */
@Override
public int compareTo(DocumentURI o) {
    final String otherUri = o.getUri();
    return uri.compareTo(otherUri);
}
/**
 * Prefixes the key's URI with an output directory when one is supplied.
 *
 * When {@code outputDir} is null or empty the key is left untouched and its
 * current URI is returned. Otherwise the directory and the URI are joined,
 * inserting a single {@code '/'} only when neither the directory ends with
 * one nor the URI starts with one; the joined value is stored back into
 * {@code key}, and {@code key.validate()} is invoked.
 *
 * The returned string is the joined value as computed here; it is not
 * re-read from the key after validation.
 *
 * @param key document URI to read and, when a directory is given, update
 * @param outputDir directory prefix; may be null or empty
 * @return the original URI, or the directory-prefixed URI
 */
public static String getUriWithOutputDir(DocumentURI key, String outputDir) {
    String uri = key.getUri();
    if (outputDir == null || outputDir.isEmpty()) {
        return uri;
    }

    boolean separatorPresent = outputDir.endsWith("/") || uri.startsWith("/");
    String prefixed;
    if (separatorPresent) {
        prefixed = outputDir + uri;
    } else {
        prefixed = outputDir + '/' + uri;
    }

    key.setUri(prefixed);
    key.validate();
    return prefixed;
}
public static void main(String args[]) throws IOException { System.out.println("Sequence File Creator"); String uri = args[0]; // output sequence file name Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); SequenceFile.Writer writer = null; SimpleSequenceFileKey key = new SimpleSequenceFileKey(); SimpleSequenceFileValue<BytesWritable> value = new SimpleSequenceFileValue<BytesWritable>(); try { BytesWritable bw = new BytesWritable(); byte byteArray[] = {2,3,4}; bw.set(byteArray, 0, byteArray.length); writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); key.setDocumentURI(new DocumentURI("ABC")); value.setValue(bw); writer.append(key, value); System.err.println(key.getDocumentURI().getUri() + value); } finally { IOUtils.closeStream(writer); } } }
/**
 * Loads the given result item into the reader's current key and value.
 *
 * The current key is created on first use and otherwise updated in place
 * with the result's document URI. The current value is instantiated
 * reflectively from {@code valueClass} on first use, then populated via
 * {@code InternalUtilities.assignResultValue}.
 *
 * @param result the result item to consume
 * @return always {@code true}
 */
@SuppressWarnings("unchecked")
@Override
protected boolean nextResult(ResultItem result) {
    if (currentKey == null) {
        currentKey = new DocumentURI(result.getDocumentURI());
    } else {
        currentKey.setUri(result.getDocumentURI());
    }

    if (currentValue == null) {
        currentValue =
                (VALUEIN) ReflectionUtils.newInstance(valueClass, getConf());
    }

    InternalUtilities.assignResultValue(valueClass, result, currentValue);
    return true;
}
pendingURIs[sid].add((DocumentURI)key.clone()); if (++counts[sid] == batchSize) { if (sessions[sid] == null) {
uri = URIUtil.applyUriReplace(uri, conf); uri = URIUtil.applyPrefixSuffix(uri, conf); currentKey.setUri(uri); if (metadata != null) { currentValue.setMeta(metadata); uri = URIUtil.applyUriReplace(uri, conf); uri = URIUtil.applyPrefixSuffix(uri, conf); currentKey.setUri(uri); currentValue.setMeta(metadata); currentValue.setContentType(ContentType.XML);
/**
 * Reads this object's state from the given input by delegating to the
 * {@code readFields} method of the {@code uri} field.
 *
 * @param in source to read from
 * @throws IOException if the delegate fails to read
 */
@Override
public void readFields(DataInput in) throws IOException {
    this.uri.readFields(in);
}
/**
 * Writes this object's state to the given output by delegating to the
 * {@code write} method of the {@code uri} field.
 *
 * @param out destination to write to
 * @throws IOException if the delegate fails to write
 */
@Override
public void write(DataOutput out) throws IOException {
    this.uri.write(out);
}
value.getClass()); key.setDocumentURI(new DocumentURI("Large")); value.setValue(bw); writer.append(key, value); System.err.println(key.getDocumentURI().getUri() + " loaded."); } finally { IOUtils.closeStream(writer);
fId = 0; pendingUris[sid].put(content, (DocumentURI)key.clone()); boolean inserted = false; forestContents[fId][counts[fId]++] = content;
/**
 * Compares this document URI with another object for equality.
 *
 * Two instances are equal when the other object is a {@code DocumentURI}
 * and both hold equal uri values. The comparison is null-safe: an instance
 * whose uri field has not been set equals only another instance whose
 * {@code getUri()} is also null, rather than throwing a
 * {@code NullPointerException}.
 *
 * @param uri the object to compare with; may be null
 * @return {@code true} if {@code uri} is a {@code DocumentURI} with an equal
 *         uri value, {@code false} otherwise
 */
@Override
public boolean equals(Object uri) {
    if (!(uri instanceof DocumentURI)) {
        return false;
    }
    Object otherUri = ((DocumentURI) uri).getUri();
    // Guard against an unset uri field so equals never throws.
    if (this.uri == null) {
        return otherUri == null;
    }
    return this.uri.equals(otherUri);
}
/**
 * For every command-line argument, builds a {@code java.net.URI} using the
 * argument as its path, prints the encoded string form, re-parses that
 * string and prints the decoded path, and records a {@code DocumentURI}
 * for the argument in a local map.
 *
 * @param args strings to treat as URI paths
 * @throws URISyntaxException if an argument cannot be turned into a URI
 */
public static void main(String[] args) throws URISyntaxException {
    HashMap<String, DocumentURI> map = new HashMap<String, DocumentURI>();
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];

        URI encoded = new URI(null, null, null, 0, arg, null, null);
        String encodedText = encoded.toString();
        System.out.println("URI encoded: " + encodedText);

        URI reparsed = new URI(encodedText);
        System.out.println("URI decoded: " + reparsed.getPath());

        map.put(arg, new DocumentURI(arg));
    }
}
}
opt.setFormat(doc.getContentType().getDocumentFormat()); addValue(uri, value, sid, opt, effectiveVersion<PROPS_MIN_VERSION?null:meta.getProperties()); pendingURIs[sid].add((DocumentURI)key.clone()); if (++counts[sid] == batchSize) { queries[sid].setNewVariables(uriName, uris[sid]);
/**
 * Extracts the path portion of a document URI.
 *
 * The uri string is parsed as a {@code java.net.URI}. For an opaque URI the
 * scheme-specific part is returned; otherwise the path component is
 * returned. If parsing fails for any reason, an error is logged and the
 * original uri string is returned unchanged.
 *
 * @param uri the document URI to inspect
 * @return the extracted path, or the raw uri string when it cannot be parsed
 */
protected static String getPathFromURI(DocumentURI uri) {
    String uriStr = uri.getUri();
    try {
        URI child = new URI(uriStr);
        return child.isOpaque()
                ? child.getSchemeSpecificPart()
                : child.getPath();
    } catch (Exception ex) {
        LOG.error("Error parsing URI " + uriStr + ".");
        return uriStr;
    }
}
}
/**
 * Creates a reader configured from the given job configuration.
 *
 * Allocates the reusable current key and the metadata map, then reads the
 * copy flags (collections, permissions, properties, quality, metadata)
 * from the configuration; each flag defaults to {@code true} when absent.
 *
 * @param conf configuration to read the copy settings from
 */
public DatabaseContentReader(Configuration conf) {
    super(conf);

    // Reusable state.
    currentKey = new DocumentURI();
    metadataMap = new HashMap<String, DocumentMetadata>();

    // Copy flags, all enabled unless the configuration says otherwise.
    copyCollection =
            conf.getBoolean(MarkLogicConstants.COPY_COLLECTIONS, true);
    copyPermission = conf.getBoolean(CONF_COPY_PERMISSIONS, true);
    copyProperties = conf.getBoolean(CONF_COPY_PROPERTIES, true);
    copyQuality = conf.getBoolean(MarkLogicConstants.COPY_QUALITY, true);
    copyMetadata = conf.getBoolean(MarkLogicConstants.COPY_METADATA, true);
}
/**
 * Extracts the path portion of a document URI.
 *
 * The uri string is parsed as a {@code java.net.URI}. For an opaque URI the
 * scheme-specific part is returned; otherwise the path component is
 * returned. If parsing fails for any reason, a warning is logged and the
 * original uri string is returned unchanged.
 *
 * @param uri the document URI to inspect
 * @return the extracted path, or the raw uri string when it cannot be parsed
 */
public static String getPathFromURI(DocumentURI uri) {
    String uriStr = uri.getUri();
    try {
        URI child = new URI(uriStr);
        return child.isOpaque()
                ? child.getSchemeSpecificPart()
                : child.getPath();
    } catch (Exception ex) {
        LOG.warn("Error parsing URI " + uriStr + ".");
        return uriStr;
    }
}
}
value.getClass()); while ((line = buffer.readLine()) != null) { key.setDocumentURI(new DocumentURI(line)); if ((line = buffer.readLine()) == null) { break;
/**
 * Passes a record through to the output, counting reads and attempts.
 *
 * The read counter is incremented for every call. A null uri is dropped
 * silently; a uri whose string is empty is dropped with a warning. Any other
 * record increments the attempted counter and is written to the context
 * unchanged. Each counter is incremented while holding that counter's
 * monitor.
 *
 * @param uri key of the record; may be null
 * @param fileContent value of the record
 * @param context context the record is written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(DocumentURI uri, VALUE fileContent, Context context)
        throws IOException, InterruptedException {
    synchronized (readCount) {
        readCount.increment(1);
    }

    if (uri == null) {
        return;
    }
    if (uri.getUri().isEmpty()) {
        LOG.warn("Skipped record: " + uri);
        return;
    }

    synchronized (attemptedCount) {
        attemptedCount.increment(1);
    }
    context.write(uri, fileContent);
}
value.getClass(), CompressionType.RECORD, new GzipCodec()); while ((line = buffer.readLine()) != null) { key.setDocumentURI(new DocumentURI(line)); if ((line = buffer.readLine()) == null) { break;