@Override
public void delete() {
    try {
        FileUtil.delete(refFile);
    } catch (IOException e) {
        // Best-effort cleanup: log the failure instead of propagating it.
        LOG.error("Could not delete commit file: " + refFile, e);
    }
}
/**
 * Gets the latest index file created for a job suite (if one exists).
 * @param suiteWorkdir suite working directory
 * @param suiteId suite unique ID (ID of the root job)
 * @return the index file
 */
public static File getSuiteIndexFile(
        String suiteWorkdir, String suiteId) {
    // Index file path: <workdir>/latest/<safe-suite-id>.index
    String path = suiteWorkdir
            + File.separator + "latest"
            + File.separator + FileUtil.toSafeFileName(suiteId) + ".index";
    return new File(path);
}
}
private void deleteEmptyOldDirs(File parentDir) {
    // Only directories empty since before this cutoff are removed.
    long cutoffMillis = System.currentTimeMillis()
            - (DateUtils.MILLIS_PER_SECOND * EMPTY_DIRS_SECONDS_LIMIT);
    int dirCount = FileUtil.deleteEmptyDirs(
            parentDir, new Date(cutoffMillis));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Deleted " + dirCount
                + " empty directories under " + parentDir);
    }
}
// NOTE(review): fragment — the enclosing method is not visible in this chunk.
// Resolve the detected name to a known content type; presumably
// ContentType.valueOf returns null for unrecognized names — TODO confirm.
ContentType ct = ContentType.valueOf(name);
if (ct != null) {
    // Mark the embedded object and build a file name carrying its extension.
    embedMeta.setEmbeddedType("file-object");
    return "embedded-" + embedCount + "." + ct.getExtension();
/**
 * Detects the content type from the given input stream.
 * @param content the content on which to detect content type
 * @return the detected content type
 * @throws IOException problem detecting content type
 */
public ContentType detect(InputStream content) throws IOException {
    // NOTE(review): a new Tika facade is created per call; if this proves
    // hot, consider caching it in a field — confirm thread-safety first.
    String contentType = new Tika().detect(content);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Detected \"" + contentType
                + "\" content-type for input stream.");
    }
    return ContentType.valueOf(contentType);
}
/**
/** * Gets a parser based on content type, regardless of document reference * (ignoring it). * All parsers are assumed to have been configured properly * before the first call to this method. */ @Override public final IDocumentParser getParser( String documentReference, ContentType contentType) { // If ignoring content-type, do not even return a parser if (contentType != null && StringUtils.isNotBlank(ignoredContentTypesRegex) && contentType.toString().matches(ignoredContentTypesRegex)) { return null; } ensureParseHintsState(); IDocumentParser parser = parsers.get(contentType); if (parser == null) { return fallbackParser; } return parser; }
@Override
public final void backup(final String suiteName, final String jobId,
        final Date backupDate) throws IOException {
    // Move the current status file (when present) to its backup location.
    File source = getStatusFile(suiteName, jobId);
    if (source.exists()) {
        FileUtil.moveFile(
                source, getBackupFile(suiteName, jobId, backupDate));
    }
}
/**
 * Computes a hash from the regex, parse hints, up-to-date flag, parser-map
 * size, the fallback parser, and every (type, parser) pair in the map.
 */
@Override
public int hashCode() {
    int hash = new HashCodeBuilder()
            .append(ignoredContentTypesRegex)
            .append(parseHints)
            .append(parsersAreUpToDate)
            .append(parsers.size())
            .toHashCode();
    // FIX: guard against a null fallback parser — map values are
    // null-guarded below, but the fallback previously was not and
    // would throw a NullPointerException here.
    if (fallbackParser != null) {
        hash += fallbackParser.hashCode();
    }
    for (Entry<ContentType, IDocumentParser> entry : parsers.entrySet()) {
        ContentType ct = entry.getKey();
        hash += ct.hashCode();
        IDocumentParser parser = entry.getValue();
        if (parser == null) {
            continue;
        }
        hash += parser.hashCode();
    }
    return hash;
}
@Override protected long getInitialQueueDocCount() { final MutableLong fileCount = new MutableLong(); // --- Additions and Deletions --- FileUtil.visitAllFiles( new File(queue.getDirectory()), new IFileVisitor() { @Override public void visit(File file) { fileCount.increment(); } }, REF_FILTER); return fileCount.longValue(); }
/**
 * Gets the file used to store the job progress.
 * @param suiteName name space given to the job progress
 * @param jobId the job unique name
 * @return file used to store the job process
 */
private File getStatusFile(final String suiteName, final String jobId) {
    resolveDirsIfNeeded();
    // File name: <safe-suite>__<safe-job>.job under the "latest" job dir.
    String fileName = FileUtil.toSafeFileName(suiteName)
            + "__" + FileUtil.toSafeFileName(jobId) + ".job";
    return new File(jobdirLatest + "/" + fileName);
}
/**
/**
 * Deletes the meta, reference, and content files for this commit.
 * Each deletion is attempted independently so that one failure does not
 * prevent the remaining files from being removed (previously, the first
 * IOException aborted the whole sequence, leaving later files behind).
 */
@Override
public void delete() {
    deleteQuietly(metaFile);
    deleteQuietly(refFile);
    deleteQuietly(contentFile);
}

/** Deletes one commit file, logging (not propagating) any failure. */
private void deleteQuietly(File file) {
    try {
        FileUtil.delete(file);
    } catch (IOException e) {
        LOG.error("Could not delete commit file: " + file, e);
    }
}
protected void initDefaultParsers() { // Fallback parser fallbackParser = new FallbackParser(); //TODO delete when released in Tika: //https://issues.apache.org/jira/browse/TIKA-2222 // PureEdge XFDL parsers.put( ContentType.valueOf("application/vnd.xfdl"), new XFDLParser()); }
@Override
public Object[] getInsertCrawlDataValues(
        String table, ICrawlData crawlData) {
    // Flatten nullable fields to SQL-friendly values: a null content type
    // stays null; a null crawl date becomes epoch zero.
    String contentType = crawlData.getContentType() == null
            ? null : crawlData.getContentType().toString();
    long crawlDate = crawlData.getCrawlDate() == null
            ? 0 : crawlData.getCrawlDate().getTime();
    return new Object[] {
            crawlData.getReference(),
            crawlData.getParentRootReference(),
            crawlData.isRootParentReference(),
            crawlData.getState().toString(),
            crawlData.getMetaChecksum(),
            crawlData.getContentChecksum(),
            contentType,
            crawlDate
    };
}
/**
 * Gets the log file used by this log manager.
 * @param suiteId log file suiteId
 * @return log file, or {@code null} when no suite ID is given
 */
public File getLogFile(final String suiteId) {
    if (suiteId == null) {
        return null;
    }
    resolveDirsIfNeeded();
    String fileName = FileUtil.toSafeFileName(suiteId) + LOG_SUFFIX;
    return new File(logdirLatest + "/" + fileName);
}
@Override
public final void remove(final String suiteName, final String jobId)
        throws IOException {
    // Removing a job status simply deletes its backing file.
    FileUtil.delete(getStatusFile(suiteName, jobId));
}
/*default*/ File getSuiteStopFile() {
    // Stop file path: <workdir>/latest/<safe-id>.stop
    String path = getWorkdir()
            + File.separator + "latest"
            + File.separator + FileUtil.toSafeFileName(getId()) + ".stop";
    return new File(path);
}
/*default*/ List<IJobLifeCycleListener> getJobLifeCycleListeners() {
public synchronized void stopMonitoring() {
    monitoring = false;
    // Nothing more to do unless a stop file is lingering on disk.
    if (!stopFile.exists()) {
        return;
    }
    try {
        FileUtil.delete(stopFile);
    } catch (IOException e) {
        throw new JEFException(
                "Cannot delete stop file: " + stopFile, e);
    }
}
@Override
public final Appender createAppender(final String suiteId)
        throws IOException {
    resolveDirsIfNeeded();
    // One log file per suite under the "latest" log directory.
    String logPath = logdirLatest + "/"
            + FileUtil.toSafeFileName(suiteId) + LOG_SUFFIX;
    return new FileAppender(new PatternLayout(LAYOUT_PATTERN), logPath);
}
@Override
public ICrawlDataStore createCrawlDataStore(
        ICrawlerConfig config, boolean resume) {
    // Store path: <workdir>/crawlstore/mvstore/<safe-crawler-id>/
    String storeDir = config.getWorkDir().getPath()
            + "/crawlstore/mvstore/"
            + FileUtil.toSafeFileName(config.getId()) + "/";
    return new MVStoreCrawlDataStore(storeDir, resume);
}
@Override
public ICrawlDataStore createCrawlDataStore(
        ICrawlerConfig config, boolean resume) {
    // Store path: <workdir>/crawlstore/jdbc/<safe-crawler-id>/
    String storeDir = config.getWorkDir().getPath()
            + "/crawlstore/jdbc/"
            + FileUtil.toSafeFileName(config.getId()) + "/";
    return new JDBCCrawlDataStore(
            storeDir, resume, createJDBCSerializer());
}