Codota Logo
Corpus
Code Index | Add Codota to your IDE (free)

How to use
Corpus
in
eu.fbk.dkm.pikes.naflib

Best Java code snippets using eu.fbk.dkm.pikes.naflib.Corpus (Showing top 17 results out of 315)

  • Add the Codota plugin to your IDE and get smart completions
private void myMethod () {
FileOutputStream f =
  • Codota IconFile file;new FileOutputStream(file)
  • Codota IconString name;new FileOutputStream(name)
  • Codota IconFile file;new FileOutputStream(file, true)
  • Smart code suggestions by Codota
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Varargs convenience overload: wraps the supplied files/directories in a list view and
 * delegates to the {@code Iterable}-based {@code create} overload.
 *
 * @param recursive forwarded unchanged to the delegate overload
 * @param filesOrDirs the files and/or directories to build the corpus from
 * @return the corpus returned by the delegate overload
 */
public static Corpus create(final boolean recursive, final Object... filesOrDirs) {
  final Iterable<Object> inputs = Arrays.asList(filesOrDirs);
  return create(recursive, inputs);
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Returns the document at the current cursor position and moves the cursor forward by one.
 * The cursor is advanced before the document is fetched, so it moves even if the fetch fails.
 */
@Override
public KAFDocument next() {
  final int current = this.index;
  this.index = current + 1;
  return get(current);
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Returns a corpus whose documents get their public ID and URI rewritten from the file
 * location: the public ID becomes the file path with the leading {@code path()} prefix
 * stripped, and the URI becomes {@code "doc:"} followed by that same string.
 *
 * @return the corpus produced by {@code transform} with the rewriting step registered
 */
public Corpus fixURIs() {
  return transform((final Path file, final KAFDocument document) -> {
    final String fullPath = file.toString();
    final int prefixLength = path().toString().length();
    // Whatever follows the corpus path prefix identifies the document
    final String suffix = fullPath.substring(prefixLength);
    document.getPublic().uri = "doc:" + suffix;
    document.getPublic().publicId = suffix;
  });
}
origin: eu.fbk.pikes/pikes-rdf

while (true) {
  final int i = counter.getAndIncrement();
  if (i >= Runner.this.corpus.size()) {
    break;
  final Path path = Runner.this.corpus.file(i);
      final Path base = Runner.this.corpus.path();
      final Path relative = base.toAbsolutePath().relativize(
          path.toAbsolutePath());
    final KAFDocument document = Runner.this.corpus.get(i);
    docName = document.getPublic().publicId;
    MDC.put("context", docName);
origin: eu.fbk.pikes/pikes-naflib

Boolean recursive = cmd.hasOption("recursive");
Corpus corpus = Corpus.create(recursive, inputFolder);
corpus.parallelStream().forEach(document -> {
  if (document != null) {
    tokens.addAndGet(document.getTerms().size());
origin: eu.fbk.pikes/pikes-resources

Corpus corpus = Corpus.create(false, inputFolder);
for (Path file : corpus.files()) {
origin: eu.fbk.pikes/pikes-naflib

/**
 * Describes the corpus as its document count plus {@code path()}, or as
 * {@code "Empty corpus"} when it holds no files.
 */
@Override
public String toString() {
  final int count = this.files.length;
  if (count == 0) {
    return "Empty corpus";
  }
  return count + " document(s) corpus (path: " + path() + ")";
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Creates a new corpus over the same files with an additional transformer attached.
 * If this corpus already has a transformer, the new one is chained after it via
 * {@code andThen}; otherwise the supplied transformer is used on its own.
 *
 * @param transformer the callback receiving each file path and its document
 * @return a new corpus sharing this corpus' files and carrying the combined transformer
 */
public Corpus transform(final BiConsumer<Path, KAFDocument> transformer) {
  final BiConsumer<Path, KAFDocument> combined;
  if (this.transformer != null) {
    combined = this.transformer.andThen(transformer);
  } else {
    combined = transformer;
  }
  return new Corpus(this.files, combined);
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Returns a sequential stream over the documents supplied by {@code spliterator()}.
 *
 * @return a non-parallel stream of documents
 */
public Stream<KAFDocument> stream() {
  final boolean parallel = false;
  return StreamSupport.stream(spliterator(), parallel);
}
origin: eu.fbk.pikes/pikes-rdf

@Override
public void run() {
  LOGGER.info("Converting {} NAF files to RDF", this.corpus.size());
  LOGGER.info("Successfully converted {}/{} files", succeeded, this.corpus.size());
origin: eu.fbk.pikes/pikes-naflib

final String relativePath = file.toString().substring(path().toString().length());
document.getPublic().publicId = relativePath;
if ("http://www.example.com".equals(document.getPublic().uri)) {
origin: eu.fbk.pikes/pikes-naflib

/**
 * Splits this corpus into consecutive partitions whose sizes are given as fractions of the
 * total number of files.
 *
 * <p>If a shuffle seed is supplied, the files are first shuffled with a {@code Random}
 * seeded by it, and each resulting partition is then sorted. Without a seed the current file
 * order is kept. Partition {@code i} ends at index {@code ceil(n * (p0 + ... + pi))}, so files
 * beyond the cumulated fraction (when the fractions sum to less than one) belong to no
 * partition.</p>
 *
 * @param shuffleSeed the seed used to shuffle files before splitting, or null for no shuffling
 * @param percentages the fraction of files for each partition; each value must be
 *        non-negative and their sum must not exceed 1.0 (a small tolerance absorbs float
 *        rounding)
 * @return one corpus per supplied fraction, each carrying this corpus' transformer
 * @throws IllegalArgumentException if a fraction is negative or NaN, or if the fractions sum
 *         to more than 1.0
 */
public Corpus[] split(@Nullable final Long shuffleSeed, final float... percentages) {
  // Float sums such as ten times 0.1f come out slightly above 1.0f; tolerate that rounding
  final float tolerance = 1e-5f;

  // Shuffle the files if necessary, using the supplied seed
  Path[] files = this.files;
  if (shuffleSeed != null) {
    final List<Path> list = Lists.newArrayList(files);
    final Random random = new Random(shuffleSeed);
    Collections.shuffle(list, random);
    files = list.toArray(new Path[list.size()]);
  }

  // Split the (shuffled) file array based on supplied percentages
  final Corpus[] corpora = new Corpus[percentages.length];
  int index = 0;
  float cumulated = 0.0f;
  for (int i = 0; i < percentages.length; ++i) {
    final float percentage = percentages[i];
    // The negated comparison also rejects NaN, which would otherwise slip through silently
    if (!(percentage >= 0.0f)) {
      throw new IllegalArgumentException(
          "Invalid percentages (values must be non-negative): "
              + Arrays.toString(percentages));
    }
    cumulated += percentage;
    if (cumulated > 1.0f + tolerance) {
      throw new IllegalArgumentException("Invalid percentages (sum must not exceed 1.0f): "
          + Arrays.toString(percentages));
    }
    // Clamp so that rounding can never push the end index past the array, which would make
    // copyOfRange pad the partition with nulls
    final float fraction = Math.min(cumulated, 1.0f);
    final int endIndex = Math.min(files.length, (int) Math.ceil(files.length * fraction));
    final Path[] partition = Arrays.copyOfRange(files, index, endIndex);
    if (shuffleSeed != null) {
      // Restore a deterministic order inside each shuffled partition
      Arrays.sort(partition);
    }
    corpora[i] = new Corpus(partition, this.transformer);
    index = endIndex;
  }
  return corpora;
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Returns a parallel stream over the documents supplied by {@code spliterator()}.
 *
 * @return a parallel stream of documents
 */
public Stream<KAFDocument> parallelStream() {
  final boolean parallel = true;
  return StreamSupport.stream(spliterator(), parallel);
}
origin: eu.fbk.pikes/pikes-resources

Iterable<KAFDocument> corpus = Corpus.create(false, inputFolder);
origin: eu.fbk.pikes/pikes-naflib

/**
 * Advances the wrapped spliterator by one element, resolving that element through
 * {@code get} and handing the resulting document to the supplied action.
 *
 * @param action the consumer receiving the resolved document
 * @return whatever the wrapped spliterator's {@code tryAdvance} reports
 */
@Override
public boolean tryAdvance(final Consumer<? super KAFDocument> action) {
  return delegate.tryAdvance(file -> action.accept(get(file)));
}
origin: eu.fbk.pikes/pikes-naflib

/**
 * Builds a corpus from a collection of files and/or directories.
 *
 * <p>Each element is converted to a {@code Path}: {@code Path} instances are used as-is,
 * {@code File} instances via {@code toPath()}, and anything else via its {@code toString()}
 * value. The paths are then handed to {@code Util.fileMatch} together with the accepted
 * NAF/XML extensions (plain, gz, bz2, xz) and the {@code recursive} flag, and every matched
 * file is made absolute and normalized.</p>
 *
 * @param recursive forwarded to {@code Util.fileMatch}
 * @param filesOrDirs the files and/or directories to build the corpus from
 * @return {@code EMPTY} if no file matched, otherwise a new corpus over the matched files
 */
public static Corpus create(final boolean recursive, final Iterable<?> filesOrDirs) {
  // Normalize every input element to a Path
  final List<Path> roots = Lists.newArrayList();
  for (final Object candidate : filesOrDirs) {
    final Path root;
    if (candidate instanceof Path) {
      root = (Path) candidate;
    } else if (candidate instanceof File) {
      root = ((File) candidate).toPath();
    } else {
      root = Paths.get(candidate.toString());
    }
    roots.add(root);
  }

  // todo: this uses Util, a class included in utils-svm
  final List<Path> files = Util.fileMatch(roots, ImmutableList.of(".naf", ".naf.gz",
      ".naf.bz2", ".naf.xz", ".xml", ".xml.gz", ".xml.bz2", ".xml.xz"), recursive);

  // Replace each match in place with its absolute, normalized form
  files.replaceAll(file -> file.toAbsolutePath().normalize());

  return files.isEmpty() ? EMPTY : new Corpus(files.toArray(new Path[files.size()]), null);
}
origin: eu.fbk.pikes/pikes-rdf

/**
 * Builds a {@code Runner} from command line arguments.
 *
 * <p>Recognized options, as declared in the option spec below: {@code -r/--recursive},
 * {@code -o/--output}, {@code -m/--merge}, {@code -n/--normalize} and
 * {@code -i/--intermediate}; positional arguments are read as the input files.</p>
 *
 * @param name not used by this method
 * @param args the command line arguments to parse
 * @return a runner over the input corpus, configured with the parsed options
 */
static Runner create(final String name, final String... args) {
  final Options parsed = Options.parse(
      "r,recursive|o,output!|m,merge|n,normalize|i,intermediate|+", args);

  // Read all option values up front, before touching the inputs
  final File output = parsed.getOptionArg("o", File.class);
  final boolean isRecursive = parsed.hasOption("r");
  final boolean doMerge = parsed.hasOption("m");
  final boolean doNormalize = parsed.hasOption("n");
  final boolean keepIntermediate = parsed.hasOption("i");

  final Corpus inputs = Corpus.create(isRecursive, parsed.getPositionalArgs(File.class));

  final RDFGenerator rdfGenerator = RDFGenerator.builder()
      .withProperties(Util.PROPERTIES, "eu.fbk.dkm.pikes.rdf.RDFGenerator")
      .withMerging(doMerge)
      .withNormalization(doNormalize)
      .build();

  return new Runner(inputs, rdfGenerator, output, keepIntermediate);
}
eu.fbk.dkm.pikes.naflib.Corpus

Most used methods

  • create
  • get
  • path
  • <init>
  • file
  • files
  • parallelStream
  • size
  • spliterator
  • spliteratorHelper
  • transform
  • transform

Popular in Java

  • Finding current android device location
  • putExtra (Intent)
  • setScale (BigDecimal)
    Returns a BigDecimal whose scale is the specified value, and whose value is numerically equal to thi
  • compareTo (BigDecimal)
    Compares this BigDecimal with the specified BigDecimal. Two BigDecimal objects that are equal in val
  • InputStream (java.io)
    A readable source of bytes. Most clients will use input streams that read data from the file system (
  • ServerSocket (java.net)
    This class represents a server-side socket that waits for incoming client connections. A ServerSocke
  • Vector (java.util)
    The Vector class implements a growable array of objects. Like an array, it contains components that
  • JComboBox (javax.swing)
  • JFileChooser (javax.swing)
  • Join (org.hibernate.mapping)
Codota Logo
  • Products

    Search for Java code | Search for JavaScript code | Enterprise
  • IDE Plugins

    IntelliJ IDEA | WebStorm | Android Studio | Eclipse | Visual Studio Code | PyCharm | Sublime Text | PhpStorm | Vim | Atom | GoLand | RubyMine | Emacs | Jupyter
  • Company

    About Us | Contact Us | Careers
  • Resources

    FAQ | Blog | Codota Academy | Plugin user guide | Terms of use | Privacy policy | Java Code Index | Javascript Code Index
Get Codota for your IDE now