// Fragment: assorted DataSource usage examples (excerpt; closing braces for
// the if/while blocks below are not visible in this chunk).

// Snippet 1: read a dataset from standard input and print a summary.
DataSource source = new DataSource(System.in);
i = source.getDataSet();
System.out.println(i.toSummaryString());

// Snippet 2: same, but reading from the file named by the first argument.
// NOTE(review): re-declares 'source' — these snippets come from separate scopes.
DataSource source = new DataSource(args[0]);
i = source.getDataSet();
System.out.println(i.toSummaryString());

// Snippet 3: merge two datasets side by side and print the result.
DataSource source1 = new DataSource(args[1]);
DataSource source2 = new DataSource(args[2]);
i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
System.out.println(i);

// Snippet 4: verify both inputs share the same header, then stream the
// instances of each source one at a time.
DataSource source1 = new DataSource(args[1]);
DataSource source2 = new DataSource(args[2]);
String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
if (msg != null) {
  throw new Exception("The two datasets have different headers:\n" + msg);
// NOTE(review): the closing '}' of the if above, and of the loops below,
// lie outside this excerpt.
Instances structure = source1.getStructure();
System.out.println(source1.getStructure());
while (source1.hasMoreElements(structure)) {
  System.out.println(source1.nextElement(structure));
structure = source2.getStructure();
while (source2.hasMoreElements(structure)) {
// Load the dataset from the file named by the first command-line argument
// and run the standard preparation step on it (fragment of a larger method).
Instances data = DataSource.read(args[0]);
// NOTE(review): presumably configures the class attribute(s) per MLUtils
// conventions — confirm against MLUtils.prepareData.
MLUtils.prepareData(data);
/**
 * for testing only - takes a data file as input, echoes the instances read
 * incrementally, then re-reads them through a DataSource proxy built from
 * the in-memory dataset.
 *
 * @param args the commandline arguments; a single data file name
 * @throws Exception if something goes wrong
 */
public static void main(String[] args) throws Exception {
  if (args.length != 1) {
    System.out.println("\nUsage: " + DataSource.class.getName() + " <file>\n");
    System.exit(1);
  }

  // Pass 1: read the file incrementally and echo every instance.
  DataSource loader = new DataSource(args[0]);
  System.out.println("Incremental? " + loader.isIncremental());
  System.out.println("Loader: " + loader.getLoader().getClass().getName());
  System.out.println("Data:\n");
  Instances structure = loader.getStructure();
  System.out.println(structure);
  while (loader.hasMoreElements(structure)) {
    System.out.println(loader.nextElement(structure));
  }

  // Pass 2: load the full dataset and iterate again via a proxy DataSource.
  Instances inst = loader.getDataSet();
  loader = new DataSource(inst);
  System.out.println("\n\nProxy-Data:\n");
  // FIX: re-fetch the structure from the NEW loader instead of reusing the
  // stale header of the first one, and pass that structure (not the full
  // dataset 'inst') to nextElement, matching the first loop's pattern.
  structure = loader.getStructure();
  System.out.println(structure);
  while (loader.hasMoreElements(structure)) {
    System.out.println(loader.nextElement(structure));
  }
}
// Fragment from batch/incremental classifier evaluation (excerpt; several
// opening/closing braces fall outside this chunk).

// Read only the header of the test file; 'template' and 'test' share it.
template = test = new DataSource(testFileName).getStructure();
if (classIndex != -1) {
  test.setClassIndex(classIndex - 1);
// Same for the training file header.
template = train = new DataSource(trainFileName).getStructure();
if (classIndex != -1) {
  train.setClassIndex(classIndex - 1);

if ((classifier instanceof UpdateableClassifier) && !forceBatchTraining) {
  // Build classifier incrementally, one instance at a time.
  trainTimeStart = System.currentTimeMillis();
  DataSource trainSource = new DataSource(trainFileName);
  trainSource.getStructure(); // Need to advance in the file to get to the data
  if (objectInputFileName.length() <= 0) {
    // Only need to initialize classifier if we haven't loaded one
    classifier.buildClassifier(new Instances(train, 0));
  while (trainSource.hasMoreElements(train)) {
    ((UpdateableClassifier) classifier).updateClassifier(trainSource.nextElement(train));
} else if (classifier instanceof IterativeClassifier && continueIteratingIterative) {
  // Resume iterating an IterativeClassifier on the full training set.
  IterativeClassifier iClassifier = (IterativeClassifier) classifier;
  Instances tempTrain = new DataSource(trainFileName).getDataSet(actualClassIndex);
  iClassifier.initializeClassifier(tempTrain);
  while (iClassifier.next()) {
// Batch case: load the whole training set in one go.
Instances tempTrain = new DataSource(trainFileName).getDataSet(actualClassIndex);
if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
  // NOTE(review): right-hand side of this assignment is truncated in the excerpt.
  Instances mappedClassifierDataset = ((weka.classifiers.misc.InputMappedClassifier) classifier)
if (testFileName.length() > 0) { // CASE 1: SEPARATE TEST SET
// Fragment from "supplied test set" handling (excerpt; closing braces fall
// outside this chunk).
source = new DataSource(m_TestLoader);
userTestStructure = source.getStructure();
// Class attribute index comes from the user-chosen test-set configuration.
userTestStructure.setClassIndex(m_TestClassIndex);
break;

case 4: // Test on user split
if (source.isIncremental()) {
  // Incremental loaders cannot report a size without reading everything.
  outBuff.append("user supplied test set: "
    + " size unknown (reading incrementally)\n");
} else {
  outBuff.append("user supplied test set: "
    + source.getDataSet().numInstances() + " instances\n");

// Stream the test instances one at a time.
while (source.hasMoreElements(userTestStructure)) {
  instance = source.nextElement(userTestStructure);
// Fragment from batch/incremental classifier evaluation (excerpt; several
// opening/closing braces fall outside this chunk).

// Read only the header of the test file; 'template' and 'test' share it.
template = test = new DataSource(testFileName).getStructure();
if (classIndex != -1) {
  test.setClassIndex(classIndex - 1);
// Same for the training file header.
template = train = new DataSource(trainFileName).getStructure();
if (classIndex != -1) {
  train.setClassIndex(classIndex - 1);

if ((classifier instanceof UpdateableClassifier) && !forceBatchTraining) {
  // Build classifier incrementally, one instance at a time.
  trainTimeStart = System.currentTimeMillis();
  DataSource trainSource = new DataSource(trainFileName);
  trainSource.getStructure(); // Need to advance in the file to get to the data
  if (objectInputFileName.length() <= 0) {
    // Only need to initialize classifier if we haven't loaded one
    classifier.buildClassifier(new Instances(train, 0));
  while (trainSource.hasMoreElements(train)) {
    ((UpdateableClassifier) classifier).updateClassifier(trainSource.nextElement(train));
// Batch case: load the whole training set in one go.
Instances tempTrain = new DataSource(trainFileName).getDataSet(actualClassIndex);
if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
  // NOTE(review): right-hand side of this assignment is truncated in the excerpt.
  Instances mappedClassifierDataset = ((weka.classifiers.misc.InputMappedClassifier) classifier)
if (testFileName.length() > 0) { // CASE 1: SEPARATE TEST SET
  predsBuff.append("\n=== Predictions on test data ===\n\n");
  classificationOutput.print(classifier, new DataSource(testFileName));
} else if (splitPercentage > 0) { // CASE 2: PERCENTAGE SPLIT
  Instances tmpInst = new DataSource(trainFileName).getDataSet(actualClassIndex);
  if (!preserveOrder) {
    tmpInst.randomize(new Random(seed));
// Fragment from "supplied test set" handling (excerpt; closing braces fall
// outside this chunk).
source = new DataSource(m_TestLoader);
userTestStructure = source.getStructure();
// Class attribute index comes from the user-chosen test-set configuration.
userTestStructure.setClassIndex(m_TestClassIndex);
break;

case 4: // Test on user split
if (source.isIncremental()) {
  // Incremental loaders cannot report a size without reading everything.
  outBuff.append("user supplied test set: "
    + " size unknown (reading incrementally)\n");
} else {
  outBuff.append("user supplied test set: "
    + source.getDataSet().numInstances() + " instances\n");

// Stream the test instances one at a time.
while (source.hasMoreElements(userTestStructure)) {
  instance = source.nextElement(userTestStructure);
// Fragment (excerpt; surrounding braces are not visible in this chunk).
// Keep string attribute values in memory so the test data can be re-used.
((ArffLoader) m_TestLoader).setRetainStringVals(true);
source = new DataSource(m_TestLoader);
userTestStructure = source.getStructure();
userTestStructure.setClassIndex(m_TestClassIndex);
} else {
  outBuff.append("Instances: " + source.getDataSet().numInstances() + "\n");

// Stream the test instances one at a time.
while (source.hasMoreElements(userTestStructure)) {
  instance = source.nextElement(userTestStructure);
// Fragment (excerpt; surrounding braces are not visible in this chunk).
// Keep string attribute values in memory so the test data can be re-used.
((ArffLoader) m_TestLoader).setRetainStringVals(true);
source = new DataSource(m_TestLoader);
userTestStructure = source.getStructure();
userTestStructure.setClassIndex(m_TestClassIndex);
} else {
  outBuff.append("Instances: " + source.getDataSet().numInstances() + "\n");

// Stream the test instances one at a time.
while (source.hasMoreElements(userTestStructure)) {
  instance = source.nextElement(userTestStructure);
// Fragment from clusterer evaluation (excerpt; the if/else scaffolding is
// truncated, so several opening/closing braces are missing here).

// Incremental path: read the header only, then feed instances one by one.
source = new DataSource(trainFileName);
train = source.getStructure();
clusterer.buildClusterer(source.getStructure());
while (source.hasMoreElements(train)) {
  inst = source.nextElement(train);
  ((UpdateableClusterer) clusterer).updateClusterer(inst);

// Batch path: build from the complete dataset.
clusterer.buildClusterer(source.getDataSet());
clusterer.buildClusterer(clusterTrain);
trainHeader = clusterTrain;

// The class attribute is stripped via the 'removeClass' filter before clustering.
while (source.hasMoreElements(train)) {
  inst = source.nextElement(train);
  removeClass.input(inst);
removeClass.batchFinished();
} else {
  Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
  clusterer.buildClusterer(clusterTrain);
  trainHeader = clusterTrain;

// Test data must have a header compatible with the training data.
DataSource test = new DataSource(testFileName);
Instances testStructure = test.getStructure();
if (!trainHeader.equalHeaders(testStructure)) {
  throw new Exception("Training and testing data are not compatible\n"
train = source.getDataSet();
// Fragment from clusterer evaluation (excerpt; the if/else scaffolding is
// truncated, so several opening/closing braces are missing here).

// Incremental path: read the header only, then feed instances one by one.
source = new DataSource(trainFileName);
train = source.getStructure();
clusterer.buildClusterer(source.getStructure());
while (source.hasMoreElements(train)) {
  inst = source.nextElement(train);
  ((UpdateableClusterer) clusterer).updateClusterer(inst);

// Batch path: build from the complete dataset.
clusterer.buildClusterer(source.getDataSet());
clusterer.buildClusterer(clusterTrain);
trainHeader = clusterTrain;

// The class attribute is stripped via the 'removeClass' filter before clustering.
while (source.hasMoreElements(train)) {
  inst = source.nextElement(train);
  removeClass.input(inst);
removeClass.batchFinished();
} else {
  Instances clusterTrain = Filter.useFilter(source.getDataSet(), removeClass);
  clusterer.buildClusterer(clusterTrain);
  trainHeader = clusterTrain;

// Test data must have a header compatible with the training data.
DataSource test = new DataSource(testFileName);
Instances testStructure = test.getStructure();
if (!trainHeader.equalHeaders(testStructure)) {
  throw new Exception("Training and testing data are not compatible\n"
train = source.getDataSet();
// Fragment from a filter's command-line driver handling two input files
// (excerpt; the if/else scaffolding around these lines is truncated).
firstInput = new DataSource(fileName);
} else {
  throw new Exception("No first input file given.\n");
secondInput = new DataSource(fileName);
} else {
  throw new Exception("No second input file given.\n");
throw new Exception("Help requested.\n");

// Both inputs must share the same header before filtering.
firstData = firstInput.getStructure();
secondData = secondInput.getStructure();
if (!secondData.equalHeaders(firstData)) {
  throw new Exception("Input file formats differ.\n"

// Stream the first input through the filter, printing the header once.
while (firstInput.hasMoreElements(firstData)) {
  inst = firstInput.nextElement(firstData);
  if (filter.input(inst)) {
    if (!printedHeader) {

// Same for the second input.
while (secondInput.hasMoreElements(secondData)) {
  inst = secondInput.nextElement(secondData);
  if (filter.input(inst)) {
    if (!printedHeader) {
// Fragment: choose the input source (file or stdin), set the class index,
// then stream instances through the filter (excerpt; braces truncated).
input = new DataSource(infileName);
} else {
  input = new DataSource(System.in);
data = input.getStructure();
// 'classIndex' is the user-supplied option value ("first", "last", or a number).
if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
while (input.hasMoreElements(data)) {
  inst = input.nextElement(data);
  if (debug) {
    System.err.println("Input instance to filter");
// Fragment from a filter's command-line driver handling two input files
// (excerpt; the if/else scaffolding around these lines is truncated).
firstInput = new DataSource(fileName);
} else {
  throw new Exception("No first input file given.\n");
secondInput = new DataSource(fileName);
} else {
  throw new Exception("No second input file given.\n");
throw new Exception("Help requested.\n");

// Both inputs must share the same header before filtering.
firstData = firstInput.getStructure();
secondData = secondInput.getStructure();
if (!secondData.equalHeaders(firstData)) {
  throw new Exception("Input file formats differ.\n"

// Stream the first input through the filter, printing the header once.
while (firstInput.hasMoreElements(firstData)) {
  inst = firstInput.nextElement(firstData);
  if (filter.input(inst)) {
    if (!printedHeader) {

// Same for the second input.
while (secondInput.hasMoreElements(secondData)) {
  inst = secondInput.nextElement(secondData);
  if (filter.input(inst)) {
    if (!printedHeader) {
// Fragment: choose the input source (file or stdin), set the class index,
// then stream instances through the filter (excerpt; braces truncated).
input = new DataSource(infileName);
} else {
  input = new DataSource(System.in);
data = input.getStructure();
// 'classIndex' is the user-supplied option value ("first", "last", or a number).
if (classIndex.length() != 0) {
  if (classIndex.equals("first")) {
while (input.hasMoreElements(data)) {
  inst = input.nextElement(data);
  if (debug) {
    System.err.println("Input instance to filter");
// Fragment: set up the test-set source either from a file name or from an
// existing Instances object (excerpt; the conditional expression whose
// ':' branch appears below is truncated).
source = new DataSource(testFileName);
} else {
  source = new DataSource(test);
testRaw = source.getStructure(test.classIndex());
// Second half of a truncated ternary: an empty copy of the structure.
: new Instances(source.getStructure(), 0);
i = 0;
// Stream the test instances, optionally passing each through the filter.
while (source.hasMoreElements(testRaw)) {
  inst = source.nextElement(testRaw);
  if (filter != null) {
    filter.input(inst);
// Fragment: assorted DataSource usage examples (excerpt; closing braces for
// the if/while blocks below are not visible in this chunk).

// Snippet 1: read a dataset from standard input and print a summary.
DataSource source = new DataSource(System.in);
i = source.getDataSet();
System.out.println(i.toSummaryString());

// Snippet 2: same, but reading from the file named by the first argument.
// NOTE(review): re-declares 'source' — these snippets come from separate scopes.
DataSource source = new DataSource(args[0]);
i = source.getDataSet();
System.out.println(i.toSummaryString());

// Snippet 3: merge two datasets side by side and print the result.
DataSource source1 = new DataSource(args[1]);
DataSource source2 = new DataSource(args[2]);
i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
System.out.println(i);

// Snippet 4: verify both inputs share the same header, then stream the
// instances of each source one at a time.
DataSource source1 = new DataSource(args[1]);
DataSource source2 = new DataSource(args[2]);
String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
if (msg != null) {
  throw new Exception("The two datasets have different headers:\n" + msg);
// NOTE(review): the closing '}' of the if above, and of the loops below,
// lie outside this excerpt.
Instances structure = source1.getStructure();
System.out.println(source1.getStructure());
while (source1.hasMoreElements(structure)) {
  System.out.println(source1.nextElement(structure));
structure = source2.getStructure();
while (source2.hasMoreElements(structure)) {
// Fragment: set up the test-set source either from a file name or from an
// existing Instances object (excerpt; the conditional expression whose
// ':' branch appears below is truncated).
source = new DataSource(testFileName);
} else {
  source = new DataSource(test);
testRaw = source.getStructure(test.classIndex());
// Second half of a truncated ternary: an empty copy of the structure.
: new Instances(source.getStructure(), 0);
i = 0;
// Stream the test instances, optionally passing each through the filter.
while (source.hasMoreElements(testRaw)) {
  inst = source.nextElement(testRaw);
  if (filter != null) {
    filter.input(inst);
// Fragment: stream instances from a file; if the clusterer prefers batch
// prediction, also accumulate them into a dataset (excerpt; loop closing
// braces fall outside this chunk).
DataSource source = new DataSource(fileName);
Instances structure = source.getStructure();
// Only collect the instances when the clusterer implements a more efficient
// batch prediction path; otherwise they are handled one at a time.
Instances forBatchPredictors = (clusterer instanceof BatchPredictor
  && ((BatchPredictor) clusterer).implementsMoreEfficientBatchPrediction())
  ? new Instances(source.getStructure(), 0) : null;
while (source.hasMoreElements(structure)) {
  inst = source.nextElement(structure);
  if (forBatchPredictors != null) {
    forBatchPredictors.add(inst);
// Load the initialization file as a full dataset, then set the class
// attribute from the configured index option (fragment of a larger method).
DataSource source = new DataSource(getInitFile().getAbsolutePath());
Instances data = source.getDataSet();
// The upper bound must be known before getIndex() can resolve values
// such as "last" against this dataset.
m_InitFileClassIndex.setUpper(data.numAttributes() - 1);
data.setClassIndex(m_InitFileClassIndex.getIndex());