/** Returns true when the schema is null, reports no field-name array, or has zero fields. */
public static boolean isEmptySchema(StructType schema) {
    if (schema == null) {
        return true;
    }
    String[] names = schema.fieldNames();
    return names == null || names.length == 0;
}
/**
 * Verifies crosstab("a", "b") over testData2: header column plus one column per
 * distinct "b" value, with each (a, b) pair occurring exactly once.
 */
@Test
public void testCrosstab() {
    Dataset<Row> df = spark.table("testData2");
    Dataset<Row> crosstab = df.stat().crosstab("a", "b");
    String[] columnNames = crosstab.schema().fieldNames();
    Assert.assertEquals("a_b", columnNames[0]);
    Assert.assertEquals("1", columnNames[1]);
    Assert.assertEquals("2", columnNames[2]);
    List<Row> rows = crosstab.collectAsList();
    rows.sort(crosstabRowComparator);
    int count = 1;
    for (Row row : rows) {
        // JUnit convention: expected value first, actual second (was reversed).
        Assert.assertEquals(String.valueOf(count), row.get(0).toString());
        Assert.assertEquals(1L, row.getLong(1));
        Assert.assertEquals(1L, row.getLong(2));
        count++;
    }
}
/**
 * Verifies crosstab("a", "b") over testData2: header column plus one column per
 * distinct "b" value, with each (a, b) pair occurring exactly once.
 */
@Test
public void testCrosstab() {
    Dataset<Row> df = spark.table("testData2");
    Dataset<Row> crosstab = df.stat().crosstab("a", "b");
    String[] columnNames = crosstab.schema().fieldNames();
    Assert.assertEquals("a_b", columnNames[0]);
    Assert.assertEquals("1", columnNames[1]);
    Assert.assertEquals("2", columnNames[2]);
    List<Row> rows = crosstab.collectAsList();
    rows.sort(crosstabRowComparator);
    int count = 1;
    for (Row row : rows) {
        // JUnit convention: expected value first, actual second (was reversed).
        Assert.assertEquals(String.valueOf(count), row.get(0).toString());
        Assert.assertEquals(1L, row.getLong(1));
        Assert.assertEquals(1L, row.getLong(2));
        count++;
    }
}
/**
 * Verifies crosstab("a", "b") over testData2: header column plus one column per
 * distinct "b" value, with each (a, b) pair occurring exactly once.
 */
@Test
public void testCrosstab() {
    Dataset<Row> df = spark.table("testData2");
    Dataset<Row> crosstab = df.stat().crosstab("a", "b");
    String[] columnNames = crosstab.schema().fieldNames();
    Assert.assertEquals("a_b", columnNames[0]);
    Assert.assertEquals("1", columnNames[1]);
    Assert.assertEquals("2", columnNames[2]);
    List<Row> rows = crosstab.collectAsList();
    rows.sort(crosstabRowComparator);
    int count = 1;
    for (Row row : rows) {
        // JUnit convention: expected value first, actual second (was reversed).
        Assert.assertEquals(String.valueOf(count), row.get(0).toString());
        Assert.assertEquals(1L, row.getLong(1));
        Assert.assertEquals(1L, row.getLong(2));
        count++;
    }
}
/**
 * Collects the distinct column families referenced by the row's fields,
 * skipping the synthetic "rowkey" family.
 */
private Set<String> getColumnFamilies(Row row) {
    Set<String> families = Sets.newHashSet();
    for (String field : row.schema().fieldNames()) {
        ColumnDef columnDef = columns.get(field);
        if (columnDef.cf.equals("rowkey")) {
            continue;
        }
        families.add(columnDef.cf);
    }
    return families;
}
/**
 * Returns true when the Spark schema's column names match the analytics
 * schema's column names exactly, including order.
 */
public static Boolean validateSchemaColumns(StructType sparkSchema, AnalyticsSchema analyticsSchema) {
    String[] sparkCols = sparkSchema.fieldNames();
    String[] analyticsCols = analyticsSchema.getColumns().keySet().toArray(new String[0]);
    return Arrays.equals(sparkCols, analyticsCols);
}
/**
 * True when the row contains every configured key column.
 * Builds the lookup set once instead of re-wrapping and linearly scanning
 * the field-name array for each key column (was O(n*m)).
 */
private boolean filtersEntireRowKey(Row row) {
    Set<String> rowFieldNames = Sets.newHashSet(row.schema().fieldNames());
    return rowFieldNames.containsAll(keyColumns);
}
/**
 * True if the provided row contains a mutation type field.
 */
public static boolean hasMutationTypeField(Row row) {
    return Arrays.stream(row.schema().fieldNames())
        .anyMatch(fieldName -> fieldName.equals(MutationType.MUTATION_TYPE_FIELD_NAME));
}
/**
 * True when the row's columns are exactly a leading prefix of the key columns.
 * Guards against rows with more columns than the key: subList() would throw
 * IndexOutOfBoundsException in that case, so such rows simply don't match.
 */
private boolean filtersRowKeyPrefix(Row row) {
    Set<String> rowColumnNames = Sets.newHashSet(row.schema().fieldNames());
    if (rowColumnNames.size() > keyColumns.size()) {
        return false;
    }
    Set<String> prefixColumnNames = Sets.newHashSet(keyColumns.subList(0, rowColumnNames.size()));
    return rowColumnNames.equals(prefixColumnNames);
}
/**
 * Pairs each positional row value with its schema-declared column name.
 * Assumes the schema has at least as many fields as the row — TODO confirm.
 */
public static Map<String, Object> convertRowAndSchemaToValuesMap(Row row, StructType schema) {
    String[] columnNames = schema.fieldNames();
    Map<String, Object> valuesByColumn = new HashMap<>();
    for (int index = 0; index < row.length(); index++) {
        valuesByColumn.put(columnNames[index], row.get(index));
    }
    return valuesByColumn;
}
/**
 * Streams the row's field names, excluding reserved property names and
 * fields whose value is null. The reserved-name check runs first, so
 * fieldIndex() is only consulted for non-reserved fields.
 */
private Stream<String> getPropertyNames(final Row row) {
    return Arrays.stream(row.schema().fieldNames())
        .filter(name -> !ReservedPropertyNames.contains(name)
            && !row.isNullAt(row.fieldIndex(name)));
}
}
/**
 * Materializes the Dataset into a DataSetResult holding the column names
 * and one value list per row.
 */
public static DataSetResult getDataSetResult(Dataset<Row> df) {
    DataSetResult result = new DataSetResult();
    String[] fieldNames = df.schema().fieldNames();
    result.getColumnNames().addAll(Arrays.asList(fieldNames));
    // collectAsList() is the Java-API idiom and avoids the unchecked (Row[]) cast.
    for (Row row : df.collectAsList()) {
        List<Object> values = new ArrayList<>(fieldNames.length);
        for (int i = 0; i < fieldNames.length; i++) {
            values.add(row.get(i));
        }
        result.getRows().add(values);
    }
    return result;
}
/**
 * Reorders and pads the input's columns to match the target table's schema:
 * table columns present in the input are selected as-is, missing ones are
 * filled with null literals. Name comparison honors Spark's case-sensitivity
 * setting (default-insensitive, via lowercasing both sides).
 */
public Dataset<Row> alignColumns(Dataset<Row> input) {
    boolean caseSensitive = Contexts.getSparkSession().sparkContext().getConf()
        .getBoolean(SPARK_SQL_CASE_SENSITIVE_CONFIG, false);

    Set<String> inputCols = new HashSet<>();
    for (String col : input.schema().fieldNames()) {
        inputCols.add(caseSensitive ? col : col.toLowerCase());
    }

    List<Column> alignedCols = new ArrayList<>();
    for (String col : Contexts.getSparkSession().table(tableName).schema().fieldNames()) {
        String name = caseSensitive ? col : col.toLowerCase();
        alignedCols.add(inputCols.contains(name)
            ? functions.col(name)
            : functions.lit(null).alias(name));
    }
    return input.select(alignedCols.toArray(new Column[0]));
}
/**
 * True when the row equals the filter on every filter field. The comparison
 * is null-safe: a null row value matches only a null filter value, instead
 * of throwing NullPointerException as the previous direct equals() call did.
 */
private boolean matchesValueFilter(Row row, Row filter) {
    for (String filterFieldName : filter.schema().fieldNames()) {
        Object rowValue = row.get(row.fieldIndex(filterFieldName));
        Object filterValue = RowUtils.get(filter, filterFieldName);
        boolean equal = (rowValue == null) ? (filterValue == null) : rowValue.equals(filterValue);
        if (!equal) {
            return false;
        }
    }
    return true;
}
/**
 * Projects the row down to the fields named by subsetSchema, looking each
 * field up by name in the source row, and returns a new schema-bearing row.
 */
public static Row subsetRow(Row row, StructType subsetSchema) {
    String[] fieldNames = subsetSchema.fieldNames();
    Object[] values = new Object[subsetSchema.length()];
    for (int i = 0; i < fieldNames.length; i++) {
        values[i] = row.get(row.fieldIndex(fieldNames[i]));
    }
    return new RowWithSchema(subsetSchema, values);
}
public static RowBasedKeyValueBatch allocate(StructType keySchema, StructType valueSchema, TaskMemoryManager manager, int maxRows) { boolean allFixedLength = true; // checking if there is any variable length fields // there is probably a more succinct impl of this for (String name : keySchema.fieldNames()) { allFixedLength = allFixedLength && UnsafeRow.isFixedLength(keySchema.apply(name).dataType()); } for (String name : valueSchema.fieldNames()) { allFixedLength = allFixedLength && UnsafeRow.isFixedLength(valueSchema.apply(name).dataType()); } if (allFixedLength) { return new FixedLengthRowBasedKeyValueBatch(keySchema, valueSchema, maxRows, manager); } else { return new VariableLengthRowBasedKeyValueBatch(keySchema, valueSchema, maxRows, manager); } }
/**
 * Converts the DataFrame to an AnalyticsQueryResult, capping the number of
 * collected rows when "carbon.spark.results.limit" is configured (-1 means
 * unlimited).
 */
private AnalyticsQueryResult toResult(DataFrame dataFrame) throws AnalyticsExecutionException {
    int resultsLimit = this.sparkConf.getInt("carbon.spark.results.limit", -1);
    DataFrame capped = (resultsLimit != -1) ? dataFrame.limit(resultsLimit) : dataFrame;
    return new AnalyticsQueryResult(dataFrame.schema().fieldNames(),
        convertRowsToObjects(capped.collect()));
}
public static RowBasedKeyValueBatch allocate(StructType keySchema, StructType valueSchema, TaskMemoryManager manager, int maxRows) { boolean allFixedLength = true; // checking if there is any variable length fields // there is probably a more succinct impl of this for (String name : keySchema.fieldNames()) { allFixedLength = allFixedLength && UnsafeRow.isFixedLength(keySchema.apply(name).dataType()); } for (String name : valueSchema.fieldNames()) { allFixedLength = allFixedLength && UnsafeRow.isFixedLength(valueSchema.apply(name).dataType()); } if (allFixedLength) { return new FixedLengthRowBasedKeyValueBatch(keySchema, valueSchema, maxRows, manager); } else { return new VariableLengthRowBasedKeyValueBatch(keySchema, valueSchema, maxRows, manager); } }
/**
 * Returns a copy of the target row whose time fields (as defined by
 * intoTimeModel) are overwritten with the time taken from fromRow (as
 * defined by fromTimeModel). The two time models must be of the same type,
 * though they may be different instances. Neither input row is modified.
 */
public static Row copyTime(Row fromRow, TimeModel fromTimeModel, Row into, TimeModel intoTimeModel) {
    assertCompatibleTimeModels(fromTimeModel, intoTimeModel);
    Row sourceTime = fromTimeModel.getTime(fromRow);
    String[] targetFieldNames = intoTimeModel.getSchema().fieldNames();
    for (int i = 0; i < fromTimeModel.getSchema().size(); i++) {
        into = RowUtils.set(into, targetFieldNames[i], sourceTime.get(i));
    }
    return into;
}
/**
 * Returns a copy of the target row whose time fields (as defined by
 * intoTimeModel) are overwritten with the preceding time taken from fromRow
 * (as defined by fromTimeModel). The two time models must be of the same
 * type, though they may be different instances. Neither input row is
 * modified.
 */
public static Row copyPrecedingTime(Row fromRow, TimeModel fromTimeModel, Row into, TimeModel intoTimeModel) {
    assertCompatibleTimeModels(fromTimeModel, intoTimeModel);
    Row sourceTime = fromTimeModel.getPrecedingTime(fromRow);
    String[] targetFieldNames = intoTimeModel.getSchema().fieldNames();
    for (int i = 0; i < fromTimeModel.getSchema().size(); i++) {
        into = RowUtils.set(into, targetFieldNames[i], sourceTime.get(i));
    }
    return into;
}