/**
 * Builds one {@link Column} per column of interest: the column itself when it is
 * present in {@code cols}, otherwise a null literal aliased to that column name.
 *
 * @param cols the columns to fill in
 * @param allCols a set containing all columns of interest
 * @return an array containing the filled columns
 */
private static Column[] expr(final Set<String> cols, final Set<String> allCols) {
  return allCols.stream()
      .map(x -> cols.contains(x) ? col(x) : lit(null).as(x))
      .toArray(Column[]::new);
}
/**
 * Creates an empty {@link Dataset} of {@link Row}s for use as edges in a
 * {@link org.graphframes.GraphFrame}.
 *
 * @param sparkSession the spark session
 * @return an empty {@link Dataset} of {@link Row}s with a src and dst column.
 */
public static Dataset<Row> emptyEdges(final SparkSession sparkSession) {

  // Null literals aliased to the GraphFrame edge column names.
  final Column src = lit(null).as("src");
  final Column dst = lit(null).as("dst");

  return sparkSession.emptyDataFrame().select(src, dst);
}
}
@Override
public List<Tuple2<MutationType, Dataset<Row>>> planMutationsForSet(Dataset<Row> arriving) {
  // Optionally generate the key value for the arriving records.
  // NOTE(review): lit(...) stamps the SAME UUID onto every row of this batch —
  // confirm that a single shared key per batch is the intended behavior.
  if (setsKeyToUUID()) {
    arriving = arriving.withColumn(
        getKeyFieldNames().get(0), functions.lit(UUID.randomUUID().toString()));
  }

  // Optionally stamp the configured last-updated field with the current timestamp.
  if (hasLastUpdatedField()) {
    arriving = arriving.withColumn(
        getLastUpdatedFieldName(), functions.lit(currentTimestampString()));
  }

  // Everything arriving is planned as a single INSERT mutation.
  List<Tuple2<MutationType, Dataset<Row>>> planned = Lists.newArrayList();
  planned.add(new Tuple2<>(MutationType.INSERT, arriving));

  return planned;
}
@Override
public List<Tuple2<MutationType, Dataset<Row>>> planMutationsForSet(Dataset<Row> arriving) {
  // Optionally stamp the configured last-updated field with the current timestamp.
  if (hasLastUpdatedField()) {
    arriving = arriving.withColumn(
        getLastUpdatedFieldName(), functions.lit(currentTimestampString()));
  }

  // Everything arriving is planned as a single UPSERT mutation.
  List<Tuple2<MutationType, Dataset<Row>>> planned = Lists.newArrayList();
  planned.add(new Tuple2<>(MutationType.UPSERT, arriving));

  return planned;
}
/**
 * Returns the mappings for the given URI and version.
 *
 * @param uri the uri of the concept map for which we get mappings
 * @param version the version of the concept map for which we get mappings
 * @return a dataset of mappings for the given URI and version.
 */
public Dataset<Mapping> getMappings(String uri, String version) {

  // Successive where clauses are conjoined, selecting only the rows
  // belonging to the requested concept map.
  return this.mappings
      .where(functions.col("conceptmapuri").equalTo(lit(uri)))
      .where(functions.col("conceptmapversion").equalTo(lit(version)));
}
/**
 * Returns the mappings for the given URI and version.
 *
 * @param uri the uri of the concept map for which we get mappings
 * @param version the version of the concept map for which we get mappings
 * @return a dataset of mappings for the given URI and version.
 */
public Dataset<Mapping> getMappings(String uri, String version) {

  // Filter the full mappings dataset down to the single concept map
  // identified by the (uri, version) pair.
  return this.mappings.where(functions.col("conceptmapuri").equalTo(lit(uri))
      .and(functions.col("conceptmapversion").equalTo(lit(version))));
}
/**
 * Returns the values for the given URI and version.
 *
 * @param uri the uri of the value set for which we get values
 * @param version the version of the value set for which we get values
 * @return a dataset of values for the given URI and version.
 */
public Dataset<Value> getValues(String uri, String version) {

  // Successive where clauses are conjoined, selecting only the rows
  // belonging to the requested value set.
  return this.values
      .where(col("valueseturi").equalTo(lit(uri)))
      .where(col("valuesetversion").equalTo(lit(version)));
}
/**
 * Returns the values for the given URI and version.
 *
 * @param uri the uri of the value set for which we get values
 * @param version the version of the value set for which we get values
 * @return a dataset of values for the given URI and version.
 */
public Dataset<Value> getValues(String uri, String version) {

  // Filter the full values dataset down to the single value set
  // identified by the (uri, version) pair.
  return this.values.where(col("valueseturi").equalTo(lit(uri))
      .and(col("valuesetversion").equalTo(lit(version))));
}
/**
 * Aligns the columns of the input dataset with the schema of the target table:
 * table columns present in the input are passed through, and table columns missing
 * from the input are filled with null literals, so the result matches the table's
 * column set and order.
 *
 * @param input the arriving dataset
 * @return the input dataset with its columns aligned to the target table's schema
 */
public Dataset<Row> alignColumns(Dataset<Row> input) {
  // Honor Spark's case-sensitivity setting when matching column names.
  // (Primitive boolean — no reason to box the flag.)
  boolean caseSensitive = Contexts.getSparkSession().sparkContext().getConf()
      .getBoolean(SPARK_SQL_CASE_SENSITIVE_CONFIG, false);

  // fieldNames() returns an array, which for-each iterates directly —
  // no Arrays.asList wrapper needed.
  Set<String> inputCols = new HashSet<>();
  for (String col : input.schema().fieldNames()) {
    // NOTE(review): toLowerCase() uses the default locale — confirm table/column
    // names never contain locale-sensitive characters.
    inputCols.add(caseSensitive ? col : col.toLowerCase());
  }

  List<String> tableCols = new ArrayList<>();
  for (String col : Contexts.getSparkSession().table(tableName).schema().fieldNames()) {
    tableCols.add(caseSensitive ? col : col.toLowerCase());
  }

  // Select table columns in table order, substituting a null literal for any
  // column the input does not provide.
  List<Column> alignedCols = new ArrayList<>();
  for (String column : tableCols) {
    alignedCols.add(inputCols.contains(column)
        ? functions.col(column)
        : functions.lit(null).alias(column));
  }

  return input.select(alignedCols.toArray(new Column[0]));
}
protected C withConceptMaps(Dataset<T> newMaps, Dataset<Mapping> newMappings) { Dataset<UrlAndVersion> newMembers = getUrlAndVersions(newMaps); // Instantiating a new composite ConceptMaps requires a new timestamp Timestamp timestamp = new Timestamp(System.currentTimeMillis()); Dataset<T> newMapsWithTimestamp = newMaps .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp")) .as(conceptMapEncoder); return newInstance(spark, this.members.union(newMembers), this.conceptMaps.union(newMapsWithTimestamp), this.mappings.union(newMappings)); }
private ValueSets withValueSets(Dataset<ValueSet> newValueSets, Dataset<Value> newValues) { Dataset<UrlAndVersion> newMembers = getUrlAndVersions(newValueSets); // Instantiating a new composite ConceptMaps requires a new timestamp Timestamp timestamp = new Timestamp(System.currentTimeMillis()); Dataset<ValueSet> newValueSetsWithTimestamp = newValueSets .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp")) .as(VALUE_SET_ENCODER); return new ValueSets(spark, this.members.union(newMembers), this.valueSets.union(newValueSetsWithTimestamp), this.values.union(newValues)); }
// Returns a new composite instance containing the existing concept maps plus the
// given ones, with the new maps stamped by a creation timestamp.
protected C withConceptMaps(Dataset<T> newMaps, Dataset<Mapping> newMappings) {
  Dataset<UrlAndVersion> newMembers = getUrlAndVersions(newMaps);

  // Instantiating a new composite ConceptMaps requires a new timestamp
  Timestamp timestamp = new Timestamp(System.currentTimeMillis());

  // Stamp the new maps and re-encode them back to the concept map type.
  Dataset<T> newMapsWithTimestamp = newMaps
      .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp"))
      .as(conceptMapEncoder);

  return newInstance(spark,
      this.members.union(newMembers),
      this.conceptMaps.union(newMapsWithTimestamp),
      this.mappings.union(newMappings));
}
// Returns a new ValueSets instance containing the existing value sets plus the
// given ones, with the new value sets stamped by a creation timestamp.
private ValueSets withValueSets(Dataset<ValueSet> newValueSets, Dataset<Value> newValues) {
  Dataset<UrlAndVersion> newMembers = getUrlAndVersions(newValueSets);

  // Instantiating a new composite ValueSets requires a new timestamp
  Timestamp timestamp = new Timestamp(System.currentTimeMillis());

  // Stamp the new value sets and re-encode them back to the value set type.
  Dataset<ValueSet> newValueSetsWithTimestamp = newValueSets
      .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp"))
      .as(VALUE_SET_ENCODER);

  return new ValueSets(spark,
      this.members.union(newMembers),
      this.valueSets.union(newValueSetsWithTimestamp),
      this.values.union(newValues));
}
// Returns a new ValueSets instance containing the existing value sets plus the
// given ones, with the new value sets stamped by a creation timestamp.
private ValueSets withValueSets(Dataset<ValueSet> newValueSets, Dataset<Value> newValues) {
  Dataset<UrlAndVersion> newMembers = getUrlAndVersions(newValueSets);

  // Instantiating a new composite ValueSets requires a new timestamp
  Timestamp timestamp = new Timestamp(System.currentTimeMillis());

  // Stamp the new value sets and re-encode them back to the value set type.
  Dataset<ValueSet> newValueSetsWithTimestamp = newValueSets
      .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp"))
      .as(VALUE_SET_ENCODER);

  return new ValueSets(spark,
      this.members.union(newMembers),
      this.valueSets.union(newValueSetsWithTimestamp),
      this.values.union(newValues));
}
/**
 * Returns an empty ConceptMaps instance.
 *
 * @param spark the spark session
 * @return an empty ConceptMaps instance.
 */
public static ConceptMaps getEmpty(SparkSession spark) {

  // A null timestamp column is added so the empty dataset's schema
  // includes the timestamp field before re-encoding.
  Dataset<ConceptMap> noMaps = spark.emptyDataset(CONCEPT_MAP_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(CONCEPT_MAP_ENCODER);

  Dataset<UrlAndVersion> noMembers = spark.emptyDataset(URL_AND_VERSION_ENCODER);
  Dataset<Mapping> noMappings = spark.emptyDataset(MAPPING_ENCODER);

  return new ConceptMaps(spark, noMembers, noMaps, noMappings);
}
/**
 * Returns an empty ConceptMaps instance.
 *
 * @param spark the spark session
 * @return an empty ConceptMaps instance.
 */
public static ConceptMaps getEmpty(SparkSession spark) {

  // The null timestamp column is added before re-encoding, presumably so the
  // empty dataset's schema matches timestamped concept maps — confirm.
  Dataset<ConceptMap> emptyConceptMaps = spark.emptyDataset(CONCEPT_MAP_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(CONCEPT_MAP_ENCODER);

  return new ConceptMaps(spark,
      spark.emptyDataset(URL_AND_VERSION_ENCODER),
      emptyConceptMaps,
      spark.emptyDataset(MAPPING_ENCODER));
}
/**
 * Returns an empty ConceptMaps instance.
 *
 * @param spark the spark session
 * @return an empty ConceptMaps instance.
 */
public static ConceptMaps getEmpty(SparkSession spark) {

  // The null timestamp column is added before re-encoding, presumably so the
  // empty dataset's schema matches timestamped concept maps — confirm.
  Dataset<ConceptMap> emptyConceptMaps = spark.emptyDataset(CONCEPT_MAP_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(CONCEPT_MAP_ENCODER);

  return new ConceptMaps(spark,
      spark.emptyDataset(URL_AND_VERSION_ENCODER),
      emptyConceptMaps,
      spark.emptyDataset(MAPPING_ENCODER));
}
/**
 * Returns an empty ValueSets instance.
 *
 * @param spark the spark session
 * @return an empty ValueSets instance.
 */
public static ValueSets getEmpty(SparkSession spark) {

  // A null timestamp column is added so the empty dataset's schema
  // includes the timestamp field before re-encoding.
  Dataset<ValueSet> noValueSets = spark.emptyDataset(VALUE_SET_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(VALUE_SET_ENCODER);

  Dataset<UrlAndVersion> noMembers = spark.emptyDataset(URL_AND_VERSION_ENCODER);
  Dataset<Value> noValues = spark.emptyDataset(getValueEncoder());

  return new ValueSets(spark, noMembers, noValueSets, noValues);
}
/**
 * Returns an empty ValueSets instance.
 *
 * @param spark the spark session
 * @return an empty ValueSets instance.
 */
public static ValueSets getEmpty(SparkSession spark) {

  // The null timestamp column is added before re-encoding, presumably so the
  // empty dataset's schema matches timestamped value sets — confirm.
  Dataset<ValueSet> emptyValueSets = spark.emptyDataset(VALUE_SET_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(VALUE_SET_ENCODER);

  return new ValueSets(spark,
      spark.emptyDataset(URL_AND_VERSION_ENCODER),
      emptyValueSets,
      spark.emptyDataset(getValueEncoder()));
}
/**
 * Returns an empty ValueSets instance.
 *
 * @param spark the spark session
 * @return an empty ValueSets instance.
 */
public static ValueSets getEmpty(SparkSession spark) {

  // The null timestamp column is added before re-encoding, presumably so the
  // empty dataset's schema matches timestamped value sets — confirm.
  Dataset<ValueSet> emptyValueSets = spark.emptyDataset(VALUE_SET_ENCODER)
      .withColumn("timestamp", lit(null).cast("timestamp"))
      .as(VALUE_SET_ENCODER);

  return new ValueSets(spark,
      spark.emptyDataset(URL_AND_VERSION_ENCODER),
      emptyValueSets,
      spark.emptyDataset(getValueEncoder()));
}