/**
 * Creates a {@link SparkConf} with {@link org.apache.spark.serializer.KryoSerializer}, registering
 * the default/user-supplied serializable classes and user-supplied Avro schemas.
 * Once the {@link SparkContext} is created, serialization classes and Avro schemas can no longer be registered.
 */
public SparkConf createSparkConf(@NonNull final SparkArgs sparkArgs) {
    /*
     * By registering classes explicitly, the full class name of each object is not stored
     * during serialization, which reduces storage space.
     */
    final SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    final List<Class> serializableClasses = getDefaultSerializableClasses();
    serializableClasses.addAll(sparkArgs.getUserSerializationClasses());
    sparkConf.registerKryoClasses(serializableClasses.toArray(new Class[0]));

    if (sparkArgs.getAvroSchemas().isPresent()) {
        sparkConf.registerAvroSchemas(
            JavaConverters
                .iterableAsScalaIterableConverter(sparkArgs.getAvroSchemas().get())
                .asScala()
                .toSeq());
    }

    // Apply user-supplied overrides of Spark properties.
    final Map<String, String> sparkProps = sparkArgs.getOverrideSparkProperties();
    for (Entry<String, String> entry : sparkProps.entrySet()) {
        log.info("Setting spark key:val {} : {}", entry.getKey(), entry.getValue());
        sparkConf.set(entry.getKey(), entry.getValue());
    }
    return sparkConf;
}
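Because Kryo classes and Avro schemas can only be registered on the `SparkConf` before the `SparkContext` exists, callers are expected to finish all registration first and only then construct the context. Below is a minimal sketch of that ordering using the plain Spark and Avro APIs rather than this project's `SparkArgs`; the class name, schema, and registered classes are illustrative assumptions, not taken from this codebase.

```java
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import scala.collection.JavaConverters;

// Hypothetical driver showing the required ordering: register Kryo classes and
// Avro schemas on the SparkConf BEFORE the SparkContext is created.
public final class KryoAvroRegistrationSketch {

  public static void main(final String[] args) {
    // An example Avro schema; a real job would use its own record schemas.
    final Schema schema = SchemaBuilder.record("Example").fields()
        .requiredString("name")
        .endRecord();

    // All registration happens on the SparkConf.
    final SparkConf conf = new SparkConf()
        .setMaster("local[1]")
        .setAppName("foo_bar")
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .registerKryoClasses(new Class<?>[] {String[].class})
        .registerAvroSchemas(JavaConverters
            .iterableAsScalaIterableConverter(Collections.singletonList(schema))
            .asScala()
            .toSeq());

    // Once the context exists, further Kryo/Avro registration is no longer possible.
    final SparkContext sc = new SparkContext(conf);
    try {
      // ... run the job ...
    } finally {
      sc.stop();
    }
  }
}
```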
private void assertExpectationsOnSparkContext(
        @NonNull final SparkArgs sparkArgs, @NonNull final SparkContext sc) {
    // Registered Avro schemas are stored in the SparkConf keyed by schema fingerprint,
    // with the schema JSON string as the value.
    final String registeredAvroSchemaStr = sc.conf().getAvroSchema().head()._2();
    final Schema expectedAvroSchema = sparkArgs.getAvroSchemas().get().get(0);
    Assert.assertEquals(expectedAvroSchema.toString(), registeredAvroSchemaStr);
    Assert.assertEquals("foo_bar", sc.appName());
    Assert.assertEquals("512", sc.hadoopConfiguration().get("mapreduce.map.memory.mb"));
}