/** * Creates {@link SparkConf} with {@link org.apache.spark.serializer.KryoSerializer} along with * registering default/user-input serializable classes and user-input Avro Schemas. * Once {@link SparkContext} is created, we can no longer register serialization classes and Avro schemas. */ public SparkConf createSparkConf(@NonNull final SparkArgs sparkArgs) { /** * By custom registering classes the full class name of each object * is not stored during serialization which reduces storage space. */ final SparkConf sparkConf = new SparkConf(); sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); final List<Class> serializableClasses = getDefaultSerializableClasses(); serializableClasses.addAll(sparkArgs.getUserSerializationClasses()); sparkConf.registerKryoClasses(serializableClasses.toArray(new Class[0])); if (sparkArgs.getAvroSchemas().isPresent()) { sparkConf.registerAvroSchemas( JavaConverters .iterableAsScalaIterableConverter(sparkArgs.getAvroSchemas().get()) .asScala() .toSeq()); } // override spark properties final Map<String, String> sparkProps = sparkArgs.getOverrideSparkProperties(); for (Entry<String, String> entry : sparkProps.entrySet()) { log.info("Setting spark key:val {} : {}", entry.getKey(), entry.getValue()); sparkConf.set(entry.getKey(), entry.getValue()); } return sparkConf; }
/**
 * Applies post-creation configuration to an existing {@link SparkContext}:
 * attaches every configured Spark event listener and merges the supplied
 * Hadoop configuration into the context's Hadoop resources.
 *
 * @param sparkArgs source of the Hadoop configuration to merge in
 * @param sc        the live Spark context to update
 */
private void updateSparkContext(@NonNull final SparkArgs sparkArgs, @NonNull final SparkContext sc) {
    getSparkEventListeners().forEach(sc::addSparkListener);
    sc.hadoopConfiguration().addResource(sparkArgs.getHadoopConfiguration());
}
/**
 * Verifies that a {@link SparkContext} reflects the sample {@link SparkArgs}:
 * the first registered Avro schema, the overridden application name, and the
 * merged Hadoop property must all be visible on the context.
 *
 * @param sparkArgs expected configuration (assumes Avro schemas are present)
 * @param sc        the context under test
 */
private void assertExpectationsOnSparkContext(
    @NonNull final SparkArgs sparkArgs, @NonNull final SparkContext sc) {
    // The first expected schema must match the schema string of the first
    // registered (key, schema) pair on the context's conf.
    final Schema expectedAvroSchema = sparkArgs.getAvroSchemas().get().get(0);
    final String actualAvroSchemaStr = sc.conf().getAvroSchema().head()._2();
    Assert.assertEquals(expectedAvroSchema.toString(), actualAvroSchemaStr);

    // Spark property override and merged Hadoop resource must have taken effect.
    Assert.assertEquals("foo_bar", sc.appName());
    Assert.assertEquals("512", sc.hadoopConfiguration().get("mapreduce.map.memory.mb"));
}
/**
 * Builds sample {@link SparkArgs} for tests: one minimal Avro record schema,
 * no user serialization classes, Spark property overrides for a 2-core local
 * master named {@code foo_bar}, and a Hadoop configuration with a mapper
 * memory setting.
 *
 * @return a fully populated sample {@link SparkArgs}
 */
private SparkArgs getSampleMarmaraySparkArgs() {
    // Minimal Avro record: a single int field "abc" defaulting to 0.
    final Schema fooRecordSchema = SchemaBuilder.record("fooRecord").fields()
        .name("abc").type().intType().intDefault(0)
        .endRecord();

    final Map<String, String> sparkPropertyOverrides = new HashMap<>();
    sparkPropertyOverrides.put("spark.master", "local[2]");
    sparkPropertyOverrides.put("spark.app.name", "foo_bar");

    final Configuration hadoopConf = new Configuration();
    hadoopConf.set("mapreduce.map.memory.mb", "512");

    return new SparkArgs(
        Optional.of(Arrays.asList(fooRecordSchema)),
        Arrays.asList(),
        sparkPropertyOverrides,
        hadoopConf);
}
}