private void writeObject(ObjectOutputStream out) throws IOException { ArrayList<String> udfImportList = Lists.newArrayList(Splitter.on(",").split(properties.getProperty(SPARK_UDF_IMPORT_LIST))); out.writeObject(udfImportList); //2 threads call SparkEngineConf#writeObject //In main thread: SparkLauncher#initialize->SparkUtil#newJobConf // ->ObjectSerializer#serialize-> SparkEngineConf#writeObject //In dag-scheduler-event-loop thread: DAGScheduler.submitMissingTasks->JavaSerializationStream.writeObject // //In main thread,UDFContext#getUDFContext is not empty, we store UDFContext#udfConfs and UDFContext#clientSysProps //into properties and serialize them. //In dag-scheduler-event-loop thread, UDFContext#getUDFContext is empty, we get value of UDFContext#udfConfs and UDFContext#clientSysProps //from properties and serialize them. if (!UDFContext.getUDFContext().isUDFConfEmpty()) { //In SparkUtil#newJobConf(), sparkEngineConf is serialized in job configuration and will call //SparkEngineConf#writeObject(at this time UDFContext#udfConfs and UDFContext#clientSysProps is not null) //later spark will call JavaSerializationStream.writeObject to serialize all objects when submit spark //jobs(at that time, UDFContext#udfConfs and UDFContext#clientSysProps is null so we need to save their //value in SparkEngineConf#properties after these two variables are correctly initialized in //SparkUtil#newJobConf, More detailed see PIG-4920 String udfConfsStr = UDFContext.getUDFContext().serialize(); String clientSysPropsStr = ObjectSerializer.serialize(UDFContext.getUDFContext().getClientSystemProps()); this.properties.setProperty(SPARK_UDFCONTEXT_UDFCONFS, udfConfsStr); this.properties.setProperty(SPARK_UDFCONTEXT_CLIENTSYSPROPS, clientSysPropsStr); out.writeObject(udfConfsStr); out.writeObject(clientSysPropsStr); } else { out.writeObject(this.properties.getProperty(SPARK_UDFCONTEXT_UDFCONFS)); out.writeObject(this.properties.getProperty(SPARK_UDFCONTEXT_CLIENTSYSPROPS)); } }
private void init(PhysicalPlan pp, POStore poStore) throws IOException { poStore.setStoreImpl(new FetchPOStoreImpl(pigContext)); poStore.setUp(); TaskAttemptID taskAttemptID = HadoopShims.getNewTaskAttemptID(); //Fetch mode needs to explicitly set the task id which is otherwise done by Hadoop conf.setInt(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, taskAttemptID.getId()); if (!PlanHelper.getPhysicalOperators(pp, POStream.class).isEmpty()) { MapRedUtil.setupStreamingDirsConfSingle(poStore, pigContext, conf); } String currentTime = Long.toString(System.currentTimeMillis()); conf.set("pig.script.submitted.timestamp", currentTime); conf.set("pig.job.submitted.timestamp", currentTime); PhysicalOperator.setReporter(new FetchProgressableReporter()); SchemaTupleBackend.initialize(conf, pigContext); UDFContext udfContext = UDFContext.getUDFContext(); udfContext.addJobConf(conf); udfContext.setClientSystemProps(pigContext.getProperties()); udfContext.serialize(conf); PigMapReduce.sJobConfInternal.set(conf); Utils.setDefaultTimeZone(conf); boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning")); PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance(); pigStatusReporter.setContext(new FetchTaskContext(new FetchContext())); PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance(); pigHadoopLogger.setReporter(pigStatusReporter); pigHadoopLogger.setAggregate(aggregateWarning); PhysicalOperator.setPigLogger(pigHadoopLogger); }
// Snapshot the current UDFContext into the job configuration so it travels
// with the job and can be restored on the backend after deserialization.
UDFContext.getUDFContext().serialize(conf);
Job cjob = new Job(new JobConf(conf), new ArrayList<Job>());
// Remember this job's store locations and its temporary output path —
// presumably so results can be relocated/cleaned up once the job finishes
// (TODO confirm against the code that consumes jobStoreMap).
jobStoreMap.put(cjob,new Pair<List<POStore>, Path>(storeLocations, tmpLocation));
// Snapshot the client-side UDFContext into the job configuration for the backend.
UDFContext.getUDFContext().serialize(conf);
// Ship the package import list in the conf — presumably used for resolving
// UDF class names on the backend (TODO confirm where "udf.import.list" is read).
conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
// Snapshot the client-side UDFContext into the Spark job configuration so it
// is available to tasks after the conf is deserialized on the backend.
UDFContext.getUDFContext().serialize(jobConf);