/**
 * Creates a {@link PigServer} in LOCAL mode.
 * See http://pig.apache.org/docs/r0.12.0/perf.html#error-handling
 *
 * @param stopOnFailure equivalent of the "-stop_on_failure" command line arg; setting it to
 *                      'true' makes debugging easier
 * @param p properties used to configure the server; mutated in place with per-run temp dirs
 * @return a new LOCAL-mode PigServer
 * @throws ExecException if the server cannot be created
 */
public static PigServer createPigServer(boolean stopOnFailure, Properties p) throws ExecException {
  Path workDir = new Path(System.getProperty("test.tmp.dir",
      "target" + File.separator + "test" + File.separator + "tmp"));
  // Unique id per invocation so concurrent/repeated test runs do not share scratch dirs.
  String testId = "HCatBaseTest_" + System.currentTimeMillis();
  // Common per-run prefix; the original repeated this concatenation in every put() call.
  String base = workDir + File.separator + testId + File.separator;
  p.put("mapred.local.dir", base + "mapred" + File.separator + "local");
  p.put("mapred.system.dir", base + "mapred" + File.separator + "system");
  p.put("mapreduce.jobtracker.staging.root.dir", base + "mapred" + File.separator + "staging");
  p.put("mapred.temp.dir", base + "mapred" + File.separator + "temp");
  p.put("pig.temp.dir", base + "pig" + File.separator + "temp");
  if (stopOnFailure) {
    p.put("stop.on.failure", Boolean.TRUE.toString());
  }
  // Single exit point: the original duplicated this return in both the if and fall-through paths.
  return new PigServer(ExecType.LOCAL, p);
}
}
// Expected (group, count) output tuple for the aggregation executed below.
expectedList.add(Storage.tuple(30,10));
// Run load -> GROUP BY AGE -> COUNT as one batch; results are captured by mock.Storage
// under the alias 'out'.
pigServer.setBatchOn();
pigServer.registerQuery(String.format(
    "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
    TABLE, zkQuorum));
pigServer.registerQuery("B = GROUP A BY AGE;");
pigServer.registerQuery("C = FOREACH B GENERATE group,COUNT(A);");
pigServer.registerQuery("STORE C INTO 'out' using mock.Storage();");
pigServer.executeBatch();
// NOTE(review): fragment — the surrounding test method (and the read-back of 'out')
// is outside this excerpt.
/**
 * Releases per-test resources: closes the JDBC connection and shuts down the Pig server.
 * Both references are null-checked so teardown is safe even when setup failed part-way.
 *
 * @throws Exception if closing either resource fails
 */
@After
public void tearDown() throws Exception {
    if (conn != null) {
        conn.close();
    }
    if (pigServer != null) {
        pigServer.shutdown();
    }
}
// Load the rows produced by an arbitrary SQL query through the Phoenix loader,
// then iterate the results counting records.
pigServer.registerQuery(String.format(
    "A = load 'hbase://query/%s' using org.apache.phoenix.pig.PhoenixHBaseLoader('%s');",
    sqlQuery, zkQuorum));
final Iterator<Tuple> iterator = pigServer.openIterator("A");
int recordsRead = 0;
while (iterator.hasNext()) {
// NOTE(review): fragment — the loop body and its closing brace are outside this excerpt.
/** * Validates the schema returned when specific columns of a table are given as part of LOAD . * @throws Exception */ @Test public void testSchemaForTableWithSpecificColumns() throws Exception { //create the table final String TABLE = "TABLE2"; final String ddl = "CREATE TABLE " + TABLE + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) "; conn.createStatement().execute(ddl); final String selectColumns = "ID,NAME"; pigServer.registerQuery(String.format( "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE, selectColumns, zkQuorum)); Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID")); assertTrue(fields.get(0).type == DataType.INTEGER); assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME")); assertTrue(fields.get(1).type == DataType.CHARARRAY); }
// Second expected row: id 2, array sizes 2 and 2, plus the dynamic-array tuple.
expectedList.add(Storage.tuple(2, 2L, 2L, dynArrTuple2));
// Batch: load the SQL query through the Phoenix loader, project sizes of the array
// columns, and store through mock.Storage.
final String load = String.format("A = load 'hbase://query/%s' using "
    + PhoenixHBaseLoader.class.getName() + "('%s');",sqlQuery,zkQuorum);
pigServer.setBatchOn();
pigServer.registerQuery(load);
pigServer.registerQuery("B = FOREACH A GENERATE ID, SIZE(A_DOUBLE_ARRAY), SIZE(A_VARCHAR_ARRAY), FLATTEND_STR;");
pigServer.registerQuery("STORE B INTO 'out' using mock.Storage();");
pigServer.executeBatch();
// NOTE(review): actualList is not declared in this excerpt — presumably populated
// from the mock.Storage 'out' alias elsewhere; verify against the full test.
assertEquals(expectedList, actualList);
// The fourth field of A's schema is expected to be a tuple (the dynamic array column).
Schema schema = pigServer.dumpSchema("A");
List<FieldSchema> fields = schema.getFields();
assertEquals(4, fields.size());
assertTrue(fields.get(3).type == DataType.TUPLE);
Iterator<Tuple> iterator = pigServer.openIterator("A");
Tuple firstTuple = Storage.tuple(1, doubleArrTuple, strArrTuple, dynArrTuple);
Tuple secondTuple = Storage.tuple(2, doubleArrTuple2, strArrTuple2, dynArrTuple2);
// NOTE(review): fragment — the assertions consuming iterator/firstTuple/secondTuple
// are outside this excerpt.
// Privileged factory callback returning a fresh PigServer bound to the captured context.
@Override
public PigServer run() throws Exception {
    return new PigServer(ctx, true);
// NOTE(review): everything below the return above is unreachable and will not compile
// as excerpted — this looks like two separate snippets fused together (a PrivilegedAction
// body and a bean-configuration sequence). Verify against the original sources.
    pigServer = new PigServer(ctx, true);
    pigServer.addPathToSkip(path);
    pigServer.setDefaultParallel(parallelism);
    pigServer.setJobName(jobName);
    pigServer.setJobName(beanName);
    pigServer.setJobPriority(jobPriority);
/**
 * Register a query with the Pig runtime. The query will be read from the indicated file.
 * Delegates to the three-argument overload with no parameter substitutions and no
 * parameter files.
 *
 * @param fileName file to read query from.
 * @throws IOException if the file cannot be read or the script fails to register.
 */
public void registerScript(String fileName) throws IOException {
    registerScript(fileName, null, null);
}
@Override public void execute(Query query) { String queryName = query.name; String queryValue = query.value; Map<String, String> queryMetadata = query.getMetadata(); String execType = Query.getKey(queryMetadata, METADATA_EXEC_TYPE_KEY).orElse(defaultExecType); String alias = Query.getKey(queryMetadata, METADATA_ALIAS_KEY).orElse(defaultOutputAlias); log.info("Running {} for alias {}: {}", queryName, alias, queryValue); try { PigServer server = getPigServer(execType); server.registerScript(new ByteArrayInputStream(queryValue.getBytes())); Iterator<Tuple> queryResults = server.openIterator(alias); Result result = query.createResults(); // dumpSchema will also, unfortunately, print the schema to stdout. List<FieldDetail> metadata = getFieldDetails(server.dumpSchema(alias)); populateColumns(metadata, result); while (queryResults.hasNext()) { populateRow(queryResults.next(), metadata, result); } server.shutdown(); } catch (IOException ioe) { log.error("Problem with Pig query: {}\n{}", queryValue, ioe); query.setFailure(ioe.toString()); } catch (Exception e) { log.error("Error occurred while processing Pig query: {}\n{}", queryValue, e); query.setFailure(e.toString()); } }
/**
 * Register a jar for use in Pig. Once this is done this jar will be
 * registered for <b>all subsequent</b> Pig pipelines in this script.
 * If you wish to register it for only a single Pig pipeline, use
 * register within that definition.
 *
 * @param jarfile Path of jar to include.
 * @throws IOException if the indicated jarfile cannot be found.
 */
public static void registerJar(String jarfile) throws IOException {
    LOG.info("Register jar: "+ jarfile);
    // A throwaway PigServer is used purely to push the jar into the script-wide PigContext.
    ScriptPigContext scriptContext = getScriptContext();
    new PigServer(scriptContext.getPigContext(), false).registerJar(jarfile);
}
// NOTE(review): this excerpt looks like disjoint branches of a larger if/else chain
// handling "set <key> <value>" options in the Grunt shell (debug on/off, job name,
// job priority, stream skip paths, default_parallel, engine properties). Braces do
// not balance as shown, so the fragment will not compile in isolation — verify
// against the complete source before changing it.
mPigServer.debugOn();
else if (value.equals("off"))
    mPigServer.debugOff();
else
    // any value other than "on"/"off" is rejected
    throw new ParseException("Invalid value " + value + " provided for " + key);
} else {
    // no value supplied: echo the current debug setting
    System.out.println(key + "=" + mPigServer.isDebugOn());

    mPigServer.setJobName(value);
} else {
    System.out.println(key + "=" + mPigServer.getJobName());

    mPigServer.setJobPriority(value);
} else {
    System.out.println(key + "=" + mPigServer.getJobPriority());

    mPigServer.addPathToSkip(value);
} else {
    System.out.println(key + "=" + StringUtils.join(mPigServer.getPigContext().getPathsToSkip(), ","));

    mPigServer.setDefaultParallel(Integer.parseInt(value));
} catch (NumberFormatException e) {
    // default_parallel must be an integer
    throw new ParseException("Invalid value for default_parallel");

    System.out.println(key + "=" + mPigServer.getPigContext().getDefaultParallel());

    // unrecognized keys fall through to the execution engine's own properties
    mPigServer.getPigContext().getExecutionEngine().setProperty(key, value);
} else {
    if (mPigServer.getPigContext().getProperties().containsKey(key)) {
// Keep Jython's cachedir active ("skip" = false) so package scanning works.
System.setProperty(PySystemState.PYTHON_CACHEDIR_SKIP, "false");
// Ship the Jython jar with the job, then load the script through this server's context.
PigServer pigServer = new PigServer(pigContext, false);
pigServer.registerJar(jythonJar);
FileInputStream fis = new FileInputStream(scriptFile);
try {
    load(fis, scriptFile, pigServer.getPigContext());
} finally {
    // manual close; try-with-resources is the modern equivalent
    fis.close();
// NOTE(review): fragment — the closing braces of the finally block and enclosing
// method are outside this excerpt.
@Override protected void processExplain(String alias, String script, boolean isVerbose, String format, String target, List<String> params, List<String> files) throws IOException, ParseException { if (mPigServer.isBatchOn()) { mPigServer.parseAndBuild(); } if (alias == null && script == null) { if (mInteractive) { alias = mPigServer.getPigContext().getLastAlias(); // if explain is used immediately after launching grunt shell then // last defined alias will be null if (alias == null) { throw new ParseException("'explain' statement must be on an alias or on a script."); } } } if ("@".equals(alias)) { alias = mPigServer.getLastRel(); } processExplain(alias, script, isVerbose, format, target, params, files, false); }
// Load rows from an arbitrary SQL query via the Phoenix loader and count them.
pigServer.registerQuery(String.format(
    "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
    sqlQuery, zkQuorum));
final Iterator<Tuple> iterator = pigServer.openIterator("A");
int recordsRead = 0;
while (iterator.hasNext()) {
// NOTE(review): fragment — the loop body is truncated here and the schema assertions
// below appear to belong after the loop in the full source; verify before editing.
Schema schema = pigServer.dumpSchema("A");
List<FieldSchema> fields = schema.getFields();
assertEquals(2, fields.size());
/**
 * Runs a Pig Latin query string through a fresh Grunt parser and returns the stats of
 * the resulting run. A new ScriptState is started for this execution, carrying over the
 * current script file name and all registered progress listeners.
 *
 * @param query the Pig Latin text to execute
 * @return the PigStats recorded for this run
 * @throws IOException if the query cannot be parsed
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);
    List<PigProgressNotificationListener> listeners = ScriptState.get().getAllListeners();
    PigContext pc = scriptContext.getPigContext();

    // Start a fresh ScriptState for this run and re-attach the existing listeners.
    String scriptName = new File(ScriptState.get().getFileName()).getName();
    ScriptState freshState = pc.getExecutionEngine().instantiateScriptState();
    freshState.setFileName(scriptName);
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    for (PigProgressNotificationListener listener : listeners) {
        ScriptState.get().registerListener(listener);
    }

    PigServer pigServer = new PigServer(pc, false);
    // Honor a caller-supplied job name if one was configured, otherwise use the script's.
    if (!pigServer.getPigContext().getProperties().containsKey("jobName")) {
        pigServer.setJobName(scriptName);
    }

    GruntParser grunt = new GruntParser(new StringReader(query), pigServer);
    grunt.setInteractive(false);
    try {
        grunt.parseStopOnError(false);
    } catch (ParseException e) {
        throw new IOException("Failed to parse script " + e.getMessage(), e);
    }
    return PigStats.get();
}
/**
 * Describe the schema of an alias in this pipeline.
 * Results will be printed to stdout.
 *
 * @param alias to be described
 * @throws IOException if describe fails.
 */
public void describe(String alias) throws IOException {
    if (queries.isEmpty()) {
        LOG.info("No bound query to describe");
        return;
    }
    // Bind the first query to a throwaway server, then dump the alias's schema to stdout.
    PigServer diagServer = new PigServer(scriptContext.getPigContext(), false);
    registerQueryForDiagnostics(diagServer, queries.get(0));
    diagServer.dumpSchema(alias);
}
// Callback body: runs the configured script as a batch and reads back a single count
// value from each resulting job's first output tuple.
@Override
public Long doInPig(PigServer pigServer) throws ExecException, IOException {
    pigServer.setBatchOn();
    pigServer.registerScript(script.getResource()
            .getInputStream());
    List<ExecJob> executeBatch = pigServer.executeBatch();
    long count = 0;
    for (ExecJob execJob : executeBatch) {
        // NOTE(review): only the last job's count survives — each iteration overwrites
        // 'count'. Confirm whether summing was intended in the full source.
        count = (Long) execJob.getResults().next().get(0);
        LOG.debug(
                "Pig Script Exec job result for total events: {}",
                count);
    }
    return count;
} });
// NOTE(review): the trailing '} });' closes an anonymous class and method call whose
// opening is outside this excerpt.
/**
 * Set a variable for use in Pig Latin. This set
 * will then be present for <b>all subsequent</b> Pig pipelines defined in this
 * script. If you wish to set it for only a single Pig pipeline, use
 * set within that definition.
 *
 * @param var variable to set
 * @param value to set it to
 * @throws IOException if the Pig server cannot be created
 */
public static void set(String var, String value) throws IOException {
    ScriptPigContext scriptCtx = getScriptContext();
    // The throwaway PigServer exposes the script-wide PigContext whose properties are
    // shared by all subsequent pipelines.
    PigServer server = new PigServer(scriptCtx.getPigContext(), false);
    server.getPigContext().getProperties().setProperty(var, value);
}
static List<ExecJob> run(PigServer pig, Iterable<PigScript> scripts) throws ExecException, IOException { Assert.notNull(scripts, "at least one script is required"); if (!pig.isBatchOn()) { pig.setBatchOn(); } List<ExecJob> jobs = new ArrayList<ExecJob>(); pig.getPigContext().connect(); InputStream in = null; try { for (PigScript script : scripts) { try { in = script.getResource().getInputStream(); } catch (IOException ex) { throw new IllegalArgumentException("Cannot open script [" + script.getResource() + "]", ex); } // register the script (with fallback for old Pig versions) registerScript(pig, in, script.getArguments()); jobs.addAll(pig.executeBatch()); } } finally { IOUtils.closeStream(in); } return jobs; }
/**
 * Explain this pipeline. Results will be printed to stdout.
 *
 * @throws IOException if explain fails.
 */
public void explain() throws IOException {
    if (queries.isEmpty()) {
        LOG.info("No bound query to explain");
        return;
    }
    // Bind the first query to a throwaway server and write the full plan to stdout.
    PigServer diagServer = new PigServer(scriptContext.getPigContext(), false);
    registerQueryForDiagnostics(diagServer, queries.get(0));
    diagServer.explain(null, System.out);
}