Hello All,
When running a Pig query from a Java program on my host Mac machine, I get the error below. I have an HDP virtual machine running on my Mac. Is it because my Mac user is different from my VM user? The Java code is also given below:
package com.redhat.aml.pig;
import java.io.IOException;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import java.util.Properties;
import org.apache.pig.ExecType;
public class GenerateCustomerProfile {
public static void main(String[] args) throws ExecException, IOException {
// TODO Auto-generated method stub
Properties props = new Properties();
props.setProperty(“fs.default.name”, “hdfs://localhost:8020″);
System.out.println(“Step 1″);
//props.setProperty(“mapred.job.tracker”, “<jobtracker-hostname>:<port>”);
//props.setProperty(“mapred.job.tracker”, “<jobtracker-hostname>:<port>”);
PigServer pigServer = new PigServer(ExecType.MAPREDUCE,props);
System.out.println(“Step 2″);
try {
runMyQuery(pigServer, “/user/aml-demo/trans.txt”, “/user/aml-demo/account.txt”);
}
catch (IOException e) {
e.printStackTrace();
}
}
public static void runMyQuery(PigServer pigServer, String trans, String account) throws IOException {
System.out.println(“Step 3″);
pigServer.registerQuery(“transaction = load ‘” + trans + “‘ using PigStorage(‘,’) as (TransactionID:int,AccountNo:int,FirstName:chararray,LastName:chararray,Amount:int,TransactionType:chararray,FromZipCode:chararray,ToZipCode:chararray,IPAddress:chararray,DeviceLocation:chararray,Country:chararray,State:chararray);”);
System.out.println(“Step 4″);
pigServer.registerQuery(“account = load ‘” + account + “‘ using PigStorage(‘,’) as (AccountNo:int, FirstName:chararray, LastName:chararray, Street:chararray, City:chararray, State:chararray, ZipCode:chararray, Occupation:chararray, Age:int, Sex:chararray,MaritalStatus:chararray, AccountType:chararray);”);
System.out.println(“Step 5″);
pigServer.registerQuery(“C = foreach account generate AccountNo as id, ZipCode,Occupation;”);
System.out.println(“Step 6″);
pigServer.registerQuery(“jnd = join transaction by AccountNo, C by id;”);
System.out.println(“Step 7″);
pigServer.registerQuery(“D = group jnd by (C::ZipCode,transaction::TransactionType,C::Occupation);”);
System.out.println(“Step 8″);
pigServer.registerQuery(“E = foreach D generate flatten(group) as (zip,Tranaction,occupation),SUM($1.Amount) as total_spent,COUNT(jnd) as numOfTransactions,AVG($1.Amount) as avg;”);
System.out.println(“Step 9″);
pigServer.openIterator(“E”);
System.out.println(“Step 10″);
//pigServer.store(“E”, “/user/aml-demo/idout”);
}
}
org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias E
at org.apache.pig.PigServer.openIterator(PigServer.java:935)
at com.redhat.aml.pig.GenerateCustomerProfile.runMyQuery(GenerateCustomerProfile.java:42)
at com.redhat.aml.pig.GenerateCustomerProfile.main(GenerateCustomerProfile.java:21)
Caused by: org.apache.pig.PigException: ERROR 1002: Unable to store alias E
at org.apache.pig.PigServer.storeEx(PigServer.java:1038)
at org.apache.pig.PigServer.store(PigServer.java:997)
at org.apache.pig.PigServer.openIterator(PigServer.java:910)
… 2 more
Caused by: org.apache.pig.backend.hadoop.executionengine.JobCreationException: ERROR 2017: Internal error creating job configuration.
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.getJob(JobControlCompiler.java:1010)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.compile(JobControlCompiler.java:323)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.launchPig(MapReduceLauncher.java:196)
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.launchPig(HExecutionEngine.java:304)
at org.apache.pig.PigServer.launchPlan(PigServer.java:1390)
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1375)
at org.apache.pig.PigServer.storeEx(PigServer.java:1034)
… 4 more
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /tmp/temp680828053/tmp-1259370403/pig-0.15.0.jar could only be replicated to 0 nodes instead of minReplication (=1). There are 1 datanode(s) running and 1 node(s) are excluded in this operation.
at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1551)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3117)