Pig : UDFs

Pig UDFS ----------   UDF ---> user defined functions.      adv:        i)  custom functionalities.       ii)  reusability.  Pig UDFs can be developed by     java    python     ruby     c++     javascript     perl step1:    Develop udf code. step2:    export into jar file    ex: /home/cloudera/Desktop/pigs.jar step3:    register jar file into pig.  grunt> register Desktop/pigs.jar step4:    create temporory function  for udf class.  grunt> define  ucase pig.analytics.ConvertUpper(); step5:   calling the function:  grunt>e =  foreach emp generate       id, ucase(name) as name, sal,         ucase(sex) as sex, dno;   package  pig.analytics; import ..... --> ucase(name) ---> upper conversion public class ConvertUpper extends EvalFunc   {      public String exec(Tuple v)       throws IOException      {         String str = (String)v.get(0);         String res = str.toUpperCase();         retrun res;                  }    } -------------------------- $ cat > samp 100,230,400 123,100,90 140,560,430 $ hadoop fs -copyFromLocal samp piglab grunt> s = load 'piglab/samp'             using PigStorage(',')           as (a:int, b:int, c:int); package pig.analytics; .... public class RMax extends EvalFunc {     public  Integer exec(Tuple v)      throws IOException     {       int a =(Integer) v.get(0);       int b =(Integer) v.get(1);       int c =(Integer) v.get(2);       int big = a; // 10,20,3       if (a>big) big = a;       if (b>big) big = b;       if (c>big) big = c;       return  big;     }  } export into jar : Desktop/pigs.jar grunt> register Desktop/pigs.jar; grunt> define rmax pig.analytics.RMax(); grunt> res = foreach s generate *,                    rmax(*) as max; --------------------------------  package pig.analytics;  .......  
public class RowMax    extends EvalFunc  {     public Integer exec(Tuple v) throws IOException     {      List lobs = v.getAll() ;      int max = 0;      int cnt =0;     // -20,-3,-40      for(Object o : lobs)      {        cnt++;        int val = (Integer)o;        if(cnt==1) max = val;        max = Math.max(max, val);      }      return max;     }  } export in to jar : Desktop/pigs.jar grunt> register Desktop/pigs.jar grunt> define dynmax pig.analytics.RowMax(); grunt> r = foreach s generate *, dynmax(*) as m; ----------------------------------------- emp = load 'piglab/emp' using PigStorage(',')    as (id:int, name:chararray, sal:int,      sex:chararray, dno:int); grade() dname() gender() package pig.analytics; public class Gender extends EvalFunc {  public String exec(Tuple v) throws IOException  {      String s =(String) v.get(0);      s = s.toUpperCase();      if (s.matches("F"))        s = "Female";      else        s = "Male";      return s;  } } ----------------- package pig.analytics; public class Grade extends EvalFunc {  public String exec(Tuple v) throws IOException  {      String sal =(Integer) v.get(0);      String grade;      if (sal>=70000)        grade="A";      else if (sal>=50000)           grade="B";          else if (sal>=30000)                grade="C";               else                grade="D";      return grade;  } } ------ package pig.analytics; public class DeptName extends EvalFunc {  public String exec(Tuple v) throws IOException  {     int dno = (Integer)v.get(0);     String dname;     switch (dno){     case 11 :           dname = "Marketing";           break;     case 12 :           dname = "HR";           break;     case 13 :           dname = "Finance";           break;     default:           dname = "Others";      }     return dname;    } } --------------------------------- --------------------------- export into jar : Desktop/pigs.jar; grunt> register Desktop/pigs.jar; grunt> define gender pig.analytics.Gender(); grunt> define 
grade pig.analytics.Grade(); grunt> define dept pig.analytics.DeptName(); grunt> res = foreach emp generate     id, ucase(name) as name,      sal, grade(sal) as grade,     gender(sex) as sex,     dept(dno) as dname ; ---------------------------------                


Pig UDFS
----------

  UDF ---> user-defined function.
 
   Advantages:
       i)  custom functionality.
      ii)  reusability.

 Pig UDFs can be developed by
    java
   python
    ruby
    c++
    javascript
    perl

step1:
   Develop udf code.

step2:
   export into jar file
   ex: /home/cloudera/Desktop/pigs.jar

step3:
   register jar file into pig.
 grunt> register Desktop/pigs.jar

step4:
   create a temporary function (alias) for the UDF class.

 grunt> define  ucase pig.analytics.ConvertUpper();

step5:
  calling the function:

 grunt> e = foreach emp generate
      id, ucase(name) as name, sal,
        ucase(sex) as sex, dno;

 
package  pig.analytics;
import .....

--> ucase(name) ---> upper conversion

public class ConvertUpper extends EvalFunc
  {
     public String exec(Tuple v)
      throws IOException
     {
        String str = (String)v.get(0);
        String res = str.toUpperCase();
        retrun res;
           
     }

 
 }
--------------------------
$ cat > samp
100,230,400
123,100,90
140,560,430

$ hadoop fs -copyFromLocal samp piglab

grunt> s = load 'piglab/samp'
            using PigStorage(',')
          as (a:int, b:int, c:int);



package pig.analytics;
....
public class RMax extends EvalFunc
{
    public  Integer exec(Tuple v)
     throws IOException
    {
      int a =(Integer) v.get(0);
      int b =(Integer) v.get(1);
      int c =(Integer) v.get(2);

      int big = a; // 10,20,3
      if (a>big) big = a;
      if (b>big) big = b;
      if (c>big) big = c;
      return  big;
    }
 }

export into jar : Desktop/pigs.jar

grunt> register Desktop/pigs.jar;

grunt> define rmax pig.analytics.RMax();

grunt> res = foreach s generate *,
                   rmax(*) as max;

--------------------------------

 package pig.analytics;
 .......
 public class RowMax
   extends EvalFunc
 {
    public Integer exec(Tuple v) throws IOException
    {
     List lobs = v.getAll() ;
     int max = 0;
     int cnt =0;
    // -20,-3,-40
     for(Object o : lobs)
     {
       cnt++;
       int val = (Integer)o;
       if(cnt==1) max = val;
       max = Math.max(max, val);
     }
     return max;
    }
 }

export into jar : Desktop/pigs.jar
grunt> register Desktop/pigs.jar
grunt> define dynmax pig.analytics.RowMax();
grunt> r = foreach s generate *, dynmax(*) as m;
-----------------------------------------

emp = load 'piglab/emp' using PigStorage(',')
   as (id:int, name:chararray, sal:int,
     sex:chararray, dno:int);

grade()
dept()
gender()


package pig.analytics;
public class Gender extends EvalFunc
{
 public String exec(Tuple v) throws IOException
 {
     String s =(String) v.get(0);
     s = s.toUpperCase();
     if (s.matches("F"))
       s = "Female";
     else
       s = "Male";
     return s;
 }
}
-----------------

package pig.analytics;
public class Grade extends EvalFunc
{
 public String exec(Tuple v) throws IOException
 {
     String sal =(Integer) v.get(0);
     String grade;
     if (sal>=70000)
       grade="A";
     else if (sal>=50000)
          grade="B";
         else if (sal>=30000)
               grade="C";
              else
               grade="D";
     return grade;
 }
}
------
package pig.analytics;
public class DeptName extends EvalFunc
{
 public String exec(Tuple v) throws IOException
 {
    int dno = (Integer)v.get(0);
    String dname;
    switch (dno){
    case 11 :
          dname = "Marketing";
          break;
    case 12 :
          dname = "HR";
          break;
    case 13 :
          dname = "Finance";
          break;
    default:
          dname = "Others";
     }
    return dname;  
 }
}
---------------------------------

---------------------------
export into jar : Desktop/pigs.jar;
grunt> register Desktop/pigs.jar;
grunt> define gender pig.analytics.Gender();
grunt> define grade pig.analytics.Grade();
grunt> define dept pig.analytics.DeptName();

grunt> res = foreach emp generate
    id, ucase(name) as name,
     sal, grade(sal) as grade,
    gender(sex) as sex,
    dept(dno) as dname ;
---------------------------------











     
         







What's Your Reaction?

like

dislike

love

funny

angry

sad

wow

This site uses cookies. By continuing to browse the site you are agreeing to our use of cookies.

ca-pub-4239506253673884