Hive User Defined Table Generating Functions
Step 1 - Add these jar files to your java project.
Myudtf.java
Step 2 - Compile and create a jar file of your java project. Creating a jar file is left to you.
Step 3 - Create a phn_num.txt file
Step 4 - Add the following lines to the phn_num.txt file. Save and close.
Step 5 - Change the directory to /usr/local/hive/bin
Step 6 - Enter into hive shell
Step 7 - Create a table phone, load the phn_num.txt data into the table, and verify.
Step 8 - Add the jar file to the distributed cache, create a function, and execute the UDTF.
Step 1 - Add these jar files to your java project.
hive-exec*.jar
$HIVE_HOME/lib/*.jar $HADOOP_HOME/share/hadoop/mapreduce/*.jar $HADOOP_HOME/share/hadoop/common/*.jar
import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; public class Myudtf extends GenericUDTF { private PrimitiveObjectInspector stringOI = null; @Override public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { if (args.length != 1) { throw new UDFArgumentException( "NameParserGenericUDTF() takes exactly one argument"); } if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE && ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) { throw new UDFArgumentException( "NameParserGenericUDTF() takes a string as a parameter"); } // input inspectors stringOI = (PrimitiveObjectInspector) args[0]; // output inspectors -- an object with three fields! 
List<String> fieldNames = new ArrayList<String>(2); List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(2); fieldNames.add("id"); fieldNames.add("phone_number"); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); return ObjectInspectorFactory.getStandardStructObjectInspector( fieldNames, fieldOIs); } public ArrayList<Object[]> processInputRecord(String id) { ArrayList<Object[]> result = new ArrayList<Object[]>(); // ignoring null or empty input if (id == null || id.isEmpty()) { return result; } String[] tokens = id.split("\\s+"); if (tokens.length == 2) { result.add(new Object[] { tokens[0], tokens[1] }); } else if (tokens.length == 3) { result.add(new Object[] { tokens[0], tokens[1] }); result.add(new Object[] { tokens[0], tokens[2] }); } return result; } @Override public void process(Object[] record) throws HiveException { final String id = stringOI.getPrimitiveJavaObject(record[0]).toString(); ArrayList<Object[]> results = processInputRecord(id); Iterator<Object[]> it = results.iterator(); while (it.hasNext()) { Object[] r = it.next(); forward(r); } } @Override public void close() throws HiveException { // do nothing } }
Step 3 - Create a phn_num.txt file
phn_num.txt
123,phone1,phone2 123,phone3 124,phone1,phone2 125,phone1,phone2 125,phone3 126,phone1 126,phone2,phone3
$ cd $HIVE_HOME/bin
$ hive
hive> CREATE TABLE phone(id String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\n';
hive> LOAD DATA LOCAL INPATH '/home/hduser/Desktop/HIVE/phn_num.txt' OVERWRITE INTO TABLE phone;
hive> SELECT * FROM phone;
hive> ADD JAR /home/hduser/Desktop/HIVE/UDTF.jar;
hive> CREATE TEMPORARY FUNCTION fun2 AS 'Myudtf';
hive> SELECT fun2(id) FROM phone;
Comments
Post a Comment