hive
查看本地的文件 #Can execute local commands within CLI, place a command in between ! and ; !cat data/text.txt;
文件的内容 user1,Funny Story,1343182026191 user2,Cool Deal,1343182133839 user4,Interesting Post,1343182154633 user5,Yet Another Blog,13431839394
建表 CREATE TABLE posts (user STRING, post STRING, time BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
hive> show tables; hive> describe posts;
把文件加载到表中 LOAD DATA LOCAL INPATH 'text.txt' OVERWRITE INTO TABLE posts;
– Load data from HDFS location hive> LOAD DATA INPATH '/training/hive/user-posts.txt' > OVERWRITE INTO TABLE posts;
– Load data from a local file system hive> LOAD DATA LOCAL INPATH 'data/user-posts.txt' > OVERWRITE INTO TABLE posts;
如果在hdfs里存在大量的同类型格式的文件,可以通过已有文件来建表
hive> CREATE EXTERNAL TABLE posts > (user STRING, post STRING, time BIGINT) > ROW FORMAT DELIMITED > FIELDS TERMINATED BY ',' > STORED AS TEXTFILE > LOCATION '/training/hive/';
测试一下是否在hdfs里 hdfs dfs -cat /user/hive/warehouse/posts/text.txt
select count (1) from posts; select * from posts where user="user2"; select * from posts where time<=1343182133839 limit 2;
DROP TABLE posts; 将会在hdfs系统里执行删除 hdfs dfs -ls /user/hive/warehouse/
为了提高性能,还可以使用分区,如果设置了分区,用户 在插入数据时必须指定一个分区
CREATE TABLE posts1 (user STRING, post STRING, time BIGINT) PARTITIONED BY(country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
describe posts1;
用户在插入数据时必须指定一个分区 LOAD DATA LOCAL INPATH 'text.txt' OVERWRITE INTO TABLE posts1 PARTITION(country='US');
LOAD DATA LOCAL INPATH 'text.txt' OVERWRITE INTO TABLE posts1 PARTITION(country='AUSTRALIA');
显示已有的分区 show partitions posts; hdfs dfs -ls -R /user/hive/warehouse/posts
查询 select * from posts where country='US' limit 10;
CREATE TABLE posts_likes (user STRING, post STRING, likes_count INT); INSERT OVERWRITE TABLE posts_likes > SELECT p.user, p.post, l.count > FROM posts p JOIN likes l ON (p.user = l.user);
Outer Join表 SELECT p.*, l.* FROM posts p LEFT OUTER JOIN likes l ON (p.user = l.user) limit 10; SELECT p.*, l.* FROM posts p RIGHT OUTER JOIN likes l ON (p.user = l.user) limit 10; SELECT p.*, l.* FROM posts p FULL OUTER JOIN likes l ON (p.user = l.user) limit 10; Resources
$ javac HiveCreateDb.java $ java HiveCreateDb
import java.sql.SQLException; import java.sql.Connection; import java.sql.ResultSet; import java.sql.Statement; import java.sql.DriverManager; public class HiveQLWhere { private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver"; public static void main(String[] args) throws SQLException { // Register driver and create driver instance Class.forName(driverName); // get connection Connection con = DriverManager.getConnection("jdbc:hive://localhost:10000/userdb", "", ""); // create statement Statement stmt = con.createStatement(); // execute statement Resultset res = stmt.executeQuery("SELECT * FROM employee WHERE salary>30000;"); System.out.println("Result:"); System.out.println(" ID \t Name \t Salary \t Designation \t Dept "); while (res.next()) { System.out.println(res.getInt(1) + " " + res.getString(2) + " " + res.getDouble(3) + " " + res.getString(4) + " " + res.getString(5)); } con.close(); } } View Code
$ javac HiveQLWhere.java $ java HiveQLWhere