制作网站的成本规划,网站设计分类,宝安做网站哪家好,宾馆酒店网站建设方案招聘网站—Hive数据分析
第1关#xff1a;统计最热门的十种职业#xff08;招聘人数最多#xff09;
#进入hive
hive#在hive中创建数据库 mydb
create database mydb;#使用数据库 mydb
use mydb;#创建表 recruitcleaned 并使用/t分割字段
create table re…招聘网站—Hive数据分析
第1关统计最热门的十种职业招聘人数最多
#进入hive
hive#在hive中创建数据库 mydb
create database mydb;#使用数据库 mydb
use mydb;#创建表 recruitcleaned 并使用/t分割字段
create table recruitcleaned(job_name string,city string, salary string, workingExp string, edu string, company_name string, company_size string, welfare string, skill string) row format delimited fields terminated by \t stored as textfile;#将本地清洗后的数据导入recruitcleaned中
load data local inpath /data/workspace/myshixun/data/advertise.txt into table recruitcleaned;#创建 recruitersnumbers 表存放最热门的十种职业的数据
create table recruitersnumbers(job_name string, count int) row format delimited fields terminated by \t stored as textfile;#将结果存入至 recruitersnumbers 表中
insert overwrite table recruitersnumbers select job_name,count(*) count from recruitcleaned group by job_name order by count desc limit 10;第2关分析学历相同的情况下每种职位的平均薪资
#创建 edulevelsalary 表
create table edulevelsalary(edu string, job_name string,salary double) row format delimited fields terminated by \t stored as textfile;#将结果存入至 edulevelsalary 表中
insert overwrite table edulevelsalary select edu,job_name,avg(salary)salary from recruitcleaned group by edu,job_name;第3关统计各个职业要求具备的技能及其出现的次数
#创建 jobskill 表
create table jobskill(job_name string, skill string,count int) row format delimited fields terminated by \t stored as textfile;#将结果存入至 edulevelsalary 表中
insert overwrite table jobskill select job_name,skill, count(*) count from (select job_name,subview.* from recruitcleaned lateral view explode(split(skill,\\|)) subview as skill)t1 group by job_name,skill order by job_name,count;第4关分析每种职业在各个薪资等级需要招聘的人数
#创建 jobsalarylevel 表
create table jobsalarylevel(job_name string, salary string,count int) row format delimited fields terminated by \t stored as textfile;#将结果存入至 edulevelsalary 表中
insert overwrite table jobsalarylevel select job_name,salary,count(*) count from (select job_name,case when 0recruitcleaned.salary and recruitcleaned.salary 10 then primary when 10recruitcleaned.salary and recruitcleaned.salary 20 then intermediate when recruitcleaned.salary20 then senior else other END as salary from recruitcleaned ) t1 group by job_name,salary;