master

分支 (1)

管理

管理

master

gmalll
/
default.sql

select *
from student;
create external table if not exists emp
(
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      double,
    comm     double,
    deptno   int
)
    row format delimited fields terminated by ',';
select *
from emp
limit 5;
create external table if not exists dept
(
    deptno int,
    dname  string,
    loc    int
)
    row format delimited fields terminated by ',';
select count(*)
from emp;
select count(*)
from dept;
select null == null;
select null = null;
select null <=> null;
select *
from emp
where comm is null;
select *
from emp
where deptno in (20, 30);
select *
from emp
where sal between 1000 and 2000;
select ename, sal
from emp
where ename like '%L%';
select ename, sal
from emp
where ename rlike '^(A|S).*';
select emp.deptno, avg(emp.sal)
from emp
group by deptno;
select emp.deptno, emp.job, max(emp.sal)
from emp
group by deptno, job;
SELECT deptno, COUNT(*)
FROM emp
GROUP BY deptno
HAVING COUNT(*) > 1;
drop table u1;
create table if not exists u1
(
    id   int,
    name string
)
    row format delimited fields terminated by ',';
drop table arr1;
create table if not exists arr1
(
    name   STRING,
    scores ARRAY<INT>
)
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ' '
        COLLECTION ITEMS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/opt/data/arr1.txt' INTO TABLE arr1;
select *
from arr1;
SELECT name, scores[0] AS first_score
FROM arr1;
SELECT name, scores[1] AS second_score
FROM arr1
WHERE SIZE(scores) >= 3;
SELECT name, SUM(score) AS total_score
FROM arr1
         LATERAL VIEW EXPLODE(scores) exploded_table AS score
GROUP BY name;
SELECT name, scores[SIZE(scores) - 1] AS last_score
FROM arr1;
create table if not exists u2
(
    id   int,
    name string
)
    row format delimited fields terminated by ',';
load data local inpath '/opt/data/u1.txt' into table u1;
load data local inpath '/opt/data/u2.txt' into table u2;
select *
from u1
         join u2 on u1.id = u2.id;
select *
from u1
         left join u2 on u1.id = u2.id;
select *
from u1
         right join u2 on u1.id = u2.id;
select *
from u1
         full join u2 on u1.id = u2.id;
set hive.strict.checks.cartesian.product=flase;
select *
from u1,
     u2;
set mapreduce.job.reduces;
select *
from emp
order by deptno;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm, deptno
from emp
order by salcomm desc;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm, deptno
from emp
order by salcomm, deptno desc;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm
from emp
order by deptno, salcomm desc;
set mapreduce.job.reduces=1;
set mapreduce.job.reduces;
select *
from emp sort by sal desc;
insert overwrite local directory '/opt/data/output/sortsal'
select *
from emp sort by sal desc;
create database gmall;
set mapreduce.job.reduces=3;
insert overwrite local directory '/opt/module/data/output/distBy'
select empno, ename, job, deptno, sal + nvl(comm, 0) salcomm
from emp distribute by deptno sort by salcomm desc;
select empno, ename, job, deptno, sal + nvl(comm, 0) salcomm
from emp distribute by deptno sort by salcomm desc;
select *
from emp distribute by deptno sort by deptno;
select *
from emp cluster by deptno;
show functions;
select `current_date`();
select `current_timestamp`();
select from_unixtime(000000000);
select from_unixtime(000000000, 'yyyyMMdd');
select from_unixtime(000000000, 'yyyy-MM-dd HH:mm:ss');
select unix_timestamp('2019-09-15 14:23:00');
select datediff('2020-04-18', '2019-11-21');
select datediff('2019-11-21', '2020-04-18');
select dayofmonth(`current_date`());
select last_day(`current_date`());
select date_sub(current_date, dayofmonth(current_date) - 1);
select date_sub(current_date, dayofmonth(current_date) - 1);
select add_months(date_sub(current_date, dayofmonth(current_date) - 1), 1);
select to_date('2020-01-01');
select to_date('2020-01-01 12:12:12');
select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
select date_format(current_date(), 'yyyyMMdd');
select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');
select *, round(datediff(current_date, hiredate) / 365, 1) workingyears
from emp;
select lower("HELLO,WORD");
select upper(emp.ename), emp.ename
from emp;
select length(emp.ename), emp.ename
from emp;
select emp.empno || " " || emp.ename idname
from emp;
select concat(emp.empno, " ", emp.ename) idname
from emp;
select concat_ws('.', 'WWW', `array`('sias', 'com'));
select concat_ws(' ', emp.ename, emp.job)
from emp;
SELECT substr('www.sias.com', 5);
SELECT substr('www.sias.com', -5);
SELECT substr('www.sias.com', 5, 5);
select split("www.sias.com", "\\.");
select round(314.222);
select round(314.222, 2);
select round(314.222, -2);
select ceil(3.14524);
select `floor`(2.9000);
select emp.sal, `if`(emp.sal < 1500, 1, `if`(emp.sal < 3000, 2, 3))
from emp;
select sal, if(sal <= 1500, 1, if(sal <= 3000, 2, 3))
from emp;
select abs(-123);
select power(1234, 2);
select power(25, 0.5);
select sqrt(25);
select sin(90);
select emp.sal,
       case
           when emp.sal <= 1500 then 1
           when emp.sal <= 3000 then 2
           else 3 end sallevel
from emp;
select ename,
       deptno,
       case deptno
           when 10 then 'accounting'
           when 20 then 'research'
           when 30 then 'sales'
           else 'unknown' end deptname
from emp;
select emp.ename,
       emp.deptno,
       case
           when emp.deptno = 10 then "accounting"
           when emp.deptno = 20 then "research"
           when deptno = 30 then "sales"
           else "unknown" end deptname
from emp;
select explode(`array`('a', 'b', 'c')) as col;
select explode(`map`('a', 8, 'b', 88, 'c', 888));
select sin(90);
select emp.sal, coalesce(emp.comm, 0)
from emp;
select sum(emp.sal)
from emp;
select ename, sal, sum(emp.sal) salsum
from emp;
select emp.ename,
       emp.sal,
       sum(emp.sal)                                                  salsum,
       concat(round(emp.sal / sum(emp.sal) over () * 100, 1) || '%') ratiosal
from emp;
select ename,
       sal,
       sum(sal) over ()                                      salsum,
       concat(round(sal / sum(sal) over () * 100, 1) || '%') ratiosal
from emp;
drop table t2;
create table t2
(
    cname string,
    sname string,
    score int
) row format delimited fields terminated by '\t';
load data local inpath '/opt/module/data/t2.txt' into table t2;
select cname,
       sname,
       score,
       row_number() over (partition by cname order by score desc) rank1,
       rank() over (partition by cname order by score desc)       rank2,
       dense_rank() over (partition by cname order by score desc) rank3
from t2;
select count(*)
from t2;
select cname, sname, score, rank
from (select cname,
             sname,
             score,
             dense_rank() over (partition by cname order by score desc) rank
      from t2) tmp
where rank <= 3;
create table userpv
(
    cid   string,
    ctime date,
    pv    int
)
    row format delimited fields terminated by ",";
Load data local inpath '/opt/module/data/t2.txt' into table userpv;
select cid,
       ctime,
       pv,
       lag(pv) over (partition by cid order by ctime)  lagpv,
       lead(pv) over (partition by cid order by ctime) leadpv
from userpv;
create temporary function mynvl as "com.sias.hive.udf.nvl";
select mynvl(comm, 0)
from emp;


create table dept_partition
(
    deptno int,
    dname  string,
    loc    string
) partitioned by (day string) row format delimited fields terminated by ",";
load data local inpath '/opt/data/dept_20200401.log' into table dept_partition partition (day = "20200401");
load data local inpath '/opt/data/dept_20200402.log' into table dept_partition partition (day = "20200402");
load data local inpath '/opt/data/dept_20200403.log' into table dept_partition partition (day = "20200403");
select count(*)
from stu_buck;
select *
from dept_partition
where day = '20200401'
union
select *
from dept_partition
where day = '20200402'
union
select *
from dept_partition
where day = '20200403';
show partitions dept_partition;
desc formatted dept_partition;
drop table stu_buck;
create table stu_buck
(
    id   int,
    name string
) clustered by (id)
    into 4 buckets
    row format delimited fields terminated by ",";
desc formatted stu_buck;
SET hive.enforce.bucketing = true;
load data inpath "/user/hive/warehouse/stu_buck/student.txt" into table stu_buck;
select *
from stu_buck;

--------------------------------
drop table dimate_ori;
drop table dimdate;
drop table sale;
drop table sale_ori;
drop table saledetail;
drop table saledetail_ori;
create table dimate_ori
(
    dt        date,
    yearmonth int,
    year      int,
    month     tinyint,
    day       tinyint,
    week      tinyint,
    weeks     tinyint,
    quat      tinyint,
    tendays   tinyint,
    halfmonth tinyint
) row format delimited fields terminated by ",";
create table sale_ori
(
    orderid    string,
    locationid string,
    dt         date
) row format delimited fields terminated by ",";
create table saledetail_ori
(
    orderid string,
    rownum  int,
    itemid  string,
    num     int,
    price   double,
    amount  double
) row format delimited fields terminated by ",";
create table dimdate
(
    dt        date,
    yearmonth int,
    year      smallint,
    month     tinyint,
    day       tinyint,
    week      tinyint,
    weeks     tinyint,
    quat      tinyint,
    tendays   tinyint,
    halfmonth tinyint
) stored as orc;

create table sale
(
    orderid    string,
    locationid string,
    dt         date
) stored as orc;

create table saledetail
(
    orderid string,
    rownum  int,
    goods   string,
    num     int,
    price   double,
    amount  double
) stored as orc;
show tables;
load data local inpath "/opt/data/tbDate.dat" overwrite into table dimate_ori;
load data local inpath "/opt/data/tbSale.dat" overwrite into table sale_ori;
load data local inpath "/opt/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
insert overwrite table dimdate
select *
from dimate_ori;
insert overwrite table sale
select *
from sale_ori;
insert overwrite table saledetail
select *
from saledetail_ori;
-- 1、按年统计销售额
select count(*)
from sale_ori;
set mapred.map.tasks.speculative.execution=true;
set mapred.reduce.tasks.speculative.execution=true;
select year(a.dt)                     year,
       round(sum(b.amount) / 1000, 2) amounts
from sale a
         left join saledetail b on a.orderid = b.orderid
group by year(a.dt);
use default;
select year(a.dt) year, round(sum(amount), 2) amounts
from sale a
         left join saledetail b on a.orderid = b.orderid
group by year(a.dt);
--2销售金额在10W以上的订单
select orderid, round(sum(amount), 2) amounts
from saledetail
group by orderid
having sum(amount) > 100000;
--3每年销售额的差
select year,
       amounts,
       round(lag(amounts) over (order by year), 2)           prioramount,
       round(amounts - lag(amounts) over (order by year), 2) diff
from (select year(a.dt)            year,
             round(sum(amount), 2) amounts
      from sale a
               join saledetail b on a.orderid = b.orderid
      group by year(a.dt)) c;
--4年度订单金额前10
select dt, orderids, amounts, rank
from (select dt,
             orderids,
             amounts,
             dense_rank() over (partition by dt order by amounts desc ) rank
      from (select year(b.dt) dt, a.orderid orderids, round(sum(a.amount), 2) amounts
            from saledetail a
                     join sale b on a.orderid = b.orderid
            group by year(b.dt), a.orderid) tmp1) tmp2
where rank <= 10;
--5季度订单金额前十
select year, quat, orderid, amount, rank
from (select year, quat, orderid, amount, dense_rank() over (partition by year,quat order by amount desc ) rank
      from (select c.year,
                   c.quat,
                   a.orderid,
                   round(sum(amount), 2) amount
            from sale a
                     join saledetail b on a.orderid = b.orderid
                     join dimdate c on a.dt = c.dt
            group by c.year, c.quat, a.orderid) tmp) tmp1
where rank <= 10;


--6、求所有交易日中订单金额最高的前10位
select dt, orderid, amounts, rank
from (select dt,
             orderid,
             amounts,
             dense_rank() over (order by amounts desc ) rank
      from (
            select a.dt, a.orderid, round(sum(b.amount), 2) amounts
            from sale a
                      join saledetail b on a.orderid = b.orderid
             group by a.dt, a.orderid
           )tmp
      ) tmp1
where rank <= 10;

--7、每年度销售额最大的交易日
select year, dt, sumamount
from (select year(dt) year, dt, sumamount, dense_rank() over (partition by year(dt) order by sumamount desc ) rank
      from (select a.dt, round(sum(b.amount), 2) sumamount
            from sale a
                     join saledetail b on a.orderid = b.orderid
            group by a.dt) tmp) tmp1

where rank = 1
order by year;
--8、年度最畅销的商品(即每年销售金额最大的商品)
select year, goods, amounts,rank
from (select year, goods, amounts, dense_rank() over (partition by year order by amounts desc ) rank
      from (select year(a.dt) year, goods, round(sum(b.amount), 2) amounts
            from sale a
                     join saledetail b on a.orderid = b.orderid
            group by year(a.dt), goods) tmp) tmp1
where rank = 1;