1 Star 2 Fork 1

小明/gmalll

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
default.sql 13.18 KB
一键复制 编辑 原始数据 按行查看 历史
小明 提交于 2024-05-26 22:01 . test
select *
from student;
create external table if not exists emp
(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
row format delimited fields terminated by ',';
select *
from emp
limit 5;
create external table if not exists dept
(
deptno int,
dname string,
loc int
)
row format delimited fields terminated by ',';
select count(*)
from emp;
select count(*)
from dept;
select null == null;
select null = null;
select null <=> null;
select *
from emp
where comm is null;
select *
from emp
where deptno in (20, 30);
select *
from emp
where sal between 1000 and 2000;
select ename, sal
from emp
where ename like '%L%';
select ename, sal
from emp
where ename rlike '^(A|S).*';
select emp.deptno, avg(emp.sal)
from emp
group by deptno;
select emp.deptno, emp.job, max(emp.sal)
from emp
group by deptno, job;
SELECT deptno, COUNT(*)
FROM emp
GROUP BY deptno
HAVING COUNT(*) > 1;
drop table u1;
create table if not exists u1
(
id int,
name string
)
row format delimited fields terminated by ',';
drop table arr1;
create table if not exists arr1
(
name STRING,
scores ARRAY<INT>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ' '
COLLECTION ITEMS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/opt/data/arr1.txt' INTO TABLE arr1;
select *
from arr1;
SELECT name, scores[0] AS first_score
FROM arr1;
SELECT name, scores[1] AS second_score
FROM arr1
WHERE SIZE(scores) >= 3;
SELECT name, SUM(score) AS total_score
FROM arr1
LATERAL VIEW EXPLODE(scores) exploded_table AS score
GROUP BY name;
SELECT name, scores[SIZE(scores) - 1] AS last_score
FROM arr1;
create table if not exists u2
(
id int,
name string
)
row format delimited fields terminated by ',';
load data local inpath '/opt/data/u1.txt' into table u1;
load data local inpath '/opt/data/u2.txt' into table u2;
select *
from u1
join u2 on u1.id = u2.id;
select *
from u1
left join u2 on u1.id = u2.id;
select *
from u1
right join u2 on u1.id = u2.id;
select *
from u1
full join u2 on u1.id = u2.id;
set hive.strict.checks.cartesian.product=flase;
select *
from u1,
u2;
set mapreduce.job.reduces;
select *
from emp
order by deptno;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm, deptno
from emp
order by salcomm desc;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm, deptno
from emp
order by salcomm, deptno desc;
select empno, ename, job, mgr, sal + nvl(comm, 0) salcomm
from emp
order by deptno, salcomm desc;
set mapreduce.job.reduces=1;
set mapreduce.job.reduces;
select *
from emp sort by sal desc;
insert overwrite local directory '/opt/data/output/sortsal'
select *
from emp sort by sal desc;
create database gmall;
set mapreduce.job.reduces=3;
insert overwrite local directory '/opt/module/data/output/distBy'
select empno, ename, job, deptno, sal + nvl(comm, 0) salcomm
from emp distribute by deptno sort by salcomm desc;
select empno, ename, job, deptno, sal + nvl(comm, 0) salcomm
from emp distribute by deptno sort by salcomm desc;
select *
from emp distribute by deptno sort by deptno;
select *
from emp cluster by deptno;
show functions;
select `current_date`();
select `current_timestamp`();
select from_unixtime(000000000);
select from_unixtime(000000000, 'yyyyMMdd');
select from_unixtime(000000000, 'yyyy-MM-dd HH:mm:ss');
select unix_timestamp('2019-09-15 14:23:00');
select datediff('2020-04-18', '2019-11-21');
select datediff('2019-11-21', '2020-04-18');
select dayofmonth(`current_date`());
select last_day(`current_date`());
select date_sub(current_date, dayofmonth(current_date) - 1);
select date_sub(current_date, dayofmonth(current_date) - 1);
select add_months(date_sub(current_date, dayofmonth(current_date) - 1), 1);
select to_date('2020-01-01');
select to_date('2020-01-01 12:12:12');
select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
select date_format(current_date(), 'yyyyMMdd');
select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');
select *, round(datediff(current_date, hiredate) / 365, 1) workingyears
from emp;
select lower("HELLO,WORD");
select upper(emp.ename), emp.ename
from emp;
select length(emp.ename), emp.ename
from emp;
select emp.empno || " " || emp.ename idname
from emp;
select concat(emp.empno, " ", emp.ename) idname
from emp;
select concat_ws('.', 'WWW', `array`('sias', 'com'));
select concat_ws(' ', emp.ename, emp.job)
from emp;
SELECT substr('www.sias.com', 5);
SELECT substr('www.sias.com', -5);
SELECT substr('www.sias.com', 5, 5);
select split("www.sias.com", "\\.");
select round(314.222);
select round(314.222, 2);
select round(314.222, -2);
select ceil(3.14524);
select `floor`(2.9000);
select emp.sal, `if`(emp.sal < 1500, 1, `if`(emp.sal < 3000, 2, 3))
from emp;
select sal, if(sal <= 1500, 1, if(sal <= 3000, 2, 3))
from emp;
select abs(-123);
select power(1234, 2);
select power(25, 0.5);
select sqrt(25);
select sin(90);
select emp.sal,
case
when emp.sal <= 1500 then 1
when emp.sal <= 3000 then 2
else 3 end sallevel
from emp;
select ename,
deptno,
case deptno
when 10 then 'accounting'
when 20 then 'research'
when 30 then 'sales'
else 'unknown' end deptname
from emp;
select emp.ename,
emp.deptno,
case
when emp.deptno = 10 then "accounting"
when emp.deptno = 20 then "research"
when deptno = 30 then "sales"
else "unknown" end deptname
from emp;
select explode(`array`('a', 'b', 'c')) as col;
select explode(`map`('a', 8, 'b', 88, 'c', 888));
select sin(90);
select emp.sal, coalesce(emp.comm, 0)
from emp;
select sum(emp.sal)
from emp;
select ename, sal, sum(emp.sal) salsum
from emp;
select emp.ename,
emp.sal,
sum(emp.sal) salsum,
concat(round(emp.sal / sum(emp.sal) over () * 100, 1) || '%') ratiosal
from emp;
select ename,
sal,
sum(sal) over () salsum,
concat(round(sal / sum(sal) over () * 100, 1) || '%') ratiosal
from emp;
drop table t2;
create table t2
(
cname string,
sname string,
score int
) row format delimited fields terminated by '\t';
load data local inpath '/opt/module/data/t2.txt' into table t2;
select cname,
sname,
score,
row_number() over (partition by cname order by score desc) rank1,
rank() over (partition by cname order by score desc) rank2,
dense_rank() over (partition by cname order by score desc) rank3
from t2;
select count(*)
from t2;
select cname, sname, score, rank
from (select cname,
sname,
score,
dense_rank() over (partition by cname order by score desc) rank
from t2) tmp
where rank <= 3;
create table userpv
(
cid string,
ctime date,
pv int
)
row format delimited fields terminated by ",";
Load data local inpath '/opt/module/data/t2.txt' into table userpv;
select cid,
ctime,
pv,
lag(pv) over (partition by cid order by ctime) lagpv,
lead(pv) over (partition by cid order by ctime) leadpv
from userpv;
create temporary function mynvl as "com.sias.hive.udf.nvl";
select mynvl(comm, 0)
from emp;
create table dept_partition
(
deptno int,
dname string,
loc string
) partitioned by (day string) row format delimited fields terminated by ",";
load data local inpath '/opt/data/dept_20200401.log' into table dept_partition partition (day = "20200401");
load data local inpath '/opt/data/dept_20200402.log' into table dept_partition partition (day = "20200402");
load data local inpath '/opt/data/dept_20200403.log' into table dept_partition partition (day = "20200403");
select count(*)
from stu_buck;
select *
from dept_partition
where day = '20200401'
union
select *
from dept_partition
where day = '20200402'
union
select *
from dept_partition
where day = '20200403';
show partitions dept_partition;
desc formatted dept_partition;
drop table stu_buck;
create table stu_buck
(
id int,
name string
) clustered by (id)
into 4 buckets
row format delimited fields terminated by ",";
desc formatted stu_buck;
SET hive.enforce.bucketing = true;
load data inpath "/user/hive/warehouse/stu_buck/student.txt" into table stu_buck;
select *
from stu_buck;
--------------------------------
drop table dimate_ori;
drop table dimdate;
drop table sale;
drop table sale_ori;
drop table saledetail;
drop table saledetail_ori;
create table dimate_ori
(
dt date,
yearmonth int,
year int,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) row format delimited fields terminated by ",";
create table sale_ori
(
orderid string,
locationid string,
dt date
) row format delimited fields terminated by ",";
create table saledetail_ori
(
orderid string,
rownum int,
itemid string,
num int,
price double,
amount double
) row format delimited fields terminated by ",";
create table dimdate
(
dt date,
yearmonth int,
year smallint,
month tinyint,
day tinyint,
week tinyint,
weeks tinyint,
quat tinyint,
tendays tinyint,
halfmonth tinyint
) stored as orc;
create table sale
(
orderid string,
locationid string,
dt date
) stored as orc;
create table saledetail
(
orderid string,
rownum int,
goods string,
num int,
price double,
amount double
) stored as orc;
show tables;
load data local inpath "/opt/data/tbDate.dat" overwrite into table dimate_ori;
load data local inpath "/opt/data/tbSale.dat" overwrite into table sale_ori;
load data local inpath "/opt/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
insert overwrite table dimdate
select *
from dimate_ori;
insert overwrite table sale
select *
from sale_ori;
insert overwrite table saledetail
select *
from saledetail_ori;
-- 1、按年统计销售额
select count(*)
from sale_ori;
set mapred.map.tasks.speculative.execution=true;
set mapred.reduce.tasks.speculative.execution=true;
select year(a.dt) year,
round(sum(b.amount) / 1000, 2) amounts
from sale a
left join saledetail b on a.orderid = b.orderid
group by year(a.dt);
use default;
select year(a.dt) year, round(sum(amount), 2) amounts
from sale a
left join saledetail b on a.orderid = b.orderid
group by year(a.dt);
--2销售金额在10W以上的订单
select orderid, round(sum(amount), 2) amounts
from saledetail
group by orderid
having sum(amount) > 100000;
--3每年销售额的差
select year,
amounts,
round(lag(amounts) over (order by year), 2) prioramount,
round(amounts - lag(amounts) over (order by year), 2) diff
from (select year(a.dt) year,
round(sum(amount), 2) amounts
from sale a
join saledetail b on a.orderid = b.orderid
group by year(a.dt)) c;
--4年度订单金额前10
select dt, orderids, amounts, rank
from (select dt,
orderids,
amounts,
dense_rank() over (partition by dt order by amounts desc ) rank
from (select year(b.dt) dt, a.orderid orderids, round(sum(a.amount), 2) amounts
from saledetail a
join sale b on a.orderid = b.orderid
group by year(b.dt), a.orderid) tmp1) tmp2
where rank <= 10;
--5季度订单金额前十
select year, quat, orderid, amount, rank
from (select year, quat, orderid, amount, dense_rank() over (partition by year,quat order by amount desc ) rank
from (select c.year,
c.quat,
a.orderid,
round(sum(amount), 2) amount
from sale a
join saledetail b on a.orderid = b.orderid
join dimdate c on a.dt = c.dt
group by c.year, c.quat, a.orderid) tmp) tmp1
where rank <= 10;
--6、求所有交易日中订单金额最高的前10位
select dt, orderid, amounts, rank
from (select dt,
orderid,
amounts,
dense_rank() over (order by amounts desc ) rank
from (
select a.dt, a.orderid, round(sum(b.amount), 2) amounts
from sale a
join saledetail b on a.orderid = b.orderid
group by a.dt, a.orderid
)tmp
) tmp1
where rank <= 10;
--7、每年度销售额最大的交易日
select year, dt, sumamount
from (select year(dt) year, dt, sumamount, dense_rank() over (partition by year(dt) order by sumamount desc ) rank
from (select a.dt, round(sum(b.amount), 2) sumamount
from sale a
join saledetail b on a.orderid = b.orderid
group by a.dt) tmp) tmp1
where rank = 1
order by year;
--8、年度最畅销的商品(即每年销售金额最大的商品)
select year, goods, amounts,rank
from (select year, goods, amounts, dense_rank() over (partition by year order by amounts desc ) rank
from (select year(a.dt) year, goods, round(sum(b.amount), 2) amounts
from sale a
join saledetail b on a.orderid = b.orderid
group by year(a.dt), goods) tmp) tmp1
where rank = 1;
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
SQL
1
https://gitee.com/xiaoming12139/gmalll.git
git@gitee.com:xiaoming12139/gmalll.git
xiaoming12139
gmalll
gmalll
master

搜索帮助