Hive 炸裂函数和侧视图(explode+lateral view)

-- explode函数(炸裂函数,UDTF(表生成函数))
-- explode只能炸开array和map
/*
NBA球队获得总冠军年份

Chicago Bulls,1991|1992|1993|1996|1997|1998
San Antonio Spurs,1999|2003|2005|2007|2014
Golden State Warriors,1947|1956|1975|2015
Boston Celtics,1957|1959|1960|1961|1962|1963|1964|1965|1966|1968|1969|1974|1976|1981|1984|1986|2008
L.A. Lakers,1949|1950|1952|1953|1954|1972|1980|1982|1985|1987|1988|2000|2001|2002|2009|2010
Miami Heat,2006|2012|2013
Philadelphia 76ers,1955|1967|1983
Detroit Pistons,1989|1990|2004
Houston Rockets,1994|1995
New York Knicks,1970|1973
*/
create table the_nba_championship
(
    team_name         string,
    championship_year array<int>
) row format delimited fields terminated by ','
    collection items terminated by '|';

load data local inpath '/root/hivedata/The_NBA_Championship.txt'
    overwrite into table the_nba_championship;

select *
from the_nba_championship;

-- 使用explode函数对championship_year进行拆分,俗称炸开
select explode(championship_year)
from the_nba_championship;

-- 使用lateral view(侧视图)和explode结合查询球队和球队夺冠年份
select a.team_name, b.year
from the_nba_championship a lateral view explode(a.championship_year) b as year
order by b.year;

-- 使用join关联
/*with b as (select explode(championship_year) year
           from the_nba_championship)
select a.team_name, b.year
from the_nba_championship a
         join b on array_contains(a.championship_year, b.year)
order by b.year;*/

-- 统计每个球队获得总冠军的次数,且按照次数倒序排序
select a.team_name, count(1) num
from the_nba_championship a lateral view explode(a.championship_year) b as year
group by a.team_name
order by num;

版权声明:本文为m0_58535137原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
THE END
< <上一篇
下一篇>>