Hive's special functions named_struct and inline, and the special type map<string, array<string>>

spark-sql> desc test2;
userid int NULL
date string NULL
list_01 array<string> NULL
target string NULL
group_num int NULL
Time taken: 0.526 seconds, Fetched 5 row(s)

spark-sql> select * from test2;
1 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220303 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
1 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 1:20220202 1
2 20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 2:20220202 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 2
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3
3 20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"] 3:20220303 3

spark-sql> select named_struct('a',target, 'b',list_01) from test2 limit 1;
{"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}

1. Build the type struct<a:string,b:array<string>> with named_struct
spark-sql> 
    drop table if exists test3;
    create table test3 as 
    select 
        named_struct('a',target, 'b',list_01) as content_shi_group,
        group_num
    from 
        test2;
spark-sql> select * from test3;
{"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 3
{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 3

2. Aggregate by group_num to produce the type array<struct<a:string,b:array<string>>>
spark-sql> 
    drop table if exists test4;
    create table test4 as 
    select 
        group_num, collect_list(content_shi_group) as list_struct
    from test3 
    group by group_num;
spark-sql> select * from test4;
1 [{"a":"1:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]},{"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]},{"a":"1:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}]
3 [{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]},{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}]
2 [{"a":"2:20220202","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]},{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]},{"a":"3:20220303","b":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}]

3. Convert array<struct<a:string,b:array<string>>> to map<string,array<string>> with inline
spark-sql> 
    drop table if exists test5;
    create table test5 as 
    select 
        group_num, map(k1,v1) as map_list
    from test4 
    lateral view inline(list_struct) t1 as k1,v1;
spark-sql> select * from test5;
1 {"1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
1 {"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
1 {"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
3 {"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
3 {"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
2 {"2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
2 {"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}
2 {"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]}

spark-sql> select inline(list_struct) from test4 where group_num=1;
1:20220303 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
1:20220202 ["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]
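
For comparison, explode on the same array returns each element as a single struct column, whereas inline flattens the struct fields into separate columns (k1 and v1 above). The explode counterpart would be:

spark-sql> select explode(list_struct) from test4 where group_num=1;
-- each row is one struct, e.g. {"a":"1:20220303","b":["1:20220202",...]}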

4. Generate the Hive type map<string,array<string>> directly with map()
spark-sql> 
    drop table if exists test3_02;
    create table test3_02 as 
    select 
        map(target, list_01) as content_group,
        group_num
    from 
    test2;
spark-sql> select * from test3_02;
{"1:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"1:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 1
{"2:20220202":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 2
{"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 3
{"3:20220303":["1:20220202","1:20220202","2:20220202","3:20220303","3:20220303","3:20220303","3:20220303","1:20220303"]} 3
spark-sql> desc test3_02;
content_group map<string,array<string>> NULL
group_num int NULL
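
Once the map<string,array<string>> column exists, single entries can be read back with bracket syntax or element_at (Spark 2.4+), and the key set listed with map_keys. A minimal sketch, using the key 1:20220303 from the sample rows above:

spark-sql> 
    select 
        content_group['1:20220303']             as via_bracket,     -- NULL when the key is absent
        element_at(content_group, '1:20220303') as via_element_at,
        map_keys(content_group)                 as keys
    from test3_02 
    where group_num = 1;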