Spark SQL----内置函数Aggregate Functions

07-14 1267阅读

Spark SQL----内置函数Aggregate Functions

  • Aggregate Functions

    例子:

    -- any
    SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col);
    +--------+
    |any(col)|
    +--------+
    |    true|
    +--------+
    SELECT any(col) FROM VALUES (NULL), (true), (false) AS tab(col);
    +--------+
    |any(col)|
    +--------+
    |    true|
    +--------+
    SELECT any(col) FROM VALUES (false), (false), (NULL) AS tab(col);
    +--------+
    |any(col)|
    +--------+
    |   false|
    +--------+
    -- any_value
    SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col);
    +--------------+
    |any_value(col)|
    +--------------+
    |            10|
    +--------------+
    SELECT any_value(col) FROM VALUES (NULL), (5), (20) AS tab(col);
    +--------------+
    |any_value(col)|
    +--------------+
    |          NULL|
    +--------------+
    SELECT any_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
    +--------------+
    |any_value(col)|
    +--------------+
    |             5|
    +--------------+
    -- approx_count_distinct
    SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1);
    +---------------------------+
    |approx_count_distinct(col1)|
    +---------------------------+
    |                          3|
    +---------------------------+
    -- approx_percentile
    SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);
    +-------------------------------------------------+
    |approx_percentile(col, array(0.5, 0.4, 0.1), 100)|
    +-------------------------------------------------+
    |                                        [1, 1, 0]|
    +-------------------------------------------------+
    SELECT approx_percentile(col, 0.5, 100) FROM VALUES (0), (6), (7), (9), (10) AS tab(col);
    +--------------------------------+
    |approx_percentile(col, 0.5, 100)|
    +--------------------------------+
    |                               7|
    +--------------------------------+
    SELECT approx_percentile(col, 0.5, 100) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '1' MONTH), (INTERVAL '2' MONTH), (INTERVAL '10' MONTH) AS tab(col);
    +--------------------------------+
    |approx_percentile(col, 0.5, 100)|
    +--------------------------------+
    |              INTERVAL '1' MONTH|
    +--------------------------------+
    SELECT approx_percentile(col, array(0.5, 0.7), 100) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '1' SECOND), (INTERVAL '2' SECOND), (INTERVAL '10' SECOND) AS tab(col);
    +--------------------------------------------+
    |approx_percentile(col, array(0.5, 0.7), 100)|
    +--------------------------------------------+
    |                        [INTERVAL '01' SE...|
    +--------------------------------------------+
    -- array_agg
    SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col);
    +-----------------+
    |collect_list(col)|
    +-----------------+
    |        [1, 2, 1]|
    +-----------------+
    -- avg
    SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);
    +--------+
    |avg(col)|
    +--------+
    |     2.0|
    +--------+
    SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col);
    +--------+
    |avg(col)|
    +--------+
    |     1.5|
    +--------+
    -- bit_and
    SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col);
    +------------+
    |bit_and(col)|
    +------------+
    |           1|
    +------------+
    -- bit_or
    SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col);
    +-----------+
    |bit_or(col)|
    +-----------+
    |          7|
    +-----------+
    -- bit_xor
    SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);
    +------------+
    |bit_xor(col)|
    +------------+
    |           6|
    +------------+
    -- bitmap_construct_agg
    SELECT substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6) FROM VALUES (1), (2), (3) AS tab(col);
    +--------------------------------------------------------------------+
    |substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6)|
    +--------------------------------------------------------------------+
    |               070000|
    +--------------------------------------------------------------------+
    SELECT substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6) FROM VALUES (1), (1), (1) AS tab(col);
    +--------------------------------------------------------------------+
    |substring(hex(bitmap_construct_agg(bitmap_bit_position(col))), 0, 6)|
    +--------------------------------------------------------------------+
    |               010000|
    +--------------------------------------------------------------------+
    -- bitmap_or_agg
    SELECT substring(hex(bitmap_or_agg(col)), 0, 6) FROM VALUES (X '10'), (X '20'), (X '40') AS tab(col);
    +----------------------------------------+
    |substring(hex(bitmap_or_agg(col)), 0, 6)|
    +----------------------------------------+
    |                                  700000|
    +----------------------------------------+
    SELECT substring(hex(bitmap_or_agg(col)), 0, 6) FROM VALUES (X '10'), (X '10'), (X '10') AS tab(col);
    +----------------------------------------+
    |substring(hex(bitmap_or_agg(col)), 0, 6)|
    +----------------------------------------+
    |                                  100000|
    +----------------------------------------+
    -- bool_and
    SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);
    +-------------+
    |bool_and(col)|
    +-------------+
    |         true|
    +-------------+
    SELECT bool_and(col) FROM VALUES (NULL), (true), (true) AS tab(col);
    +-------------+
    |bool_and(col)|
    +-------------+
    |         true|
    +-------------+
    SELECT bool_and(col) FROM VALUES (true), (false), (true) AS tab(col);
    +-------------+
    |bool_and(col)|
    +-------------+
    |        false|
    +-------------+
    -- bool_or
    SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);
    +------------+
    |bool_or(col)|
    +------------+
    |        true|
    +------------+
    SELECT bool_or(col) FROM VALUES (NULL), (true), (false) AS tab(col);
    +------------+
    |bool_or(col)|
    +------------+
    |        true|
    +------------+
    SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col);
    +------------+
    |bool_or(col)|
    +------------+
    |       false|
    +------------+
    -- collect_list
    SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col);
    +-----------------+
    |collect_list(col)|
    +-----------------+
    |        [1, 2, 1]|
    +-----------------+
    -- collect_set
    SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col);
    +----------------+
    |collect_set(col)|
    +----------------+
    |          [1, 2]|
    +----------------+
    -- corr
    SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2);
    +------------------+
    |      corr(c1, c2)|
    +------------------+
    |0.8660254037844387|
    +------------------+
    -- count
    SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
    +--------+
    |count(1)|
    +--------+
    |       4|
    +--------+
    SELECT count(col) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
    +----------+
    |count(col)|
    +----------+
    |         3|
    +----------+
    SELECT count(DISTINCT col) FROM VALUES (NULL), (5), (5), (10) AS tab(col);
    +-------------------+
    |count(DISTINCT col)|
    +-------------------+
    |                  2|
    +-------------------+
    -- count_if
    SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
    +-------------------------+
    |count_if(((col % 2) = 0))|
    +-------------------------+
    |                        2|
    +-------------------------+
    SELECT count_if(col IS NULL) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
    +-----------------------+
    |count_if((col IS NULL))|
    +-----------------------+
    |                      1|
    +-----------------------+
    -- count_min_sketch
    SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col);
    +---------------------------------------+
    |hex(count_min_sketch(col, 0.5, 0.5, 1))|
    +---------------------------------------+
    |                   00000001000000000...|
    +---------------------------------------+
    -- covar_pop
    SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
    +------------------+
    | covar_pop(c1, c2)|
    +------------------+
    |0.6666666666666666|
    +------------------+
    -- covar_samp
    SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
    +------------------+
    |covar_samp(c1, c2)|
    +------------------+
    |               1.0|
    +------------------+
    -- every
    SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col);
    +----------+
    |every(col)|
    +----------+
    |      true|
    +----------+
    SELECT every(col) FROM VALUES (NULL), (true), (true) AS tab(col);
    +----------+
    |every(col)|
    +----------+
    |      true|
    +----------+
    SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col);
    +----------+
    |every(col)|
    +----------+
    |     false|
    +----------+
    -- first
    SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);
    +----------+
    |first(col)|
    +----------+
    |        10|
    +----------+
    SELECT first(col) FROM VALUES (NULL), (5), (20) AS tab(col);
    +----------+
    |first(col)|
    +----------+
    |      NULL|
    +----------+
    SELECT first(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
    +----------+
    |first(col)|
    +----------+
    |         5|
    +----------+
    -- first_value
    SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);
    +----------------+
    |first_value(col)|
    +----------------+
    |              10|
    +----------------+
    SELECT first_value(col) FROM VALUES (NULL), (5), (20) AS tab(col);
    +----------------+
    |first_value(col)|
    +----------------+
    |            NULL|
    +----------------+
    SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
    +----------------+
    |first_value(col)|
    +----------------+
    |               5|
    +----------------+
    -- grouping
    SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
    +-----+--------------+--------+
    | name|grouping(name)|sum(age)|
    +-----+--------------+--------+
    | NULL|             1|       7|
    |Alice|             0|       2|
    |  Bob|             0|       5|
    +-----+--------------+--------+
    -- grouping_id
    SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
    +-----+-------------+--------+-----------+
    | name|grouping_id()|sum(age)|avg(height)|
    +-----+-------------+--------+-----------+
    | NULL|            2|       2|      165.0|
    |Alice|            0|       2|      165.0|
    |Alice|            1|       2|      165.0|
    | NULL|            3|       7|      172.5|
    |  Bob|            1|       5|      180.0|
    |  Bob|            0|       5|      180.0|
    | NULL|            2|       5|      180.0|
    +-----+-------------+--------+-----------+
    -- histogram_numeric
    SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col);
    +-------------------------+
    |histogram_numeric(col, 5)|
    +-------------------------+
    |     [{0, 1.0}, {1, 1....|
    +-------------------------+
    -- hll_sketch_agg
    SELECT hll_sketch_estimate(hll_sketch_agg(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col);
    +--------------------------------------------+
    |hll_sketch_estimate(hll_sketch_agg(col, 12))|
    +--------------------------------------------+
    |                                           3|
    +--------------------------------------------+
    -- hll_union_agg
    SELECT hll_sketch_estimate(hll_union_agg(sketch, true)) FROM (SELECT hll_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT hll_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col));
    +------------------------------------------------+
    |hll_sketch_estimate(hll_union_agg(sketch, true))|
    +------------------------------------------------+
    |1|
    +------------------------------------------------+
    -- kurtosis
    SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
    +-------------------+
    |      kurtosis(col)|
    +-------------------+
    |-0.7014368047529618|
    +-------------------+
    SELECT kurtosis(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col);
    +-------------------+
    |      kurtosis(col)|
    +-------------------+
    |0.19432323191698986|
    +-------------------+
    -- last
    SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col);
    +---------+
    |last(col)|
    +---------+
    |       20|
    +---------+
    SELECT last(col) FROM VALUES (10), (5), (NULL) AS tab(col);
    +---------+
    |last(col)|
    +---------+
    |     NULL|
    +---------+
    SELECT last(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
    +---------+
    |last(col)|
    +---------+
    |        5|
    +---------+
    -- last_value
    SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col);
    +---------------+
    |last_value(col)|
    +---------------+
    |             20|
    +---------------+
    SELECT last_value(col) FROM VALUES (10), (5), (NULL) AS tab(col);
    +---------------+
    |last_value(col)|
    +---------------+
    |           NULL|
    +---------------+
    SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
    +---------------+
    |last_value(col)|
    +---------------+
    |              5|
    +---------------+
    -- max
    SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col);
    +--------+
    |max(col)|
    +--------+
    |      50|
    +--------+
    -- max_by
    SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y);
    +------------+
    |max_by(x, y)|
    +------------+
    |           b|
    +------------+
    -- mean
    SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col);
    +---------+
    |mean(col)|
    +---------+
    |      2.0|
    +---------+
    SELECT mean(col) FROM VALUES (1), (2), (NULL) AS tab(col);
    +---------+
    |mean(col)|
    +---------+
    |      1.5|
    +---------+
    -- median
    SELECT median(col) FROM VALUES (0), (10) AS tab(col);
    +-----------+
    |median(col)|
    +-----------+
    |        5.0|
    +-----------+
    SELECT median(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
    +--------------------+
    |         median(col)|
    +--------------------+
    |INTERVAL '0-5' YE...|
    +--------------------+
    -- min
    SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col);
    +--------+
    |min(col)|
    +--------+
    |      -1|
    +--------+
    -- min_by
    SELECT min_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y);
    +------------+
    |min_by(x, y)|
    +------------+
    |           a|
    +------------+
    -- mode
    SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col);
    +---------+
    |mode(col)|
    +---------+
    |       10|
    +---------+
    SELECT mode(col) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH), (INTERVAL '10' MONTH) AS tab(col);
    +-------------------+
    |          mode(col)|
    +-------------------+
    |INTERVAL '10' MONTH|
    +-------------------+
    SELECT mode(col) FROM VALUES (0), (10), (10), (null), (null), (null) AS tab(col);
    +---------+
    |mode(col)|
    +---------+
    |       10|
    +---------+
    -- percentile
    SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col);
    +-----------------------+
    |percentile(col, 0.3, 1)|
    +-----------------------+
    |                    3.0|
    +-----------------------+
    SELECT percentile(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col);
    +-------------------------------------+
    |percentile(col, array(0.25, 0.75), 1)|
    +-------------------------------------+
    |                           [2.5, 7.5]|
    +-------------------------------------+
    SELECT percentile(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col);
    +-----------------------+
    |percentile(col, 0.5, 1)|
    +-----------------------+
    |   INTERVAL '0-5' YE...|
    +-----------------------+
    SELECT percentile(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '10' SECOND) AS tab(col);
    +-----------------------------------+
    |percentile(col, array(0.2, 0.5), 1)|
    +-----------------------------------+
    |               [INTERVAL '0 00:0...'|
    +-----------------------------------+
    -- percentile_approx
    SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);
    +-------------------------------------------------+
    |percentile_approx(col, array(0.5, 0.4, 0.1), 100)|
    +-------------------------------------------------+
    |                                        [1, 1, 0]|
    +-------------------------------------------------+
    SELECT percentile_approx(col, 0.5, 100) FROM VALUES (0), (6), (7), (9), (10) AS tab(col);
    +--------------------------------+
    |percentile_approx(col, 0.5, 100)|
    +--------------------------------+
    |                               7|
    +--------------------------------+
    SELECT percentile_approx(col, 0.5, 100) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '1' MONTH), (INTERVAL '2' MONTH), (INTERVAL '10' MONTH) AS tab(col);
    +--------------------------------+
    |percentile_approx(col, 0.5, 100)|
    +--------------------------------+
    |              INTERVAL '1' MONTH|
    +--------------------------------+
    SELECT percentile_approx(col, array(0.5, 0.7), 100) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '1' SECOND), (INTERVAL '2' SECOND), (INTERVAL '10' SECOND) AS tab(col);
    +--------------------------------------------+
    |percentile_approx(col, array(0.5, 0.7), 100)|
    +--------------------------------------------+
    |                        [INTERVAL '01' SE...|
    +--------------------------------------------+
    -- regr_avgx
    SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +---------------+
    |regr_avgx(y, x)|
    +---------------+
    |           2.75|
    +---------------+
    SELECT regr_avgx(y, x) FROM VALUES (1, null) AS tab(y, x);
    +---------------+
    |regr_avgx(y, x)|
    +---------------+
    |           NULL|
    +---------------+
    SELECT regr_avgx(y, x) FROM VALUES (null, 1) AS tab(y, x);
    +---------------+
    |regr_avgx(y, x)|
    +---------------+
    |           NULL|
    +---------------+
    SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +---------------+
    |regr_avgx(y, x)|
    +---------------+
    |            3.0|
    +---------------+
    SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +---------------+
    |regr_avgx(y, x)|
    +---------------+
    |            3.0|
    +---------------+
    -- regr_avgy
    SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +---------------+
    |regr_avgy(y, x)|
    +---------------+
    |           1.75|
    +---------------+
    SELECT regr_avgy(y, x) FROM VALUES (1, null) AS tab(y, x);
    +---------------+
    |regr_avgy(y, x)|
    +---------------+
    |           NULL|
    +---------------+
    SELECT regr_avgy(y, x) FROM VALUES (null, 1) AS tab(y, x);
    +---------------+
    |regr_avgy(y, x)|
    +---------------+
    |           NULL|
    +---------------+
    SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |   regr_avgy(y, x)|
    +------------------+
    |1.6666666666666667|
    +------------------+
    SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +---------------+
    |regr_avgy(y, x)|
    +---------------+
    |            1.5|
    +---------------+
    -- regr_count
    SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +----------------+
    |regr_count(y, x)|
    +----------------+
    |               4|
    +----------------+
    SELECT regr_count(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +----------------+
    |regr_count(y, x)|
    +----------------+
    |               3|
    +----------------+
    SELECT regr_count(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +----------------+
    |regr_count(y, x)|
    +----------------+
    |               2|
    +----------------+
    -- regr_intercept
    SELECT regr_intercept(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);
    +--------------------+
    |regr_intercept(y, x)|
    +--------------------+
    |                 0.0|
    +--------------------+
    SELECT regr_intercept(y, x) FROM VALUES (1, null) AS tab(y, x);
    +--------------------+
    |regr_intercept(y, x)|
    +--------------------+
    |                NULL|
    +--------------------+
    SELECT regr_intercept(y, x) FROM VALUES (null, 1) AS tab(y, x);
    +--------------------+
    |regr_intercept(y, x)|
    +--------------------+
    |                NULL|
    +--------------------+
    -- regr_r2
    SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |     regr_r2(y, x)|
    +------------------+
    |0.2727272727272726|
    +------------------+
    SELECT regr_r2(y, x) FROM VALUES (1, null) AS tab(y, x);
    +-------------+
    |regr_r2(y, x)|
    +-------------+
    |         NULL|
    +-------------+
    SELECT regr_r2(y, x) FROM VALUES (null, 1) AS tab(y, x);
    +-------------+
    |regr_r2(y, x)|
    +-------------+
    |         NULL|
    +-------------+
    SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |     regr_r2(y, x)|
    +------------------+
    |0.7500000000000001|
    +------------------+
    SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +-------------+
    |regr_r2(y, x)|
    +-------------+
    |          1.0|
    +-------------+
    -- regr_slope
    SELECT regr_slope(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);
    +----------------+
    |regr_slope(y, x)|
    +----------------+
    |             1.0|
    +----------------+
    SELECT regr_slope(y, x) FROM VALUES (1, null) AS tab(y, x);
    +----------------+
    |regr_slope(y, x)|
    +----------------+
    |            NULL|
    +----------------+
    SELECT regr_slope(y, x) FROM VALUES (null, 1) AS tab(y, x);
    +----------------+
    |regr_slope(y, x)|
    +----------------+
    |            NULL|
    +----------------+
    -- regr_sxx
    SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |    regr_sxx(y, x)|
    +------------------+
    |2.7499999999999996|
    +------------------+
    SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +--------------+
    |regr_sxx(y, x)|
    +--------------+
    |           2.0|
    +--------------+
    SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +--------------+
    |regr_sxx(y, x)|
    +--------------+
    |           2.0|
    +--------------+
    -- regr_sxy
    SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |    regr_sxy(y, x)|
    +------------------+
    |0.7499999999999998|
    +------------------+
    SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +--------------+
    |regr_sxy(y, x)|
    +--------------+
    |           1.0|
    +--------------+
    SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +--------------+
    |regr_sxy(y, x)|
    +--------------+
    |           1.0|
    +--------------+
    -- regr_syy
    SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |    regr_syy(y, x)|
    +------------------+
    |0.7499999999999999|
    +------------------+
    SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, null), (2, 3), (2, 4) AS tab(y, x);
    +------------------+
    |    regr_syy(y, x)|
    +------------------+
    |0.6666666666666666|
    +------------------+
    SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, null), (null, 3), (2, 4) AS tab(y, x);
    +--------------+
    |regr_syy(y, x)|
    +--------------+
    |           0.5|
    +--------------+
    -- skewness
    SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
    +------------------+
    |     skewness(col)|
    +------------------+
    |1.1135657469022013|
    +------------------+
    SELECT skewness(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col);
    +-------------------+
    |      skewness(col)|
    +-------------------+
    |-1.1135657469022011|
    +-------------------+
    -- some
    SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col);
    +---------+
    |some(col)|
    +---------+
    |     true|
    +---------+
    SELECT some(col) FROM VALUES (NULL), (true), (false) AS tab(col);
    +---------+
    |some(col)|
    +---------+
    |     true|
    +---------+
    SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col);
    +---------+
    |some(col)|
    +---------+
    |    false|
    +---------+
    -- std
    SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col);
    +--------+
    |std(col)|
    +--------+
    |     1.0|
    +--------+
    -- stddev
    SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col);
    +-----------+
    |stddev(col)|
    +-----------+
    |        1.0|
    +-----------+
    -- stddev_pop
    SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
    +-----------------+
    |  stddev_pop(col)|
    +-----------------+
    |0.816496580927726|
    +-----------------+
    -- stddev_samp
    SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
    +----------------+
    |stddev_samp(col)|
    +----------------+
    |             1.0|
    +----------------+
    -- sum
    SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col);
    +--------+
    |sum(col)|
    +--------+
    |      30|
    +--------+
    SELECT sum(col) FROM VALUES (NULL), (10), (15) AS tab(col);
    +--------+
    |sum(col)|
    +--------+
    |      25|
    +--------+
    SELECT sum(col) FROM VALUES (NULL), (NULL) AS tab(col);
    +--------+
    |sum(col)|
    +--------+
    |    NULL|
    +--------+
    -- try_avg
    SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col);
    +------------+
    |try_avg(col)|
    +------------+
    |         2.0|
    +------------+
    SELECT try_avg(col) FROM VALUES (1), (2), (NULL) AS tab(col);
    +------------+
    |try_avg(col)|
    +------------+
    |         1.5|
    +------------+
    SELECT try_avg(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col);
    +------------+
    |try_avg(col)|
    +------------+
    |        NULL|
    +------------+
    -- try_sum
    SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col);
    +------------+
    |try_sum(col)|
    +------------+
    |          30|
    +------------+
    SELECT try_sum(col) FROM VALUES (NULL), (10), (15) AS tab(col);
    +------------+
    |try_sum(col)|
    +------------+
    |          25|
    +------------+
    SELECT try_sum(col) FROM VALUES (NULL), (NULL) AS tab(col);
    +------------+
    |try_sum(col)|
    +------------+
    |        NULL|
    +------------+
    SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col);
    +------------+
    |try_sum(col)|
    +------------+
    |        NULL|
    +------------+
    -- var_pop
    SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
    +------------------+
    |      var_pop(col)|
    +------------------+
    |0.6666666666666666|
    +------------------+
    -- var_samp
    SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
    +-------------+
    |var_samp(col)|
    +-------------+
    |          1.0|
    +-------------+
    -- variance
    SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col);
    +-------------+
    |variance(col)|
    +-------------+
    |          1.0|
    +-------------+
    
    Spark SQL----内置函数Aggregate Functions
    (图片来源网络,侵删)
VPS购买请点击我

文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式并注明出处。

目录[+]