Oracle PL/SQL Tutorial/Analytical Functions/CUME DIST

Материал из SQL эксперт
Перейти к: навигация, поиск

CUME_DIST() calculates the position of a specified value relative to a group of values.

CUME_DIST() is short for cumulative distribution.



SQL> -- create demo table
SQL> create table Employee(
  2    ID                 VARCHAR2(4 BYTE)         NOT NULL,
  3    First_Name         VARCHAR2(10 BYTE),
  4    Last_Name          VARCHAR2(10 BYTE),
  5    Start_Date         DATE,
  6    End_Date           DATE,
  7    Salary             Number(8,2),
  8    City               VARCHAR2(10 BYTE),
  9    Description        VARCHAR2(15 BYTE)
 10  )
 11  /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2               values ("01","Jason",    "Martin",  to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto",  "Programmer")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("02","Alison",   "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("03","James",    "Smith",   to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("04","Celia",    "Rice",    to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("05","Robert",   "Black",   to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("06","Linda",    "Green",   to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York",  "Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("07","David",    "Larry",   to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York",  "Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("08","James",    "Cat",     to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
  3  /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
  2  /
ID   FIRST_NAME LAST_NAME  START_DAT END_DATE      SALARY CITY       DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01   Jason      Martin     25-JUL-96 25-JUL-06    1234.56 Toronto    Programmer
02   Alison     Mathews    21-MAR-76 21-FEB-86    6661.78 Vancouver  Tester
03   James      Smith      12-DEC-78 15-MAR-90    6544.78 Vancouver  Tester
04   Celia      Rice       24-OCT-82 21-APR-99    2344.78 Vancouver  Manager
05   Robert     Black      15-JAN-84 08-AUG-98    2334.78 Vancouver  Tester
06   Linda      Green      30-JUL-87 04-JAN-96    4322.78 New York   Tester
07   David      Larry      31-DEC-90 12-FEB-98    7897.78 New York   Manager
08   James      Cat        17-SEP-96 15-APR-02    1232.78 Vancouver  Tester
8 rows selected.
SQL>
SQL>
SQL> SELECT
  2   city, SUM(salary),
  3   CUME_DIST() OVER (ORDER BY SUM(salary) DESC) AS cume_dist,
  4   PERCENT_RANK() OVER (ORDER BY SUM(salary) DESC) AS percent_rank
  5  FROM employee
  6  GROUP BY city
  7  ORDER BY city;
CITY       SUM(SALARY)  CUME_DIST PERCENT_RANK
---------- ----------- ---------- ------------
New York      12220.56 .666666667           .5
Toronto        1234.56          1            1
Vancouver      19118.9 .333333333            0
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
  2  /
Table dropped.
SQL>


CUME_DIST functions

The syntax for the CUME_ DIST functions:

CUME_DIST() OVER ([PARTITION clause] ORDER clause)

The PARTITION clause is optional.



SQL> -- create demo table
SQL> create table Employee(
  2    ID                 VARCHAR2(4 BYTE)         NOT NULL,
  3    First_Name         VARCHAR2(10 BYTE),
  4    Last_Name          VARCHAR2(10 BYTE),
  5    Start_Date         DATE,
  6    End_Date           DATE,
  7    Salary             Number(8,2),
  8    City               VARCHAR2(10 BYTE),
  9    Description        VARCHAR2(15 BYTE)
 10  )
 11  /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2               values ("01","Jason",    "Martin",  to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto",  "Programmer")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("02","Alison",   "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("03","James",    "Smith",   to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("04","Celia",    "Rice",    to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("05","Robert",   "Black",   to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("06","Linda",    "Green",   to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York",  "Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("07","David",    "Larry",   to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York",  "Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("08","James",    "Cat",     to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
  3  /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
  2  /
ID   FIRST_NAME LAST_NAME  START_DAT END_DATE      SALARY CITY       DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01   Jason      Martin     25-JUL-96 25-JUL-06    1234.56 Toronto    Programmer
02   Alison     Mathews    21-MAR-76 21-FEB-86    6661.78 Vancouver  Tester
03   James      Smith      12-DEC-78 15-MAR-90    6544.78 Vancouver  Tester
04   Celia      Rice       24-OCT-82 21-APR-99    2344.78 Vancouver  Manager
05   Robert     Black      15-JAN-84 08-AUG-98    2334.78 Vancouver  Tester
06   Linda      Green      30-JUL-87 04-JAN-96    4322.78 New York   Tester
07   David      Larry      31-DEC-90 12-FEB-98    7897.78 New York   Manager
08   James      Cat        17-SEP-96 15-APR-02    1232.78 Vancouver  Tester
8 rows selected.
SQL>
SQL>
SQL> SELECT first_name, salary,
  2    RANK() OVER(ORDER BY salary) RANK,
  3    PERCENT_RANK() OVER(ORDER BY salary) PR,
  4    CUME_DIST() OVER(ORDER BY salary) CD
  5  FROM employee
  6  ORDER BY salary
  7  /
FIRST_NAME     SALARY       RANK         PR         CD
---------- ---------- ---------- ---------- ----------
James         1232.78          1          0       .125
Jason         1234.56          2 .142857143        .25
Robert        2334.78          3 .285714286       .375
Celia         2344.78          4 .428571429         .5
Linda         4322.78          5 .571428571       .625
James         6544.78          6 .714285714        .75
Alison        6661.78          7 .857142857       .875
David         7897.78          8          1          1
8 rows selected.
SQL>
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
  2  /
Table dropped.
SQL>


cume_dist over

SQL> CREATE TABLE sales(
  2    product_id            NUMBER(6),
  3    cid           NUMBER,
  4    time_id               DATE,
  5    sold         NUMBER(3),
  6    amount                NUMBER(10,2),
  7    cost                  NUMBER(10,2)
  8  );
Table created.
SQL>
SQL> select product_id,sum(sold),cume_dist () over (order by sum(sold) asc) as cume_dist
  2  from sales
  3  where to_char(time_id, "yyyy-mm") = "2001-06"
  4  group by product_id
  5  order by sum(sold) desc;
no rows selected
SQL>
SQL> drop table sales;
Table dropped.


Use RANK(), PERCENT_RANK(), CUME_DIST()

SQL>
SQL> -- create demo table
SQL> create table Employee(
  2    ID                 VARCHAR2(4 BYTE)         NOT NULL,
  3    First_Name         VARCHAR2(10 BYTE),
  4    Last_Name          VARCHAR2(10 BYTE),
  5    Start_Date         DATE,
  6    End_Date           DATE,
  7    Salary             Number(8,2),
  8    City               VARCHAR2(10 BYTE),
  9    Description        VARCHAR2(15 BYTE)
 10  )
 11  /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2               values ("01","Jason",    "Martin",  to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto",  "Programmer")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("02","Alison",   "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("03","James",    "Smith",   to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("04","Celia",    "Rice",    to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary,  City,       Description)
  2                values("05","Robert",   "Black",   to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("06","Linda",    "Green",   to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York",  "Tester")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("07","David",    "Larry",   to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York",  "Manager")
  3  /
1 row created.
SQL> insert into Employee(ID,  First_Name, Last_Name, Start_Date,                     End_Date,                       Salary, City,        Description)
  2                values("08","James",    "Cat",     to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
  3  /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
  2  /
ID   FIRST_NAME LAST_NAME  START_DAT END_DATE      SALARY CITY       DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01   Jason      Martin     25-JUL-96 25-JUL-06    1234.56 Toronto    Programmer
02   Alison     Mathews    21-MAR-76 21-FEB-86    6661.78 Vancouver  Tester
03   James      Smith      12-DEC-78 15-MAR-90    6544.78 Vancouver  Tester
04   Celia      Rice       24-OCT-82 21-APR-99    2344.78 Vancouver  Manager
05   Robert     Black      15-JAN-84 08-AUG-98    2334.78 Vancouver  Tester
06   Linda      Green      30-JUL-87 04-JAN-96    4322.78 New York   Tester
07   David      Larry      31-DEC-90 12-FEB-98    7897.78 New York   Manager
08   James      Cat        17-SEP-96 15-APR-02    1232.78 Vancouver  Tester
8 rows selected.
SQL>
SQL>
SQL>
SQL> SELECT id, first_name, description,
  2    RANK() OVER(PARTITION BY description ORDER BY salary) RANK,
  3    PERCENT_RANK() OVER(PARTITION BY description ORDER BY salary) PR,
  4    CUME_DIST() OVER(PARTITION BY description ORDER BY salary) CD
  5  FROM employee
  6  /
ID   FIRST_NAME DESCRIPTION           RANK         PR         CD
---- ---------- --------------- ---------- ---------- ----------
04   Celia      Manager                  1          0         .5
07   David      Manager                  2          1          1
01   Jason      Programmer               1          0          1
08   James      Tester                   1          0         .2
05   Robert     Tester                   2        .25         .4
06   Linda      Tester                   3         .5         .6
03   James      Tester                   4        .75         .8
02   Alison     Tester                   5          1          1
8 rows selected.
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
  2  /
Table dropped.