Oracle PL/SQL Tutorial/Analytical Functions/CUME DIST
Содержание
CUME_DIST() calculates the position of a specified value relative to a group of values.
CUME_DIST() is short for cumulative distribution.
SQL> -- create demo table
SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL,
3 First_Name VARCHAR2(10 BYTE),
4 Last_Name VARCHAR2(10 BYTE),
5 Start_Date DATE,
6 End_Date DATE,
7 Salary Number(8,2),
8 City VARCHAR2(10 BYTE),
9 Description VARCHAR2(15 BYTE)
10 )
11 /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
3 /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer
02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester
03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester
04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager
05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester
06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester
07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager
08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester
8 rows selected.
SQL>
SQL>
SQL> SELECT
2 city, SUM(salary),
3 CUME_DIST() OVER (ORDER BY SUM(salary) DESC) AS cume_dist,
4 PERCENT_RANK() OVER (ORDER BY SUM(salary) DESC) AS percent_rank
5 FROM employee
6 GROUP BY city
7 ORDER BY city;
CITY SUM(SALARY) CUME_DIST PERCENT_RANK
---------- ----------- ---------- ------------
New York 12220.56 .666666667 .5
Toronto 1234.56 1 1
Vancouver 19118.9 .333333333 0
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
2 /
Table dropped.
SQL>
CUME_DIST functions
The syntax for the CUME_ DIST functions:
CUME_DIST() OVER ([PARTITION clause] ORDER clause)
The PARTITION clause is optional.
SQL> -- create demo table
SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL,
3 First_Name VARCHAR2(10 BYTE),
4 Last_Name VARCHAR2(10 BYTE),
5 Start_Date DATE,
6 End_Date DATE,
7 Salary Number(8,2),
8 City VARCHAR2(10 BYTE),
9 Description VARCHAR2(15 BYTE)
10 )
11 /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
3 /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer
02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester
03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester
04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager
05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester
06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester
07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager
08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester
8 rows selected.
SQL>
SQL>
SQL> SELECT first_name, salary,
2 RANK() OVER(ORDER BY salary) RANK,
3 PERCENT_RANK() OVER(ORDER BY salary) PR,
4 CUME_DIST() OVER(ORDER BY salary) CD
5 FROM employee
6 ORDER BY salary
7 /
FIRST_NAME SALARY RANK PR CD
---------- ---------- ---------- ---------- ----------
James 1232.78 1 0 .125
Jason 1234.56 2 .142857143 .25
Robert 2334.78 3 .285714286 .375
Celia 2344.78 4 .428571429 .5
Linda 4322.78 5 .571428571 .625
James 6544.78 6 .714285714 .75
Alison 6661.78 7 .857142857 .875
David 7897.78 8 1 1
8 rows selected.
SQL>
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
2 /
Table dropped.
SQL>
cume_dist over
SQL> CREATE TABLE sales(
2 product_id NUMBER(6),
3 cid NUMBER,
4 time_id DATE,
5 sold NUMBER(3),
6 amount NUMBER(10,2),
7 cost NUMBER(10,2)
8 );
Table created.
SQL>
SQL> select product_id,sum(sold),cume_dist () over (order by sum(sold) asc) as cume_dist
2 from sales
3 where to_char(time_id, "yyyy-mm") = "2001-06"
4 group by product_id
5 order by sum(sold) desc;
no rows selected
SQL>
SQL> drop table sales;
Table dropped.
Use RANK(), PERCENT_RANK(), CUME_DIST()
SQL>
SQL> -- create demo table
SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL,
3 First_Name VARCHAR2(10 BYTE),
4 Last_Name VARCHAR2(10 BYTE),
5 Start_Date DATE,
6 End_Date DATE,
7 Salary Number(8,2),
8 City VARCHAR2(10 BYTE),
9 Description VARCHAR2(15 BYTE)
10 )
11 /
Table created.
SQL>
SQL> -- prepare data
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager")
3 /
1 row created.
SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester")
3 /
1 row created.
SQL>
SQL>
SQL>
SQL> -- display data in the table
SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---- ---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer
02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester
03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester
04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager
05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester
06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester
07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager
08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester
8 rows selected.
SQL>
SQL>
SQL>
SQL> SELECT id, first_name, description,
2 RANK() OVER(PARTITION BY description ORDER BY salary) RANK,
3 PERCENT_RANK() OVER(PARTITION BY description ORDER BY salary) PR,
4 CUME_DIST() OVER(PARTITION BY description ORDER BY salary) CD
5 FROM employee
6 /
ID FIRST_NAME DESCRIPTION RANK PR CD
---- ---------- --------------- ---------- ---------- ----------
04 Celia Manager 1 0 .5
07 David Manager 2 1 1
01 Jason Programmer 1 0 1
08 James Tester 1 0 .2
05 Robert Tester 2 .25 .4
06 Linda Tester 3 .5 .6
03 James Tester 4 .75 .8
02 Alison Tester 5 1 1
8 rows selected.
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee
2 /
Table dropped.