Oracle PL/SQL Tutorial/Analytical Functions/CUME DIST
Содержание
CUME_DIST() calculates the position of a specified value relative to a group of values.
CUME_DIST() is short for cumulative distribution.
<source lang="sql">
SQL> -- create demo table SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL, 3 First_Name VARCHAR2(10 BYTE), 4 Last_Name VARCHAR2(10 BYTE), 5 Start_Date DATE, 6 End_Date DATE, 7 Salary Number(8,2), 8 City VARCHAR2(10 BYTE), 9 Description VARCHAR2(15 BYTE) 10 ) 11 /
Table created. SQL> SQL> -- prepare data SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester") 3 /
1 row created. SQL> SQL> SQL> SQL> -- display data in the table SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer 02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester 03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester 04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager 05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester 06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester 07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager 08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester 8 rows selected. SQL> SQL> SQL> SELECT
2 city, SUM(salary), 3 CUME_DIST() OVER (ORDER BY SUM(salary) DESC) AS cume_dist, 4 PERCENT_RANK() OVER (ORDER BY SUM(salary) DESC) AS percent_rank 5 FROM employee 6 GROUP BY city 7 ORDER BY city;
CITY SUM(SALARY) CUME_DIST PERCENT_RANK
----------- ---------- ------------
New York 12220.56 .666666667 .5 Toronto 1234.56 1 1 Vancouver 19118.9 .333333333 0 SQL> SQL> SQL> -- clean the table SQL> drop table Employee
2 /
Table dropped. SQL></source>
CUME_DIST functions
The syntax for the CUME_ DIST functions:
CUME_DIST() OVER ([PARTITION clause] ORDER clause)
The PARTITION clause is optional.
<source lang="sql">
SQL> -- create demo table SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL, 3 First_Name VARCHAR2(10 BYTE), 4 Last_Name VARCHAR2(10 BYTE), 5 Start_Date DATE, 6 End_Date DATE, 7 Salary Number(8,2), 8 City VARCHAR2(10 BYTE), 9 Description VARCHAR2(15 BYTE) 10 ) 11 /
Table created. SQL> SQL> -- prepare data SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester") 3 /
1 row created. SQL> SQL> SQL> SQL> -- display data in the table SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer 02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester 03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester 04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager 05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester 06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester 07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager 08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester 8 rows selected. SQL> SQL> SQL> SELECT first_name, salary,
2 RANK() OVER(ORDER BY salary) RANK, 3 PERCENT_RANK() OVER(ORDER BY salary) PR, 4 CUME_DIST() OVER(ORDER BY salary) CD 5 FROM employee 6 ORDER BY salary 7 /
FIRST_NAME SALARY RANK PR CD
---------- ---------- ---------- ----------
James 1232.78 1 0 .125 Jason 1234.56 2 .142857143 .25 Robert 2334.78 3 .285714286 .375 Celia 2344.78 4 .428571429 .5 Linda 4322.78 5 .571428571 .625 James 6544.78 6 .714285714 .75 Alison 6661.78 7 .857142857 .875 David 7897.78 8 1 1 8 rows selected. SQL> SQL> SQL> SQL> -- clean the table SQL> drop table Employee
2 /
Table dropped. SQL></source>
cume_dist over
<source lang="sql">
SQL> CREATE TABLE sales(
2 product_id NUMBER(6), 3 cid NUMBER, 4 time_id DATE, 5 sold NUMBER(3), 6 amount NUMBER(10,2), 7 cost NUMBER(10,2) 8 );
Table created. SQL> SQL> select product_id,sum(sold),cume_dist () over (order by sum(sold) asc) as cume_dist
2 from sales 3 where to_char(time_id, "yyyy-mm") = "2001-06" 4 group by product_id 5 order by sum(sold) desc;
no rows selected SQL> SQL> drop table sales; Table dropped.</source>
Use RANK(), PERCENT_RANK(), CUME_DIST()
<source lang="sql">
SQL> SQL> -- create demo table SQL> create table Employee(
2 ID VARCHAR2(4 BYTE) NOT NULL, 3 First_Name VARCHAR2(10 BYTE), 4 Last_Name VARCHAR2(10 BYTE), 5 Start_Date DATE, 6 End_Date DATE, 7 Salary Number(8,2), 8 City VARCHAR2(10 BYTE), 9 Description VARCHAR2(15 BYTE) 10 ) 11 /
Table created. SQL> SQL> -- prepare data SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values ("01","Jason", "Martin", to_date("19960725","YYYYMMDD"), to_date("20060725","YYYYMMDD"), 1234.56, "Toronto", "Programmer") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("02","Alison", "Mathews", to_date("19760321","YYYYMMDD"), to_date("19860221","YYYYMMDD"), 6661.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("03","James", "Smith", to_date("19781212","YYYYMMDD"), to_date("19900315","YYYYMMDD"), 6544.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("04","Celia", "Rice", to_date("19821024","YYYYMMDD"), to_date("19990421","YYYYMMDD"), 2344.78, "Vancouver","Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("05","Robert", "Black", to_date("19840115","YYYYMMDD"), to_date("19980808","YYYYMMDD"), 2334.78, "Vancouver","Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("06","Linda", "Green", to_date("19870730","YYYYMMDD"), to_date("19960104","YYYYMMDD"), 4322.78,"New York", "Tester") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("07","David", "Larry", to_date("19901231","YYYYMMDD"), to_date("19980212","YYYYMMDD"), 7897.78,"New York", "Manager") 3 /
1 row created. SQL> insert into Employee(ID, First_Name, Last_Name, Start_Date, End_Date, Salary, City, Description)
2 values("08","James", "Cat", to_date("19960917","YYYYMMDD"), to_date("20020415","YYYYMMDD"), 1232.78,"Vancouver", "Tester") 3 /
1 row created. SQL> SQL> SQL> SQL> -- display data in the table SQL> select * from Employee
2 /
ID FIRST_NAME LAST_NAME START_DAT END_DATE SALARY CITY DESCRIPTION
---------- ---------- --------- --------- ---------- ---------- ---------------
01 Jason Martin 25-JUL-96 25-JUL-06 1234.56 Toronto Programmer 02 Alison Mathews 21-MAR-76 21-FEB-86 6661.78 Vancouver Tester 03 James Smith 12-DEC-78 15-MAR-90 6544.78 Vancouver Tester 04 Celia Rice 24-OCT-82 21-APR-99 2344.78 Vancouver Manager 05 Robert Black 15-JAN-84 08-AUG-98 2334.78 Vancouver Tester 06 Linda Green 30-JUL-87 04-JAN-96 4322.78 New York Tester 07 David Larry 31-DEC-90 12-FEB-98 7897.78 New York Manager 08 James Cat 17-SEP-96 15-APR-02 1232.78 Vancouver Tester 8 rows selected. SQL> SQL> SQL> SQL> SELECT id, first_name, description,
2 RANK() OVER(PARTITION BY description ORDER BY salary) RANK, 3 PERCENT_RANK() OVER(PARTITION BY description ORDER BY salary) PR, 4 CUME_DIST() OVER(PARTITION BY description ORDER BY salary) CD 5 FROM employee 6 /
ID FIRST_NAME DESCRIPTION RANK PR CD
---------- --------------- ---------- ---------- ----------
04 Celia Manager 1 0 .5 07 David Manager 2 1 1 01 Jason Programmer 1 0 1 08 James Tester 1 0 .2 05 Robert Tester 2 .25 .4 06 Linda Tester 3 .5 .6 03 James Tester 4 .75 .8 02 Alison Tester 5 1 1 8 rows selected. SQL> SQL> SQL> -- clean the table SQL> drop table Employee
2 /
Table dropped.</source>