Oracle PL/SQL/Analytical Functions/PERCENT RANK
Содержание
PERCENT_RANK(): calculate the percent rank of a value relative to a group of values
SQL> CREATE TABLE all_sales (
2 year INTEGER,
3 month INTEGER,
4 prd_type_id INTEGER,
5 emp_id INTEGER ,
6 amount NUMBER(8, 2)
7 );
Table created.
SQL>
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,1 ,1 ,21 ,16034.84);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,2 ,1 ,21 ,15644.65);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,3 ,2 ,21 ,20167.83);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,4 ,2 ,21 ,25056.45);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,5 ,2 ,21 ,NULL);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,6 ,1 ,21 ,15564.66);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,7 ,1 ,21 ,15644.65);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,8 ,1 ,21 ,16434.82);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,9 ,1 ,21 ,19654.57);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,10 ,1 ,21 ,21764.19);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,11 ,1 ,21 ,13026.73);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2006,12 ,2 ,21 ,10034.64);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,1 ,2 ,22 ,16634.84);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,1 ,2 ,21 ,26034.84);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,2 ,1 ,21 ,12644.65);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,3 ,1 ,21 ,NULL);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,4 ,1 ,21 ,25026.45);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,5 ,1 ,21 ,17212.66);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,6 ,1 ,21 ,15564.26);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,7 ,2 ,21 ,62654.82);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,8 ,2 ,21 ,26434.82);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,9 ,2 ,21 ,15644.65);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,10 ,2 ,21 ,21264.19);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,11 ,1 ,21 ,13026.73);
1 row created.
SQL> insert into all_sales (YEAR,MONTH,PRD_TYPE_ID,EMP_ID,AMOUNT)
2 values(2005,12 ,1 ,21 ,10032.64);
1 row created.
SQL>
SQL> select * from all_sales;
YEAR MONTH PRD_TYPE_ID EMP_ID AMOUNT
---------- ---------- ----------- ---------- ----------
2006 1 1 21 16034.84
2006 2 1 21 15644.65
2006 3 2 21 20167.83
2006 4 2 21 25056.45
2006 5 2 21
2006 6 1 21 15564.66
2006 7 1 21 15644.65
2006 8 1 21 16434.82
2006 9 1 21 19654.57
2006 10 1 21 21764.19
2006 11 1 21 13026.73
2006 12 2 21 10034.64
2005 1 2 22 16634.84
2005 1 2 21 26034.84
2005 2 1 21 12644.65
2005 3 1 21
2005 4 1 21 25026.45
2005 5 1 21 17212.66
2005 6 1 21 15564.26
2005 7 2 21 62654.82
2005 8 2 21 26434.82
2005 9 2 21 15644.65
2005 10 2 21 21264.19
2005 11 1 21 13026.73
2005 12 1 21 10032.64
25 rows selected.
SQL>
SQL> --PERCENT_RANK(): calculate the percent rank of a value relative to a group of values.
SQL>
SQL> SELECT
2 prd_type_id, SUM(amount),
3 PERCENT_RANK() OVER (ORDER BY SUM(amount) DESC) AS percent_rank
4 FROM all_sales
5 GROUP BY prd_type_id
6 ORDER BY prd_type_id;
PRD_TYPE_ID SUM(AMOUNT) PERCENT_RANK
----------- ----------- ------------
1 227276.5 0
2 223927.08 1
SQL>
SQL> drop table all_sales;
Table dropped.
SQL>
SQL>
Percent_rank (PR) = (Rank-1)/(Number of rows-1)
SQL>
SQL> -- create demo table
SQL> create table Employee(
2 empno Number(3) NOT NULL, -- Employee ID
3 ename VARCHAR2(10 BYTE), -- Employee Name
4 hireDate DATE, -- Date Employee Hired
5 orig_salary Number(8,2), -- Orignal Salary
6 curr_salary Number(8,2), -- Current Salary
7 region VARCHAR2(1 BYTE) -- Region where employeed
8 )
9 /
Table created.
SQL>
SQL>
SQL> -- prepare data for employee table
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(122,"Alison",to_date("19960321","YYYYMMDD"), 45000, NULL, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(123, "James",to_date("19781212","YYYYMMDD"), 23000, 32000, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(104,"Celia",to_date("19821024","YYYYMMDD"), NULL, 58000, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(105,"Robert",to_date("19840115","YYYYMMDD"), 31000, NULL, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(116,"Linda", to_date("19870730","YYYYMMDD"), NULL, 53000, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(117,"David", to_date("19901231","YYYYMMDD"), 78000, NULL, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(108,"Jode", to_date("19960917","YYYYMMDD"), 21000, 29000, "E")
3 /
1 row created.
SQL>
SQL> -- display data in the table
SQL> select * from Employee
2 /
EMPNO ENAME HIREDATE ORIG_SALARY CURR_SALARY R
---------- ---------- --------- ----------- ----------- -
122 Alison 21-MAR-96 45000 E
123 James 12-DEC-78 23000 32000 W
104 Celia 24-OCT-82 58000 E
105 Robert 15-JAN-84 31000 W
116 Linda 30-JUL-87 53000 E
117 David 31-DEC-90 78000 W
108 Jode 17-SEP-96 21000 29000 E
7 rows selected.
SQL>
SQL>
SQL>
SQL>
SQL> --PERCENT_RANK will compute the cumulative fraction of the ranking that exists for a particular ranking value. This calculation and the one for CUME_DIST are like the values one would see in a histogram. PERCENT_RANK is set to compute so that the first row is zero, and the other values in this
column are computed based on the formula:
SQL>
SQL> --Percent_rank (PR) = (Rank-1)/(Number of rows-1)
SQL>
SQL> --For example, using our Employee table with PERCENT_RANK and CUME_DIST:
SQL>
SQL> SELECT empno, ename, region,
2 RANK() OVER(PARTITION BY region ORDER BY curr_salary)
3 RANK,
4 PERCENT_RANK() OVER(PARTITION BY region ORDER BY
5 curr_salary) PR,
6 CUME_DIST() OVER(PARTITION BY region ORDER BY curr_salary)
7 CD
8 FROM employee;
EMPNO ENAME R RANK PR CD
---------- ---------- - ---------- ---------- ----------
108 Jode E 1 0 .25
116 Linda E 2 .333333333 .5
104 Celia E 3 .666666667 .75
122 Alison E 4 1 1
123 James W 1 0 .333333333
117 David W 2 .5 1
105 Robert W 2 .5 1
7 rows selected.
SQL>
SQL>
SQL>
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee;
Table dropped.
SQL>
SQL>
rank and percent_rank
SQL> CREATE TABLE sales(
2 product_id NUMBER(6),
3 cid NUMBER,
4 time_id DATE,
5 sold NUMBER(3),
6 amount NUMBER(10,2),
7 cost NUMBER(10,2)
8 );
Table created.
SQL> select product_id,sum(sold),rank () over (order by sum(sold) desc) as rank,
2 percent_rank () over (order by sum(sold) asc) as percent_rank
3 from sales
4 where to_char(time_id, "yyyy-mm") = "2001-06"
5 group by product_id
6 order by sum(sold) desc;
no rows selected
SQL>
SQL> drop table sales;
Table dropped.
Rank, Percent_Rank, and Cume_Dist, NTILE
SQL>
SQL> -- create demo table
SQL> create table Employee(
2 empno Number(3) NOT NULL, -- Employee ID
3 ename VARCHAR2(10 BYTE), -- Employee Name
4 hireDate DATE, -- Date Employee Hired
5 orig_salary Number(8,2), -- Orignal Salary
6 curr_salary Number(8,2), -- Current Salary
7 region VARCHAR2(1 BYTE) -- Region where employeed
8 )
9 /
Table created.
SQL>
SQL>
SQL> -- prepare data for employee table
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(122,"Alison",to_date("19960321","YYYYMMDD"), 45000, NULL, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(123, "James",to_date("19781212","YYYYMMDD"), 23000, 32000, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(104,"Celia",to_date("19821024","YYYYMMDD"), NULL, 58000, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(105,"Robert",to_date("19840115","YYYYMMDD"), 31000, NULL, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(116,"Linda", to_date("19870730","YYYYMMDD"), NULL, 53000, "E")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(117,"David", to_date("19901231","YYYYMMDD"), 78000, NULL, "W")
3 /
1 row created.
SQL> insert into Employee(empno, ename, hireDate, orig_salary, curr_salary, region)
2 values(108,"Jode", to_date("19960917","YYYYMMDD"), 21000, 29000, "E")
3 /
1 row created.
SQL>
SQL> -- display data in the table
SQL> select * from Employee
2 /
EMPNO ENAME HIREDATE ORIG_SALARY CURR_SALARY R
---------- ---------- --------- ----------- ----------- -
122 Alison 21-MAR-96 45000 E
123 James 12-DEC-78 23000 32000 W
104 Celia 24-OCT-82 58000 E
105 Robert 15-JAN-84 31000 W
116 Linda 30-JUL-87 53000 E
117 David 31-DEC-90 78000 W
108 Jode 17-SEP-96 21000 29000 E
7 rows selected.
SQL>
SQL>
SQL>
SQL> --The use of NTILE with a small amount of data like we have done here is poor statistics, but a reasonable database demonstration. To truly deal with NTILE in a statistical sense, we"d have to use a lot more data.
SQL>
SQL> --What about nulls with the NTILE function? Here is an example using the same query on our Employee table with nulls (Empwnulls):
SQL>
SQL> SELECT ename, curr_salary sal,
2 ntile(2) OVER(ORDER BY curr_salary desc) n2,
3 ntile(3) OVER(ORDER BY curr_salary desc) n3,
4 ntile(4) OVER(ORDER BY curr_salary desc) n4,
5 ntile(5) OVER(ORDER BY curr_salary desc) n5,
6 ntile(6) OVER(ORDER BY curr_salary desc) n6,
7 ntile(8) OVER(ORDER BY curr_salary desc) n8
8 FROM employee;
ENAME SAL N2 N3 N4 N5 N6 N8
---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
Alison 1 1 1 1 1 1
Robert 1 1 1 1 1 2
David 1 1 2 2 2 3
Celia 58000 1 2 2 2 3 4
Linda 53000 2 2 3 3 4 5
James 32000 2 3 3 4 5 6
Jode 29000 2 3 4 5 6 7
7 rows selected.
SQL>
SQL>
SQL>
SQL> -- clean the table
SQL> drop table Employee;
Table dropped.
SQL>
SQL>