SQL Server/T-SQL Tutorial/Query/DISTINCT — различия между версиями

Материал из SQL эксперт
Перейти к: навигация, поиск
м (1 версия)
 
(нет различий)

Текущая версия на 10:23, 26 мая 2010

A SELECT statement that eliminates duplicate rows

DISTINCT keyword prevents duplicate (identical) rows from being included in the result set.
It also causes the result set to be sorted by its first column.
To use the DISTINCT keyword, code it immediately after the SELECT keyword.

13>
14>
15> create table Bankers(
16>    BankerID             Integer,
17>    BankerName           VARCHAR(20),
18>    BankerContactLName   VARCHAR(20),
19>    BankerContactFName   VARCHAR(20),
20>    BankerCity           VARCHAR(20),
21>    BankerState          VARCHAR(20),
22>    BankerZipCode        VARCHAR(20),
23>    BankerPhone          VARCHAR(20)
24> )
25> GO
1>
2> insert into Bankers values (1, "ABC Inc.","Joe","Smith","Vancouver","BC","11111","111-111-1111");
3> GO
(1 rows affected)
1> insert into Bankers values (2, "DEF Inc.","Red","Rice", "New York", "DE","22222","222-222-2222");
2> GO
(1 rows affected)
1> insert into Bankers values (3, "HJI Inc.","Kit","Cat",  "Paris",    "CA","33333","333-333-3333");
2> GO
(1 rows affected)
1> insert into Bankers values (4, "QWE Inc.","Git","Black","Regina",   "ER","44444","444-444-4444");
2> GO
(1 rows affected)
1> insert into Bankers values (5, "RTY Inc.","Wil","Lee",  "Toronto",  "YU","55555","555-555-5555");
2> GO
(1 rows affected)
1> insert into Bankers values (6, "YUI Inc.","Ted","Larry","Calgary",  "TY","66666","666-666-6666");
2> GO
(1 rows affected)
1> insert into Bankers values (7, "OIP Inc.","Yam","Act",  "San Franc","FG","77777","777-777-7777");
2> GO
(1 rows affected)
1> insert into Bankers values (8, "SAD Inc.","Hit","Eat",  "Orland",   "PO","88888","888-888-8888");
2> GO
(1 rows affected)
1> insert into Bankers values (9, "DFG Inc.","Sad","Lee",  "Wisler",   "PL","99999","999-999-9999");
2> GO
(1 rows affected)
1> insert into Bankers values (0, "GHJ Inc.","Bit","Lee",  "Ticker",   "MN","00000","000-000-0000");
2> GO
(1 rows affected)
1>
2>
3> SELECT DISTINCT BankerCity, BankerState
4> FROM Bankers
5> GO
BankerCity           BankerState
-------------------- --------------------
Calgary              TY
New York             DE
Orland               PO
Paris                CA
Regina               ER
San Franc            FG
Ticker               MN
Toronto              YU
Vancouver            BC
Wisler               PL
(10 rows affected)
1>
2> drop table Bankers;
3> GO
1>
2>


A summary query that uses the DISTINCT keyword

4>
5>
6> create table Billings (
7>     BankerID           INTEGER,
8>     BillingNumber      INTEGER,
9>     BillingDate        datetime,
10>     BillingTotal       INTEGER,
11>     TermsID            INTEGER,
12>     BillingDueDate     datetime ,
13>     PaymentTotal       INTEGER,
14>     CreditTotal        INTEGER
15>
16> );
17> GO
1>
2> INSERT INTO Billings VALUES (1, 1, "2005-01-22", 165, 1,"2005-04-22",123,321);
3> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (2, 2, "2001-02-21", 165, 1,"2002-02-22",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (3, 3, "2003-05-02", 165, 1,"2005-04-12",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (4, 4, "1999-03-12", 165, 1,"2005-04-18",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (5, 5, "2000-04-23", 165, 1,"2005-04-17",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (6, 6, "2001-06-14", 165, 1,"2005-04-18",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (7, 7, "2002-07-15", 165, 1,"2005-04-19",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (8, 8, "2003-08-16", 165, 1,"2005-04-20",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (9, 9, "2004-09-17", 165, 1,"2005-04-21",123,321);
2> GO
(1 rows affected)
1> INSERT INTO Billings VALUES (0, 0, "2005-10-18", 165, 1,"2005-04-22",123,321);
2> GO
(1 rows affected)
1>
2>
3> SELECT COUNT(DISTINCT BankerID) AS NumberOfBankers,
4>     COUNT(BankerID) AS NumberOfBillings,
5>     AVG(BillingTotal) AS AverageBillingAmount,
6>     SUM(BillingTotal) AS TotalBillingAmount
7> FROM Billings
8> WHERE BillingDate > "2002-01-01"
9> GO
NumberOfBankers NumberOfBillings AverageBillingAmount TotalBillingAmount
--------------- ---------------- -------------------- ------------------
              6                6                  165                990
(1 rows affected)
1>
2>
3>
4> drop table Billings;
5> GO
1>
2>


Correlated subquery using Distinct

7> CREATE TABLE Product(
8>     ProductID               int                NOT NULL,
9>     Name                    nvarchar(25)       NOT NULL,
10>     ProductNumber           nvarchar(25)               ,
11>     Color                   nvarchar(15)       NULL,
12>     StandardCost            money              NOT NULL,
13>     Size                    nvarchar(5)        NULL,
14>     Weight                  decimal(8, 2)      NULL,
15>     ProductLine             nchar(20)           NULL,
16>     SellStartDate           datetime           NOT NULL,
17>     SellEndDate             datetime           NULL
18> )
19> GO
1> insert into Product values(1,"Product A", "1","Red",123.123,"1",1,"ProductLine A","1999-03-22","2000-03-22");
2> GO
(1 rows affected)
1> insert into Product values(2,"Product B", "2","Yellow",234.234,"1",3,"ProductLine B","2000-03-22","2001-03-22");
2> GO
(1 rows affected)
1> insert into Product values(3,"Product C", "3","Pink",345.345,"1",3,"ProductLine V","2001-09-22","2006-02-22");
2> GO
(1 rows affected)
1> insert into Product values(4,"Product D", "4","White",456.456,"1",4,"ProductLine D","2002-08-22","2006-03-22");
2> GO
(1 rows affected)
1> insert into Product values(5,"Product E", "5","Black",567.567,"1",5,"ProductLine E","2003-01-22","2003-04-22");
2> GO
(1 rows affected)
1> insert into Product values(6,"Product F", "6","Blue",678.678,"1",6,"ProductLine W","2004-02-22","2005-05-22");
2> GO
(1 rows affected)
1> insert into Product values(7,"Product G", "7","Drak",789.789,"1",7,"ProductLine Q","2005-03-22","2006-03-22");
2> GO
(1 rows affected)
1> insert into Product values(8,"Product H", "8","Gray",234.123,"1",8,"ProductLine F","2006-04-22","2006-09-22");
2> GO
(1 rows affected)
1> insert into Product values(9,"Product I", "9","Red",543.123,"1",9,"ProductLine R","2007-05-22","2008-03-22");
2> GO
(1 rows affected)
1> insert into Product values(0,"Product J", "0","Gold",765.123,"1",0,"ProductLine J","2008-06-22","2009-03-22");
2> GO
(1 rows affected)
1>
2>
3>
4> CREATE TABLE SalesOrderDetail(
5>     SalesOrderID            int                NOT NULL,
6>     SalesOrderDetailID      int                NOT NULL,
7>     CarrierTrackingNumber   nvarchar(25)       NULL,
8>     OrderQty                smallint           NOT NULL,
9>     ProductID               int                NOT NULL,
10>     SpecialOfferID          int                NOT NULL,
11>     UnitPrice               money              NOT NULL,
12>     UnitPriceDiscount       money              NOT NULL DEFAULT (0.0),
13>     LineTotal               AS ISNULL([UnitPrice] * (1.0 - [UnitPriceDiscount]) * [OrderQty], 0.0)
14> );
15> GO
1>
2> insert into SalesOrderDetail values (1,1,"1",1,1,1,$1,$1);
3> GO
(1 rows affected)
1> insert into SalesOrderDetail values (2,2,"2",2,2,2,$2,$2);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (3,3,"3",3,3,3,$3,$3);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (4,4,"4",4,4,4,$4,$4);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (5,5,"5",5,5,5,$5,$5);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (6,6,"6",6,6,6,$6,$6);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (7,7,"7",7,7,7,$7,$7);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (8,8,"8",8,8,8,$8,$8);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (9,9,"9",9,9,9,$9,$9);
2> GO
(1 rows affected)
1>
2>
3> SELECT ProductID
4>   , Name
5> FROM Product AS P
6> WHERE 1 =
7>   (SELECT DISTINCT SpecialOfferID
8>   FROM SalesOrderDetail As SD
9>   WHERE P.ProductID = SD.ProductID)
10> GO
ProductID   Name
----------- -------------------------
          1 Product A
(1 rows affected)
1>
2> drop table Product;
3> GO
1>
2>
3> drop table SalesOrderDetail;
4> GO


DISTINCT can be used only once in a SELECT list, and it must precede all column names in that list.

the DISTINCT clause displays all rows where the combination of columns is distinct.
9> CREATE TABLE employee(
10>    id          INTEGER NOT NULL PRIMARY KEY,
11>    first_name  VARCHAR(10),
12>    last_name   VARCHAR(10),
13>    salary      DECIMAL(10,2),
14>    start_Date  DATETIME,
15>    region      VARCHAR(10),
16>    city        VARCHAR(20),
17>    managerid   INTEGER
18> );
19> GO
1> INSERT INTO employee VALUES (1, "Jason" ,  "Martin", 5890,"2005-03-22","North","Vancouver",3);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (2, "Alison",  "Mathews",4789,"2003-07-21","South","Utown",4);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (3, "James" ,  "Smith",  6678,"2001-12-01","North","Paris",5);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (4, "Celia" ,  "Rice",   5567,"2006-03-03","South","London",6);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (5, "Robert",  "Black",  4467,"2004-07-02","East","Newton",7);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (6, "Linda" ,  "Green" , 6456,"2002-05-19","East","Calgary",8);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (7, "David" ,  "Larry",  5345,"2008-03-18","West","New York",9);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (8, "James" ,  "Cat",    4234,"2007-07-17","West","Regina",9);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (9, "Joan"  ,  "Act",    6123,"2001-04-16","North","Toronto",10);
2> GO
(1 rows affected)
1>
2> select * from employee;
3> GO
id          first_name last_name  salary       start_Date              region     city                 managerid
----------- ---------- ---------- ------------ ----------------------- ---------- -------------------- -----------
          1 Jason      Martin          5890.00 2005-03-22 00:00:00.000 North      Vancouver                      3
          2 Alison     Mathews         4789.00 2003-07-21 00:00:00.000 South      Utown                          4
          3 James      Smith           6678.00 2001-12-01 00:00:00.000 North      Paris                          5
          4 Celia      Rice            5567.00 2006-03-03 00:00:00.000 South      London                         6
          5 Robert     Black           4467.00 2004-07-02 00:00:00.000 East       Newton                         7
          6 Linda      Green           6456.00 2002-05-19 00:00:00.000 East       Calgary                        8
          7 David      Larry           5345.00 2008-03-18 00:00:00.000 West       New York                       9
          8 James      Cat             4234.00 2007-07-17 00:00:00.000 West       Regina                         9
          9 Joan       Act             6123.00 2001-04-16 00:00:00.000 North      Toronto                       10
(9 rows affected)
1>
2> select distinct region from employee;
3> GO
region
----------
East
North
South
West
(4 rows affected)
1>
2>
3> drop table employee;
4> GO
1>


Distinct with two columns

3> IF OBJECT_ID("dbo.Sessions") IS NOT NULL
4>   DROP TABLE dbo.Sessions;
5> GO
1>
2> CREATE TABLE dbo.Sessions
3> (
4>   keycol    INT         NOT NULL IDENTITY PRIMARY KEY,
5>   app       VARCHAR(10) NOT NULL,
6>   usr       VARCHAR(10) NOT NULL,
7>   starttime DATETIME    NOT NULL,
8>   endtime   DATETIME    NOT NULL,
9>   CHECK(endtime > starttime)
10> );
11> GO
1>
2> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 08:30", "20060212 10:30");
3> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 08:30", "20060212 08:45");
4> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 09:00", "20060212 09:30");
5> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 09:15", "20060212 10:30");
6> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 09:15", "20060212 09:30");
7> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 10:30", "20060212 14:30");
8> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 10:45", "20060212 11:30");
9> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 11:00", "20060212 12:30");
10> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 08:30", "20060212 08:45");
11> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 09:00", "20060212 09:30");
12> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 11:45", "20060212 12:00");
13> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 12:30", "20060212 14:00");
14> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 12:45", "20060212 13:30");
15> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 13:00", "20060212 14:00");
16> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 14:00", "20060212 16:30");
17> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 15:30", "20060212 17:00");
18> GO
1>
2> SELECT DISTINCT app, starttime AS ts
3> FROM dbo.Sessions;
4> GO
app        ts
---------- -----------------------
app1       2006-02-12 08:30:00.000
app1       2006-02-12 09:00:00.000
app1       2006-02-12 09:15:00.000
app1       2006-02-12 10:30:00.000
app1       2006-02-12 10:45:00.000
app1       2006-02-12 11:00:00.000
app2       2006-02-12 08:30:00.000
app2       2006-02-12 09:00:00.000
app2       2006-02-12 11:45:00.000
app2       2006-02-12 12:30:00.000
app2       2006-02-12 12:45:00.000
app2       2006-02-12 13:00:00.000
app2       2006-02-12 14:00:00.000
app2       2006-02-12 15:30:00.000
1>
2> drop table sessions;
3> GO
1>


Using SELECT DISTINCT or a GROUP BY statement in the inner query

7> CREATE TABLE Product(
8>     ProductID               int                NOT NULL,
9>     Name                    nvarchar(25)       NOT NULL,
10>     ProductNumber           nvarchar(25)               ,
11>     Color                   nvarchar(15)       NULL,
12>     StandardCost            money              NOT NULL,
13>     Size                    nvarchar(5)        NULL,
14>     Weight                  decimal(8, 2)      NULL,
15>     ProductLine             nchar(20)           NULL,
16>     SellStartDate           datetime           NOT NULL,
17>     SellEndDate             datetime           NULL
18> )
19> GO
1> insert into Product values(1,"Product A", "1","Red",123.123,"1",1,"ProductLine A","1999-03-22","2000-03-22");
2> GO
(1 rows affected)
1> insert into Product values(2,"Product B", "2","Yellow",234.234,"1",3,"ProductLine B","2000-03-22","2001-03-22");
2> GO
(1 rows affected)
1> insert into Product values(3,"Product C", "3","Pink",345.345,"1",3,"ProductLine V","2001-09-22","2006-02-22");
2> GO
(1 rows affected)
1> insert into Product values(4,"Product D", "4","White",456.456,"1",4,"ProductLine D","2002-08-22","2006-03-22");
2> GO
(1 rows affected)
1> insert into Product values(5,"Product E", "5","Black",567.567,"1",5,"ProductLine E","2003-01-22","2003-04-22");
2> GO
(1 rows affected)
1> insert into Product values(6,"Product F", "6","Blue",678.678,"1",6,"ProductLine W","2004-02-22","2005-05-22");
2> GO
(1 rows affected)
1> insert into Product values(7,"Product G", "7","Drak",789.789,"1",7,"ProductLine Q","2005-03-22","2006-03-22");
2> GO
(1 rows affected)
1> insert into Product values(8,"Product H", "8","Gray",234.123,"1",8,"ProductLine F","2006-04-22","2006-09-22");
2> GO
(1 rows affected)
1> insert into Product values(9,"Product I", "9","Red",543.123,"1",9,"ProductLine R","2007-05-22","2008-03-22");
2> GO
(1 rows affected)
1> insert into Product values(0,"Product J", "0","Gold",765.123,"1",0,"ProductLine J","2008-06-22","2009-03-22");
2> GO
(1 rows affected)
1>
2>
3>
4> CREATE TABLE SalesOrderDetail(
5>     SalesOrderID            int                NOT NULL,
6>     SalesOrderDetailID      int                NOT NULL,
7>     CarrierTrackingNumber   nvarchar(25)       NULL,
8>     OrderQty                smallint           NOT NULL,
9>     ProductID               int                NOT NULL,
10>     SpecialOfferID          int                NOT NULL,
11>     UnitPrice               money              NOT NULL,
12>     UnitPriceDiscount       money              NOT NULL DEFAULT (0.0),
13>     LineTotal               AS ISNULL([UnitPrice] * (1.0 - [UnitPriceDiscount]) * [OrderQty], 0.0)
14> );
15> GO
1>
2> insert into SalesOrderDetail values (1,1,"1",1,1,1,$1,$1);
3> GO
(1 rows affected)
1> insert into SalesOrderDetail values (2,2,"2",2,2,2,$2,$2);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (3,3,"3",3,3,3,$3,$3);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (4,4,"4",4,4,4,$4,$4);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (5,5,"5",5,5,5,$5,$5);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (6,6,"6",6,6,6,$6,$6);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (7,7,"7",7,7,7,$7,$7);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (8,8,"8",8,8,8,$8,$8);
2> GO
(1 rows affected)
1> insert into SalesOrderDetail values (9,9,"9",9,9,9,$9,$9);
2> GO
(1 rows affected)
1>
2>
3> SELECT ProductID
4>   , Name
5> FROM Product AS P
6> WHERE 1 =
7>   (SELECT DISTINCT SpecialOfferID
8>   FROM SalesOrderDetail As SD
9>   WHERE P.ProductID = SD.ProductID)
10> GO
ProductID   Name
----------- -------------------------
          1 Product A
(1 rows affected)
1>
2> drop table Product;
3> GO
1>
2>
3> drop table SalesOrderDetail;
4> GO


When the DISTINCT keyword is used, all duplicate values are eliminated before the function COUNT is applied.

This form of COUNT does not count rows with null values for the column.
4>
5>
6> CREATE TABLE employee(
7>    id          INTEGER NOT NULL PRIMARY KEY,
8>    first_name  VARCHAR(10),
9>    last_name   VARCHAR(10),
10>    salary      DECIMAL(10,2),
11>    start_Date  DATETIME,
12>    region      VARCHAR(10),
13>    city        VARCHAR(20),
14>    managerid   INTEGER
15> );
16> GO
1> INSERT INTO employee VALUES (1, "Jason" ,  "Martin", 5890,"2005-03-22","North","Vancouver",3);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (2, "Alison",  "Mathews",4789,"2003-07-21","South","Utown",4);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (3, "James" ,  "Smith",  6678,"2001-12-01","North","Paris",5);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (4, "Celia" ,  "Rice",   5567,"2006-03-03","South","London",6);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (5, "Robert",  "Black",  4467,"2004-07-02","East","Newton",7);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (6, "Linda" ,  "Green" , 6456,"2002-05-19","East","Calgary",8);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (7, "David" ,  "Larry",  5345,"2008-03-18","West","New York",9);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (8, "James" ,  "Cat",    4234,"2007-07-17","West","Regina",9);
2> GO
(1 rows affected)
1> INSERT INTO employee VALUES (9, "Joan"  ,  "Act",    6123,"2001-04-16","North","Toronto",10);
2> GO
(1 rows affected)
1>
2> select * from employee;
3> GO
id          first_name last_name  salary       start_Date              region     city                 managerid
----------- ---------- ---------- ------------ ----------------------- ---------- -------------------- -----------
          1 Jason      Martin          5890.00 2005-03-22 00:00:00.000 North      Vancouver                      3
          2 Alison     Mathews         4789.00 2003-07-21 00:00:00.000 South      Utown                          4
          3 James      Smith           6678.00 2001-12-01 00:00:00.000 North      Paris                          5
          4 Celia      Rice            5567.00 2006-03-03 00:00:00.000 South      London                         6
          5 Robert     Black           4467.00 2004-07-02 00:00:00.000 East       Newton                         7
          6 Linda      Green           6456.00 2002-05-19 00:00:00.000 East       Calgary                        8
          7 David      Larry           5345.00 2008-03-18 00:00:00.000 West       New York                       9
          8 James      Cat             4234.00 2007-07-17 00:00:00.000 West       Regina                         9
          9 Joan       Act             6123.00 2001-04-16 00:00:00.000 North      Toronto                       10
(9 rows affected)
1>
2> SELECT region, COUNT(DISTINCT region) job_count
3>        FROM employee
4>        GROUP BY region
5> GO
region     job_count
---------- -----------
East                 1
North                1
South                1
West                 1
(4 rows affected)
1> drop table employee;
2> GO