SQL Server/T-SQL Tutorial/Query/DISTINCT

Материал из SQL эксперт
Перейти к: навигация, поиск

A SELECT statement that eliminates duplicate rows

   <source lang="sql">

DISTINCT keyword prevents duplicate (identical) rows from being included in the result set. It also causes the result set to be sorted by its first column. To use the DISTINCT keyword, code it immediately after the SELECT keyword.

13> 14> 15> create table Bankers( 16> BankerID Integer, 17> BankerName VARCHAR(20), 18> BankerContactLName VARCHAR(20), 19> BankerContactFName VARCHAR(20), 20> BankerCity VARCHAR(20), 21> BankerState VARCHAR(20), 22> BankerZipCode VARCHAR(20), 23> BankerPhone VARCHAR(20) 24> ) 25> GO 1> 2> insert into Bankers values (1, "ABC Inc.","Joe","Smith","Vancouver","BC","11111","111-111-1111"); 3> GO (1 rows affected) 1> insert into Bankers values (2, "DEF Inc.","Red","Rice", "New York", "DE","22222","222-222-2222"); 2> GO (1 rows affected) 1> insert into Bankers values (3, "HJI Inc.","Kit","Cat", "Paris", "CA","33333","333-333-3333"); 2> GO (1 rows affected) 1> insert into Bankers values (4, "QWE Inc.","Git","Black","Regina", "ER","44444","444-444-4444"); 2> GO (1 rows affected) 1> insert into Bankers values (5, "RTY Inc.","Wil","Lee", "Toronto", "YU","55555","555-555-5555"); 2> GO (1 rows affected) 1> insert into Bankers values (6, "YUI Inc.","Ted","Larry","Calgary", "TY","66666","666-666-6666"); 2> GO (1 rows affected) 1> insert into Bankers values (7, "OIP Inc.","Yam","Act", "San Franc","FG","77777","777-777-7777"); 2> GO (1 rows affected) 1> insert into Bankers values (8, "SAD Inc.","Hit","Eat", "Orland", "PO","88888","888-888-8888"); 2> GO (1 rows affected) 1> insert into Bankers values (9, "DFG Inc.","Sad","Lee", "Wisler", "PL","99999","999-999-9999"); 2> GO (1 rows affected) 1> insert into Bankers values (0, "GHJ Inc.","Bit","Lee", "Ticker", "MN","00000","000-000-0000"); 2> GO (1 rows affected) 1> 2> 3> SELECT DISTINCT BankerCity, BankerState 4> FROM Bankers 5> GO BankerCity BankerState


--------------------

Calgary TY New York DE Orland PO Paris CA Regina ER San Franc FG Ticker MN Toronto YU Vancouver BC Wisler PL (10 rows affected) 1> 2> drop table Bankers; 3> GO 1> 2></source>


A summary query that uses the DISTINCT keyword

   <source lang="sql">

4> 5> 6> create table Billings ( 7> BankerID INTEGER, 8> BillingNumber INTEGER, 9> BillingDate datetime, 10> BillingTotal INTEGER, 11> TermsID INTEGER, 12> BillingDueDate datetime , 13> PaymentTotal INTEGER, 14> CreditTotal INTEGER 15> 16> ); 17> GO 1> 2> INSERT INTO Billings VALUES (1, 1, "2005-01-22", 165, 1,"2005-04-22",123,321); 3> GO (1 rows affected) 1> INSERT INTO Billings VALUES (2, 2, "2001-02-21", 165, 1,"2002-02-22",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (3, 3, "2003-05-02", 165, 1,"2005-04-12",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (4, 4, "1999-03-12", 165, 1,"2005-04-18",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (5, 5, "2000-04-23", 165, 1,"2005-04-17",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (6, 6, "2001-06-14", 165, 1,"2005-04-18",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (7, 7, "2002-07-15", 165, 1,"2005-04-19",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (8, 8, "2003-08-16", 165, 1,"2005-04-20",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (9, 9, "2004-09-17", 165, 1,"2005-04-21",123,321); 2> GO (1 rows affected) 1> INSERT INTO Billings VALUES (0, 0, "2005-10-18", 165, 1,"2005-04-22",123,321); 2> GO (1 rows affected) 1> 2> 3> SELECT COUNT(DISTINCT BankerID) AS NumberOfBankers, 4> COUNT(BankerID) AS NumberOfBillings, 5> AVG(BillingTotal) AS AverageBillingAmount, 6> SUM(BillingTotal) AS TotalBillingAmount 7> FROM Billings 8> WHERE BillingDate > "2002-01-01" 9> GO NumberOfBankers NumberOfBillings AverageBillingAmount TotalBillingAmount


---------------- -------------------- ------------------
             6                6                  165                990

(1 rows affected) 1> 2> 3> 4> drop table Billings; 5> GO 1> 2></source>


Correlated subquery using Distinct

   <source lang="sql">

7> CREATE TABLE Product( 8> ProductID int NOT NULL, 9> Name nvarchar(25) NOT NULL, 10> ProductNumber nvarchar(25) , 11> Color nvarchar(15) NULL, 12> StandardCost money NOT NULL, 13> Size nvarchar(5) NULL, 14> Weight decimal(8, 2) NULL, 15> ProductLine nchar(20) NULL, 16> SellStartDate datetime NOT NULL, 17> SellEndDate datetime NULL 18> ) 19> GO 1> insert into Product values(1,"Product A", "1","Red",123.123,"1",1,"ProductLine A","1999-03-22","2000-03-22"); 2> GO (1 rows affected) 1> insert into Product values(2,"Product B", "2","Yellow",234.234,"1",3,"ProductLine B","2000-03-22","2001-03-22"); 2> GO (1 rows affected) 1> insert into Product values(3,"Product C", "3","Pink",345.345,"1",3,"ProductLine V","2001-09-22","2006-02-22"); 2> GO (1 rows affected) 1> insert into Product values(4,"Product D", "4","White",456.456,"1",4,"ProductLine D","2002-08-22","2006-03-22"); 2> GO (1 rows affected) 1> insert into Product values(5,"Product E", "5","Black",567.567,"1",5,"ProductLine E","2003-01-22","2003-04-22"); 2> GO (1 rows affected) 1> insert into Product values(6,"Product F", "6","Blue",678.678,"1",6,"ProductLine W","2004-02-22","2005-05-22"); 2> GO (1 rows affected) 1> insert into Product values(7,"Product G", "7","Drak",789.789,"1",7,"ProductLine Q","2005-03-22","2006-03-22"); 2> GO (1 rows affected) 1> insert into Product values(8,"Product H", "8","Gray",234.123,"1",8,"ProductLine F","2006-04-22","2006-09-22"); 2> GO (1 rows affected) 1> insert into Product values(9,"Product I", "9","Red",543.123,"1",9,"ProductLine R","2007-05-22","2008-03-22"); 2> GO (1 rows affected) 1> insert into Product values(0,"Product J", "0","Gold",765.123,"1",0,"ProductLine J","2008-06-22","2009-03-22"); 2> GO (1 rows affected) 1> 2> 3> 4> CREATE TABLE SalesOrderDetail( 5> SalesOrderID int NOT NULL, 6> SalesOrderDetailID int NOT NULL, 7> CarrierTrackingNumber nvarchar(25) NULL, 8> OrderQty smallint NOT NULL, 9> ProductID int NOT NULL, 10> SpecialOfferID int NOT NULL, 11> UnitPrice money NOT NULL, 12> UnitPriceDiscount money NOT NULL DEFAULT (0.0), 13> LineTotal AS ISNULL([UnitPrice] * (1.0 - [UnitPriceDiscount]) * [OrderQty], 0.0) 14> ); 15> GO 1> 2> insert into SalesOrderDetail values (1,1,"1",1,1,1,$1,$1); 3> GO (1 rows affected) 1> insert into SalesOrderDetail values (2,2,"2",2,2,2,$2,$2); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (3,3,"3",3,3,3,$3,$3); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (4,4,"4",4,4,4,$4,$4); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (5,5,"5",5,5,5,$5,$5); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (6,6,"6",6,6,6,$6,$6); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (7,7,"7",7,7,7,$7,$7); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (8,8,"8",8,8,8,$8,$8); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (9,9,"9",9,9,9,$9,$9); 2> GO (1 rows affected) 1> 2> 3> SELECT ProductID 4> , Name 5> FROM Product AS P 6> WHERE 1 = 7> (SELECT DISTINCT SpecialOfferID 8> FROM SalesOrderDetail As SD 9> WHERE P.ProductID = SD.ProductID) 10> GO ProductID Name


-------------------------
         1 Product A

(1 rows affected) 1> 2> drop table Product; 3> GO 1> 2> 3> drop table SalesOrderDetail; 4> GO</source>


DISTINCT can be used only once in a SELECT list, and it must precede all column names in that list.

   <source lang="sql">

the DISTINCT clause displays all rows where the combination of columns is distinct. 9> CREATE TABLE employee( 10> id INTEGER NOT NULL PRIMARY KEY, 11> first_name VARCHAR(10), 12> last_name VARCHAR(10), 13> salary DECIMAL(10,2), 14> start_Date DATETIME, 15> region VARCHAR(10), 16> city VARCHAR(20), 17> managerid INTEGER 18> ); 19> GO 1> INSERT INTO employee VALUES (1, "Jason" , "Martin", 5890,"2005-03-22","North","Vancouver",3); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (2, "Alison", "Mathews",4789,"2003-07-21","South","Utown",4); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (3, "James" , "Smith", 6678,"2001-12-01","North","Paris",5); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (4, "Celia" , "Rice", 5567,"2006-03-03","South","London",6); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (5, "Robert", "Black", 4467,"2004-07-02","East","Newton",7); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (6, "Linda" , "Green" , 6456,"2002-05-19","East","Calgary",8); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (7, "David" , "Larry", 5345,"2008-03-18","West","New York",9); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (8, "James" , "Cat", 4234,"2007-07-17","West","Regina",9); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (9, "Joan" , "Act", 6123,"2001-04-16","North","Toronto",10); 2> GO (1 rows affected) 1> 2> select * from employee; 3> GO id first_name last_name salary start_Date region city managerid


---------- ---------- ------------ ----------------------- ---------- -------------------- -----------
         1 Jason      Martin          5890.00 2005-03-22 00:00:00.000 North      Vancouver                      3
         2 Alison     Mathews         4789.00 2003-07-21 00:00:00.000 South      Utown                          4
         3 James      Smith           6678.00 2001-12-01 00:00:00.000 North      Paris                          5
         4 Celia      Rice            5567.00 2006-03-03 00:00:00.000 South      London                         6
         5 Robert     Black           4467.00 2004-07-02 00:00:00.000 East       Newton                         7
         6 Linda      Green           6456.00 2002-05-19 00:00:00.000 East       Calgary                        8
         7 David      Larry           5345.00 2008-03-18 00:00:00.000 West       New York                       9
         8 James      Cat             4234.00 2007-07-17 00:00:00.000 West       Regina                         9
         9 Joan       Act             6123.00 2001-04-16 00:00:00.000 North      Toronto                       10

(9 rows affected) 1> 2> select distinct region from employee; 3> GO region


East North South West (4 rows affected) 1> 2> 3> drop table employee; 4> GO 1></source>


Distinct with two columns

   <source lang="sql">

3> IF OBJECT_ID("dbo.Sessions") IS NOT NULL 4> DROP TABLE dbo.Sessions; 5> GO 1> 2> CREATE TABLE dbo.Sessions 3> ( 4> keycol INT NOT NULL IDENTITY PRIMARY KEY, 5> app VARCHAR(10) NOT NULL, 6> usr VARCHAR(10) NOT NULL, 7> starttime DATETIME NOT NULL, 8> endtime DATETIME NOT NULL, 9> CHECK(endtime > starttime) 10> ); 11> GO 1> 2> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 08:30", "20060212 10:30"); 3> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 08:30", "20060212 08:45"); 4> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 09:00", "20060212 09:30"); 5> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 09:15", "20060212 10:30"); 6> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 09:15", "20060212 09:30"); 7> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 10:30", "20060212 14:30"); 8> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user1", "20060212 10:45", "20060212 11:30"); 9> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app1", "user2", "20060212 11:00", "20060212 12:30"); 10> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 08:30", "20060212 08:45"); 11> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 09:00", "20060212 09:30"); 12> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 11:45", "20060212 12:00"); 13> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 12:30", "20060212 14:00"); 14> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 12:45", "20060212 13:30"); 15> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 13:00", "20060212 14:00"); 16> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user1", "20060212 14:00", "20060212 16:30"); 17> INSERT INTO dbo.Sessions(app, usr, starttime, endtime) VALUES("app2", "user2", "20060212 15:30", "20060212 17:00"); 18> GO 1> 2> SELECT DISTINCT app, starttime AS ts 3> FROM dbo.Sessions; 4> GO app ts


-----------------------

app1 2006-02-12 08:30:00.000 app1 2006-02-12 09:00:00.000 app1 2006-02-12 09:15:00.000 app1 2006-02-12 10:30:00.000 app1 2006-02-12 10:45:00.000 app1 2006-02-12 11:00:00.000 app2 2006-02-12 08:30:00.000 app2 2006-02-12 09:00:00.000 app2 2006-02-12 11:45:00.000 app2 2006-02-12 12:30:00.000 app2 2006-02-12 12:45:00.000 app2 2006-02-12 13:00:00.000 app2 2006-02-12 14:00:00.000 app2 2006-02-12 15:30:00.000 1> 2> drop table sessions; 3> GO 1></source>


Using SELECT DISTINCT or a GROUP BY statement in the inner query

   <source lang="sql">

7> CREATE TABLE Product( 8> ProductID int NOT NULL, 9> Name nvarchar(25) NOT NULL, 10> ProductNumber nvarchar(25) , 11> Color nvarchar(15) NULL, 12> StandardCost money NOT NULL, 13> Size nvarchar(5) NULL, 14> Weight decimal(8, 2) NULL, 15> ProductLine nchar(20) NULL, 16> SellStartDate datetime NOT NULL, 17> SellEndDate datetime NULL 18> ) 19> GO 1> insert into Product values(1,"Product A", "1","Red",123.123,"1",1,"ProductLine A","1999-03-22","2000-03-22"); 2> GO (1 rows affected) 1> insert into Product values(2,"Product B", "2","Yellow",234.234,"1",3,"ProductLine B","2000-03-22","2001-03-22"); 2> GO (1 rows affected) 1> insert into Product values(3,"Product C", "3","Pink",345.345,"1",3,"ProductLine V","2001-09-22","2006-02-22"); 2> GO (1 rows affected) 1> insert into Product values(4,"Product D", "4","White",456.456,"1",4,"ProductLine D","2002-08-22","2006-03-22"); 2> GO (1 rows affected) 1> insert into Product values(5,"Product E", "5","Black",567.567,"1",5,"ProductLine E","2003-01-22","2003-04-22"); 2> GO (1 rows affected) 1> insert into Product values(6,"Product F", "6","Blue",678.678,"1",6,"ProductLine W","2004-02-22","2005-05-22"); 2> GO (1 rows affected) 1> insert into Product values(7,"Product G", "7","Drak",789.789,"1",7,"ProductLine Q","2005-03-22","2006-03-22"); 2> GO (1 rows affected) 1> insert into Product values(8,"Product H", "8","Gray",234.123,"1",8,"ProductLine F","2006-04-22","2006-09-22"); 2> GO (1 rows affected) 1> insert into Product values(9,"Product I", "9","Red",543.123,"1",9,"ProductLine R","2007-05-22","2008-03-22"); 2> GO (1 rows affected) 1> insert into Product values(0,"Product J", "0","Gold",765.123,"1",0,"ProductLine J","2008-06-22","2009-03-22"); 2> GO (1 rows affected) 1> 2> 3> 4> CREATE TABLE SalesOrderDetail( 5> SalesOrderID int NOT NULL, 6> SalesOrderDetailID int NOT NULL, 7> CarrierTrackingNumber nvarchar(25) NULL, 8> OrderQty smallint NOT NULL, 9> ProductID int NOT NULL, 10> SpecialOfferID int NOT NULL, 11> UnitPrice money NOT NULL, 12> UnitPriceDiscount money NOT NULL DEFAULT (0.0), 13> LineTotal AS ISNULL([UnitPrice] * (1.0 - [UnitPriceDiscount]) * [OrderQty], 0.0) 14> ); 15> GO 1> 2> insert into SalesOrderDetail values (1,1,"1",1,1,1,$1,$1); 3> GO (1 rows affected) 1> insert into SalesOrderDetail values (2,2,"2",2,2,2,$2,$2); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (3,3,"3",3,3,3,$3,$3); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (4,4,"4",4,4,4,$4,$4); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (5,5,"5",5,5,5,$5,$5); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (6,6,"6",6,6,6,$6,$6); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (7,7,"7",7,7,7,$7,$7); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (8,8,"8",8,8,8,$8,$8); 2> GO (1 rows affected) 1> insert into SalesOrderDetail values (9,9,"9",9,9,9,$9,$9); 2> GO (1 rows affected) 1> 2> 3> SELECT ProductID 4> , Name 5> FROM Product AS P 6> WHERE 1 = 7> (SELECT DISTINCT SpecialOfferID 8> FROM SalesOrderDetail As SD 9> WHERE P.ProductID = SD.ProductID) 10> GO ProductID Name


-------------------------
         1 Product A

(1 rows affected) 1> 2> drop table Product; 3> GO 1> 2> 3> drop table SalesOrderDetail; 4> GO</source>


When the DISTINCT keyword is used, all duplicate values are eliminated before the function COUNT is applied.

   <source lang="sql">

This form of COUNT does not count rows with null values for the column. 4> 5> 6> CREATE TABLE employee( 7> id INTEGER NOT NULL PRIMARY KEY, 8> first_name VARCHAR(10), 9> last_name VARCHAR(10), 10> salary DECIMAL(10,2), 11> start_Date DATETIME, 12> region VARCHAR(10), 13> city VARCHAR(20), 14> managerid INTEGER 15> ); 16> GO 1> INSERT INTO employee VALUES (1, "Jason" , "Martin", 5890,"2005-03-22","North","Vancouver",3); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (2, "Alison", "Mathews",4789,"2003-07-21","South","Utown",4); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (3, "James" , "Smith", 6678,"2001-12-01","North","Paris",5); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (4, "Celia" , "Rice", 5567,"2006-03-03","South","London",6); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (5, "Robert", "Black", 4467,"2004-07-02","East","Newton",7); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (6, "Linda" , "Green" , 6456,"2002-05-19","East","Calgary",8); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (7, "David" , "Larry", 5345,"2008-03-18","West","New York",9); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (8, "James" , "Cat", 4234,"2007-07-17","West","Regina",9); 2> GO (1 rows affected) 1> INSERT INTO employee VALUES (9, "Joan" , "Act", 6123,"2001-04-16","North","Toronto",10); 2> GO (1 rows affected) 1> 2> select * from employee; 3> GO id first_name last_name salary start_Date region city managerid


---------- ---------- ------------ ----------------------- ---------- -------------------- -----------
         1 Jason      Martin          5890.00 2005-03-22 00:00:00.000 North      Vancouver                      3
         2 Alison     Mathews         4789.00 2003-07-21 00:00:00.000 South      Utown                          4
         3 James      Smith           6678.00 2001-12-01 00:00:00.000 North      Paris                          5
         4 Celia      Rice            5567.00 2006-03-03 00:00:00.000 South      London                         6
         5 Robert     Black           4467.00 2004-07-02 00:00:00.000 East       Newton                         7
         6 Linda      Green           6456.00 2002-05-19 00:00:00.000 East       Calgary                        8
         7 David      Larry           5345.00 2008-03-18 00:00:00.000 West       New York                       9
         8 James      Cat             4234.00 2007-07-17 00:00:00.000 West       Regina                         9
         9 Joan       Act             6123.00 2001-04-16 00:00:00.000 North      Toronto                       10

(9 rows affected) 1> 2> SELECT region, COUNT(DISTINCT region) job_count 3> FROM employee 4> GROUP BY region 5> GO region job_count


-----------

East 1 North 1 South 1 West 1 (4 rows affected) 1> drop table employee; 2> GO</source>