-
Notifications
You must be signed in to change notification settings - Fork 0
/
FindandRemoveDuplicates.sql
65 lines (53 loc) · 1.78 KB
/
FindandRemoveDuplicates.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/* Finds duplicates in a table and returns the distinct key column of the duplicate
OurColumn is where we put our unique column identifier
OurTableName is where we put our table that has duplicates
*/
;WITH DuplicateFinder (OurColumn,DupCounter)
AS
(SELECT OurColumn
,ROW_NUMBER() OVER(PARTITION BY OurColumn ORDER BY OurColumn) AS DupCounter
FROM OurTableName)
SELECT * INTO #investigate FROM DuplicateFinder
WHERE DupCounter > 1
SELECT DISTINCT OurColumn
FROM #investigate
/* Deletes duplicates in a table; this assumes that our table has duplicates across all columns
OurColumn is where we put our unique column identifier
OurTableName is where we put our table that has duplicates
*/
;WITH DuplicateRemover (OurColumn,DupCounter)
AS
(SELECT OurColumn
,ROW_NUMBER() OVER(PARTITION BY OurColumn ORDER BY OurColumn) AS DupCounter
FROM OurTableName)
DELETE FROM DuplicateRemover
WHERE DupCounter > 1
/* EXAMPLE */
-- Creates a temp table
DECLARE @clean TABLE (
A INT,
B INT,
Date DATETIME
)
-- Insert values into the temp table
INSERT INTO @clean VALUES(1,1,getdate())
INSERT INTO @clean VALUES(1,1,DATEADD(DD,-2,getdate()))
INSERT INTO @clean VALUES(1,1,DATEADD(DD,-5,getdate()))
INSERT INTO @clean VALUES(1,2,DATEADD(DD,-10,getdate()))
INSERT INTO @clean VALUES(1,2,DATEADD(DD,-15,getdate()))
INSERT INTO @clean VALUES(3,1,getdate())
-- Shows the full amount of values in the temp table
SELECT *
FROM @clean
-- Deletes the duplicate B's (notice that there were 3 1s and 2 2s in the B column)
;WITH DuplicateRemover (A,B,DupCounter)
AS
(SELECT A
,B
,ROW_NUMBER() OVER(PARTITION BY A,B ORDER BY Date DESC) AS DupCounter
FROM @clean)
DELETE FROM DuplicateRemover
WHERE DupCounter > 1
-- Temp table without the duplicates
SELECT *
FROM @clean