TSQL script - CTE to remove duplicate rows

In this post, let us see how to remove duplicate records using common table expression.
Another example in this post, shows how to remove duplicate values in the delimited string.

Example 1:




CREATE TABLE #Table (C1 INT,C2 VARCHAR(10))

INSERT INTO #Table VALUES (1,'SQL Server')
INSERT INTO #Table VALUES (1,'SQL Server')
INSERT INTO #Table VALUES (2,'Oracle')
SELECT * FROM #Table

;WITH Delete_Duplicate_Row_cte
     AS (SELECT ROW_NUMBER()OVER(PARTITION BY C1, C2 ORDER BY C1,C2) ROW_NUM,*
         FROM   #Table ) 
DELETE FROM Delete_Duplicate_Row_cte WHERE  ROW_NUM > 1

SELECT * FROM #Table

--DROP TABLE #Table






Example 2:





DECLARE @TMP1 TABLE (ID INT ,Column1 VARCHAR(100))
DECLARE @TMP TABLE (ID INT IDENTITY(1,1),Column1 VARCHAR(100))
INSERT INTO @TMP SELECT 'Oracle|Oracle|SQL Server'
INSERT INTO @TMP SELECT 'DB2|DB2|SQL Server'

--splitting

;WITH cte AS (
    SELECT 
        ID,
        CAST('' + REPLACE(Column1, '|', '') + '' AS XML) AS Column1
    FROM @TMP
)
INSERT INTO @TMP1
SELECT 
    ID,
    x.i.value('.', 'VARCHAR(10)') 
FROM cte
CROSS APPLY Column1.nodes('//i') x(i)

--Deleting duplicate

;WITH Delete_Duplicate_Row
     AS (SELECT ROW_NUMBER()OVER(PARTITION BY ID,Column1 ORDER BY Column1) ROW_NUM,*
         FROM   @TMP1 ) 
DELETE FROM Delete_Duplicate_Row WHERE  ROW_NUM > 1

--Concatenating

SELECT MAX(ID) ID, 
       STUFF((SELECT ' | ' + Column1
              FROM   @TMP1 X WHERE X.ID = Y.ID GROUP  BY ID,Column1
              FOR XML PATH('')), 1, 2, '') CSV                
FROM   @TMP1 Y
GROUP  BY ID





See Also:

1 comment:

Rishi Daftary said...

Interesting post and I too write many times on SQL Server blog. Would love to follow you. You can visit me @ www.techknackblogs.com