Tuesday, July 1, 2008

How to Identify and Delete Duplicate SQL Server Records

How to Identify and Delete Duplicate SQL Server Records: "How to Identify and Delete Duplicate SQL Server Records"

/**********************************************Example of a simple duplicate data delete script.**********************************************/
/**********************************************Set up test environment**********************************************/SET NOCOUNT ON
--Create test tableIF OBJECT_ID('tDupData') IS NOT NULLDROP TABLE tDupDataGO
CREATE TABLE tDupData(lngCompanyID INTEGER ,strCompanyName VARCHAR(20),strAddress VARCHAR(10),dtmModified DATETIME)
--Create test dataINSERT INTO tDupData VALUES (1,'CompanyOne','Address1','01/15/2003')INSERT INTO tDupData VALUES (2,'CompanyTwo','Address2','01/15/2003')INSERT INTO tDupData VALUES (3,'CompanyThree','Address3','01/15/2003')INSERT INTO tDupData VALUES (2,'CompanyTwo','Address','01/16/2003') INSERT INTO tDupData VALUES (3,'CompanyThree','Address','01/16/2003')
-- Dup Data INSERT INTO tDupData VALUES (1,'CompanyOne','Address1','01/15/2003') GO
/**********************************************Finish set up**********************************************/
/**********************************************Simple duplicate data**********************************************/select * from tDupData--Create temp table to hold duplicate dataCREATE TABLE #tempduplicatedata(lngCompanyID INTEGER ,strCompanyName VARCHAR(20),strAddress VARCHAR(10),dtmModified DATETIME)
--Identify and save dup data into temp tableINSERT INTO #tempduplicatedataSELECT * FROM tDupDataGROUP BY lngCompanyID,strCompanyName,strAddress, dtmModifiedHAVING COUNT(*) > 1
--Confirm number of dup rowsSELECT @@ROWCOUNT AS 'Number of Duplicate Rows'
--Delete dup from original tableDELETE FROM tDupData FROM tDupDataINNER JOIN #tempduplicatedataON tDupData.lngCompanyID = #tempduplicatedata.lngCompanyIDAND tDupData.strCompanyName = #tempduplicatedata.strCompanyNameAND tDupData.strAddress = #tempduplicatedata.strAddressAND tDupData.dtmModified = #tempduplicatedata.dtmModified
--Insert the delete data backINSERT INTO tDupDataSELECT * FROM #tempduplicatedata
--Check for dup data.SELECT * FROM tDupDataGROUP BY lngCompanyID,strCompanyName,strAddress,dtmModifiedHAVING COUNT(*) > 1
--Check tableSELECT * FROM tDupData
--Drop temp tableDROP TABLE #tempduplicatedata
--drop test tableIF OBJECT_ID('tDupData') IS NOT NULLDROP TABLE tDupDataGO

No comments: