How to get SQL to remove non-numeric characters from a field

So I was working with phone numbers recently and the field was filthy. I mean there was absolutely no data entry validation whatsoever. Everything has been entered into this field from email addresses entered accidentally to phone numbers with little notes like (This is Steve’s number) to just random characters. There are millions of legitimate numbers in this field so in order to make the field workable the junk has to be taken out first.

A colleague of mine was kind enough to share this script with me for replacing characters. I’ve found it to be very beneficial as it is much more efficient than the solution I had been using of replacing all junk characters in a column row by row. This approach is set based replacing a single unwanted character from an entire column at a time.

CREATE TABLE #temp ([FreeText] VARCHAR(50) NOT NULL)

INSERT #temp
VALUES ('Hi There!!! 222  10 - 3476')

INSERT #temp
VALUES ('p@yahoo.com $%*$& 1234567 $%^&^')

DECLARE @DisallowedCharsASCIICodeMin TINYINT
	,@DisallowedCharsASCIICodeMax TINYINT
	,@ASCIICode TINYINT
	,@Char CHAR(1)
	,@ReplaceChar CHAR(1)
DECLARE @DisallowedChars TABLE (
	[ASCIICode] TINYINT NOT NULL
	,[Char] CHAR(1) NOT NULL
	,[ReplaceChar] CHAR(1) NULL
	,[Replaced] BIT NOT NULL DEFAULT 0
	,PRIMARY KEY ([ASCIICode])
	)

SELECT @DisallowedCharsASCIICodeMin = 32
	,@DisallowedCharsASCIICodeMax = 126

SET @ASCIICode = @DisallowedCharsASCIICodeMin

WHILE @ASCIICode <= @DisallowedCharsASCIICodeMax
BEGIN
	INSERT @DisallowedChars (
		[ASCIICode]
		,[Char]
		)
	VALUES (
		@ASCIICode
		,CHAR(@ASCIICode)
		)

	SET @ASCIICode = @ASCIICode + 1
END

DELETE @DisallowedChars
WHERE [Char] IN (
		'0'
		,'1'
		,'2'
		,'3'
		,'4'
		,'5'
		,'6'
		,'7'
		,'8'
		,'9'
		)

WHILE EXISTS (
		SELECT 1
		FROM @DisallowedChars
		WHERE [Replaced] = 0
		)
BEGIN
	SELECT TOP 1 @ASCIICode = [ASCIICode]
		,@Char = [Char]
	FROM @DisallowedChars
	WHERE [Replaced] = 0

	UPDATE #temp
	SET [FreeText] = CASE 
			WHEN @ReplaceChar IS NULL
				THEN REPLACE([FreeText], @Char, '')
			ELSE REPLACE([FreeText], @Char, @ReplaceChar)
			END
	WHERE [FreeText] LIKE '%' + @Char + '%'

	UPDATE @DisallowedChars
	SET [Replaced] = 1
	WHERE [ASCIICode] = @ASCIICode
END

SELECT *
FROM #temp

DROP TABLE #temp
GO

 

For my needs I’ve turned this logic into a stored procedure. I’ve also made a few changes. Firstly I’ve expanded the range of charters to exclude from unprintable characters to additional Unicode characters. I’ve also removed the option to add in a replace character as I only want non-numeric characters to be removed from the field. This allows me to also remove the case statement.

-- Drop stored procedure if it already exists
IF EXISTS (
		SELECT *
		FROM INFORMATION_SCHEMA.ROUTINES
		WHERE SPECIFIC_SCHEMA = N'dbo'
			AND SPECIFIC_NAME = N'GetNumeric'
		)
	DROP PROCEDURE dbo.GetNumeric
GO
-- Create Procedure
CREATE PROCEDURE dbo.GetNumeric @TableName VARCHAR(255)
	,@ColumnName VARCHAR(255)
AS
SET NOCOUNT ON

DECLARE @sql AS VARCHAR(500)

CREATE TABLE #temp (
	DirtyColumn VARCHAR(255) NOT NULL
	,CleanColumn VARCHAR(255) NOT NULL
	)

--SELECT @TableName
SET @sql = 'SELECT ' + @ColumnName + ' AS DirtyColumn
,' + @ColumnName + ' AS CleanColumn FROM ' + @TableName

INSERT INTO #temp (
	DirtyColumn
	,CleanColumn
	)
EXEC (@sql)

DECLARE @DisallowedCharsASCIICodeMin TINYINT
	,@DisallowedCharsASCIICodeMax TINYINT
	,@ASCIICode TINYINT
	,@Char CHAR(1)
	,@ReplaceChar CHAR(1)
DECLARE @DisallowedChars TABLE (
	[ASCIICode] TINYINT NOT NULL
	,[Char] CHAR(1) NOT NULL
	,[ReplaceChar] CHAR(1) NULL
	,[Replaced] BIT NOT NULL DEFAULT 0
	,PRIMARY KEY ([ASCIICode])
	)

SELECT @DisallowedCharsASCIICodeMin = 1
	,@DisallowedCharsASCIICodeMax = 254

SET @ASCIICode = @DisallowedCharsASCIICodeMin

WHILE @ASCIICode <= @DisallowedCharsASCIICodeMax
BEGIN
	INSERT @DisallowedChars (
		[ASCIICode]
		,[Char]
		)
	VALUES (
		@ASCIICode
		,CHAR(@ASCIICode)
		)

	SET @ASCIICode = @ASCIICode + 1
END

DELETE @DisallowedChars
WHERE [Char] IN (
		'0'
		,'1'
		,'2'
		,'3'
		,'4'
		,'5'
		,'6'
		,'7'
		,'8'
		,'9'
		)

WHILE EXISTS (
		SELECT 1
		FROM @DisallowedChars
		WHERE [Replaced] = 0
		)
BEGIN
	SELECT TOP 1 @ASCIICode = [ASCIICode]
		,@Char = [Char]
	FROM @DisallowedChars
	WHERE [Replaced] = 0

	UPDATE #temp
	SET [CleanColumn] = REPLACE([CleanColumn], @Char, '')
	WHERE [CleanColumn] LIKE '%' + @Char + '%'

	UPDATE @DisallowedChars
	SET [Replaced] = 1
	WHERE [ASCIICode] = @ASCIICode
END

SET @sql = 'UPDATE T1
SET T1.' + @ColumnName + ' = T2.CleanColumn
FROM ' + @TableName + ' AS T1
INNER JOIN #temp AS T2
ON T1.' + @ColumnName + ' = T2.DirtyColumn
WHERE T1.' + @ColumnName + '= T2.DirtyColumn;'

EXEC (@sql)

DROP TABLE #temp
GO


GO

 

After deploying the SP above to your test environment you can test it with the following script.

IF OBJECT_ID('dbo.DirtyPhoneNumbers', 'U') IS NOT NULL
	DROP TABLE dbo.DirtyPhoneNumbers;

CREATE TABLE DirtyPhoneNumbers (PhoneNumbers VARCHAR(255));
GO

INSERT INTO DirtyPhoneNumbers (PhoneNumbers)
VALUES ('afef2313newfnaksdfn')
	,('afef2313n!!!!!!!!!!&dfn')
	,('afef====+++22221sdfn')
	,('afef!"£$%^&&7575757sdfn')

SELECT *
FROM DirtyPhoneNumbers

EXEC dbo.GetNumeric 'DirtyPhoneNumbers'
	,'PhoneNumbers'

SELECT *
FROM DirtyPhoneNumbers
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s