The other day I was working on a project and I needed to know all of the different types of characters contained in a SQL field so that I could make a regular expression pattern that would not fail. Thus, I decided to write a stored procedure that would loop through a range of characters and test if that character is contained in a specified field in any records. From there I can get a list of all of the used characters and then more intelligently write my regular expression pattern.
This could also be handy if you are trying to cast a field and you keep getting errors because of a character that won’t cast correctly. An example would be trying to cast a varchar or char field into an int and for some reason or another one of the records has a letter in the field. Your cast will fail and it can be tricky to track down where the issue is. With this stored procedure you can quickly see what the offending character is and then query up all the records containing that character.
Example usage:
exec FieldCharacterSet 'dbo', 'mytable, 'myfield', 1, 0, 255
Feel free to use this if you find is useful, and leave a comment letting me know if it worked well for you.
-- =============================================
-- Author: Davin Studer
-- Create date: 9/26/2010
-- Description: This stored procedure will let you
-- know all the characters that are contained within
-- a field of a specified table. This is handy for things
-- such as creating regular expression patterns and figuring
-- out casting issues.
-- =============================================
create procedure [dbo].[FieldCharacterSet]
@TableSchema nvarchar(255) = '',-- this is the schema for table to look at
@TableName nvarchar(255) = '', -- this is the table to look at
@FieldName nvarchar(255) = '', -- this is the field that will be scanned
@RTrim bit = 1, -- should the field be right trimmed before finding the character set ... mostly for char field types
@LowCharRange int = 0, -- character scan start point
@HighCharRange int = 255 -- character scan end point
as
begin
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
set nocount on;
declare
@count bigint = 0,
@sql nvarchar(max) = '',
@selects nvarchar(max) = '',
@error varchar(200) = '',
@field nvarchar(264) = '',
@sqlbuild nvarchar(max) = ''
declare @chars table(
[character] nchar(1)
)
declare @statements table(
[sql] varchar(max)
)
if @TableName = ''
begin
set @error = 'You must supply a table name.'
raiserror (@error, 15, 1)
end
if @FieldName = ''
begin
set @error = 'You must supply a field name.'
raiserror (@error, 15, 1)
end
if @error = ''
begin
-- Bracket the table name to avoid naming issues
if left(@TableName, 1) <> '[' and right(@TableName, 1) <> ']'
begin
set @TableName = '[' + @TableName + ']'
end
-- Bracket the field name to avoid naming issues
if left(@FieldName, 1) <> '[' and right(@FieldName, 1) <> ']'
begin
set @FieldName = '[' + @FieldName + ']'
end
-- Should we rtrim the field to deal with trailing spaces ... mostly for char fields
if @RTrim = 1
begin
set @field = 'rtrim(' + @FieldName + ')'
end
-- Create a temp table to hold our examples of each character matched
if object_id('tempdb..##fieldCharacterSetTemp') is not null
begin
drop table ##fieldCharacterSetTemp
end
set @sql = 'select * into ##fieldCharacterSetTemp from (select t1.* from ' + @TableSchema + '.' + @TableName + ' t1 inner join ' + @TableSchema + '.' + @TableName + ' t2 on 1 = 1 where 1 = 0) X'
exec(@sql)
--Loop through the ascii characters low to high
while @LowCharRange <= @HighCharRange
begin
--Build the from and where clauses
set @sqlbuild = ''
set @sqlbuild = @sqlbuild + 'from '
if @TableSchema <> ''
begin
set @sqlbuild = @sqlbuild + @TableSchema + '.'
end
set @sqlbuild = @sqlbuild + @TableName + nchar(13)
set @sqlbuild = @sqlbuild + 'where ' + @field
-- If the character is A-Z or a-z then case sensitive compare
if (@LowCharRange between 65 and 90) or (@LowCharRange between 97 and 122)
begin
set @sqlbuild = @sqlbuild + ' COLLATE Latin1_General_CS_AS'
end
set @sqlbuild = @sqlbuild + ' like '
-- Deal with a few special SQL characters
if @LowCharRange = 37 -- % character
begin
set @sqlbuild = @sqlbuild + '''%\%%'' escape ''\''' + nchar(13)
end
else if @LowCharRange = 39 -- ' character
begin
set @sqlbuild = @sqlbuild + '''%''''%''' + nchar(13)
end
else if @LowCharRange = 95 -- _ character
begin
set @sqlbuild = @sqlbuild + '''%\_%'' escape ''\''' + nchar(13)
end
else
begin
set @sqlbuild = @sqlbuild + '''%' + nchar(@LowCharRange) + '%''' + nchar(13)
end
-- Buld main counting sql
set @sql = ''
set @sql = @sql + 'select @countOut = count(*)' + nchar(13)
set @sql = @sql + @sqlbuild
--Get count of rows that have the character
execute sp_executesql @sql,N'@countOut bigint = 0 output',@countOut=@count output
--If any rows have this character add it to the result set
if @count > 0
begin
insert into @chars values (nchar(@LowCharRange))
-- Build the select sql to look at example lines that have this character
set @selects = ''
set @selects = @selects + 'select top 1 *' + nchar(13)
set @selects = @selects + @sqlbuild
insert into @statements values (rtrim(replace(@selects, nchar(13), ' ')))
set @sql = ''
set @sql = @sql + 'insert into ##fieldCharacterSetTemp' +nchar(13)
set @sql = @sql + @selects
exec(@sql)
end
-- Next character
set @LowCharRange = @LowCharRange + 1
end
select * from @chars
select * from @statements
if object_id('tempdb..##fieldCharacterSetTemp') is not null
begin
select * from ##fieldCharacterSetTemp
drop table ##fieldCharacterSetTemp
end
end
end