Skip to content

September 2011

Finding the Character Set For a SQL Field

The other day I was working on a project and I needed to know all of the different types of characters contained in a SQL field so that I could make a regular expression pattern that would not fail. Thus, I decided to write a stored procedure that would loop through a range of characters and test if that character is contained in a specified field in any records. From there I can get a list of all of the used characters and then more intelligently write my regular expression pattern.

This could also be handy if you are trying to cast a field and you keep getting errors because of a character that won’t cast correctly. An example would be trying to cast a varchar or char field into an int and for some reason or another one of the records has a letter in the field. Your cast will fail and it can be tricky to track down where the issue is. With this stored procedure you can quickly see what the offending character is and then query up all the records containing that character.

Example usage:

exec FieldCharacterSet 'dbo', 'mytable, 'myfield', 1, 0, 255

Feel free to use this if you find is useful, and leave a comment letting me know if it worked well for you.

-- =============================================
-- Author:        Davin Studer
-- Create date: 9/26/2010
-- Description:    This stored procedure will let you
-- know all the characters that are contained within
-- a field of a specified table.  This is handy for things
-- such as creating regular expression patterns and figuring
-- out casting issues.
-- =============================================
create procedure [dbo].[FieldCharacterSet]
    @TableSchema nvarchar(255) = '',-- this is the schema for table to look at
    @TableName nvarchar(255) = '',    -- this is the table to look at
    @FieldName nvarchar(255) = '',    -- this is the field that will be scanned
    @RTrim bit = 1,                    -- should the field be right trimmed before finding the character set ... mostly for char field types
    @LowCharRange int = 0,            -- character scan start point
    @HighCharRange int = 255        -- character scan end point
as
begin
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    set nocount on;
 
    declare
        @count bigint = 0,
        @sql nvarchar(max) = '',
        @selects nvarchar(max) = '',
        @error varchar(200) = '',
        @field nvarchar(264) = '',
        @sqlbuild nvarchar(max) = ''
    declare @chars table(
        [character] nchar(1)
    )
    declare @statements table(
        [sql] varchar(max)
    )
 
    if @TableName = ''
    begin
        set @error = 'You must supply a table name.'
        raiserror (@error, 15, 1)
    end
 
    if @FieldName = ''
    begin
        set @error = 'You must supply a field name.'
        raiserror (@error, 15, 1)
    end
    
    if @error = ''
    begin
        -- Bracket the table name to avoid naming issues
        if left(@TableName, 1) <> '[' and right(@TableName, 1) <> ']'
        begin
            set @TableName = '[' + @TableName + ']'
        end
        
        -- Bracket the field name to avoid naming issues
        if left(@FieldName, 1) <> '[' and right(@FieldName, 1) <> ']'
        begin
            set @FieldName = '[' + @FieldName + ']'
        end
        
        -- Should we rtrim the field to deal with trailing spaces ... mostly for char fields
        if @RTrim = 1
        begin
            set @field = 'rtrim(' + @FieldName + ')'
        end
        
        -- Create a temp table to hold our examples of each character matched
        if object_id('tempdb..##fieldCharacterSetTemp') is not null
        begin
            drop table ##fieldCharacterSetTemp
        end
        set @sql = 'select * into ##fieldCharacterSetTemp from (select t1.* from ' + @TableSchema + '.' + @TableName + ' t1 inner join ' + @TableSchema + '.' + @TableName + ' t2 on 1 = 1 where 1 = 0) X'
        exec(@sql)
 
        --Loop through the ascii characters low to high
        while @LowCharRange <= @HighCharRange
        begin
            --Build the from and where clauses
            set @sqlbuild = ''
            set @sqlbuild = @sqlbuild + 'from '
            if @TableSchema <> ''
            begin
                set @sqlbuild = @sqlbuild + @TableSchema + '.'
            end
            set @sqlbuild = @sqlbuild + @TableName + nchar(13)
            set @sqlbuild = @sqlbuild + 'where ' + @field
            -- If the character is A-Z or a-z then case sensitive compare
            if (@LowCharRange between 65 and 90) or (@LowCharRange between 97 and 122)
            begin
                set @sqlbuild = @sqlbuild + ' COLLATE Latin1_General_CS_AS'
            end
            set @sqlbuild = @sqlbuild + ' like '
            -- Deal with a few special SQL characters
            if @LowCharRange = 37 -- % character
            begin
                set @sqlbuild = @sqlbuild + '''%\%%'' escape ''\''' + nchar(13)
            end
            else if @LowCharRange = 39 -- ' character
            begin
                set @sqlbuild = @sqlbuild + '''%''''%''' + nchar(13)
            end
            else if @LowCharRange = 95 -- _ character
            begin
                set @sqlbuild = @sqlbuild + '''%\_%'' escape ''\''' + nchar(13)
            end
            else
            begin
                set @sqlbuild = @sqlbuild + '''%' + nchar(@LowCharRange) + '%''' + nchar(13)
            end
 
            -- Buld main counting sql
            set @sql = ''
            set @sql = @sql + 'select @countOut = count(*)' + nchar(13)
            set @sql = @sql + @sqlbuild
 
            --Get count of rows that have the character
            execute sp_executesql @sql,N'@countOut bigint = 0 output',@countOut=@count output
 
            --If any rows have this character add it to the result set
            if @count > 0
            begin
                insert into @chars values (nchar(@LowCharRange))
 
                -- Build the select sql to look at example lines that have this character
                set @selects = ''
                set @selects = @selects + 'select top 1 *' + nchar(13)
                set @selects = @selects + @sqlbuild
                
                insert into @statements values (rtrim(replace(@selects, nchar(13), ' ')))
 
                set @sql = ''
                set @sql = @sql + 'insert into ##fieldCharacterSetTemp' +nchar(13)
                set @sql = @sql + @selects
                
                exec(@sql)
            end
            
            -- Next character
            set @LowCharRange = @LowCharRange + 1
        end
 
        select * from @chars
        select * from @statements
        if object_id('tempdb..##fieldCharacterSetTemp') is not null
        begin
            select * from ##fieldCharacterSetTemp
            drop table ##fieldCharacterSetTemp
        end
    end
end