Skip to content

procedure

Finding the Character Set For a SQL Field

The other day I was working on a project and I needed to know all of the different types of characters contained in a SQL field so that I could make a regular expression pattern that would not fail. Thus, I decided to write a stored procedure that would loop through a range of characters and test if that character is contained in a specified field in any records. From there I can get a list of all of the used characters and then more intelligently write my regular expression pattern.

This could also be handy if you are trying to cast a field and you keep getting errors because of a character that won’t cast correctly. An example would be trying to cast a varchar or char field into an int and for some reason or another one of the records has a letter in the field. Your cast will fail and it can be tricky to track down where the issue is. With this stored procedure you can quickly see what the offending character is and then query up all the records containing that character.

Example usage:

exec FieldCharacterSet 'dbo', 'mytable, 'myfield', 1, 0, 255

Feel free to use this if you find is useful, and leave a comment letting me know if it worked well for you.

-- =============================================
-- Author:        Davin Studer
-- Create date: 9/26/2010
-- Description:    This stored procedure will let you
-- know all the characters that are contained within
-- a field of a specified table.  This is handy for things
-- such as creating regular expression patterns and figuring
-- out casting issues.
-- =============================================
create procedure [dbo].[FieldCharacterSet]
    @TableSchema nvarchar(255) = '',-- this is the schema for table to look at
    @TableName nvarchar(255) = '',    -- this is the table to look at
    @FieldName nvarchar(255) = '',    -- this is the field that will be scanned
    @RTrim bit = 1,                    -- should the field be right trimmed before finding the character set ... mostly for char field types
    @LowCharRange int = 0,            -- character scan start point
    @HighCharRange int = 255        -- character scan end point
as
begin
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    set nocount on;
 
    declare
        @count bigint = 0,
        @sql nvarchar(max) = '',
        @selects nvarchar(max) = '',
        @error varchar(200) = '',
        @field nvarchar(264) = '',
        @sqlbuild nvarchar(max) = ''
    declare @chars table(
        [character] nchar(1)
    )
    declare @statements table(
        [sql] varchar(max)
    )
 
    if @TableName = ''
    begin
        set @error = 'You must supply a table name.'
        raiserror (@error, 15, 1)
    end
 
    if @FieldName = ''
    begin
        set @error = 'You must supply a field name.'
        raiserror (@error, 15, 1)
    end
    
    if @error = ''
    begin
        -- Bracket the table name to avoid naming issues
        if left(@TableName, 1) <> '[' and right(@TableName, 1) <> ']'
        begin
            set @TableName = '[' + @TableName + ']'
        end
        
        -- Bracket the field name to avoid naming issues
        if left(@FieldName, 1) <> '[' and right(@FieldName, 1) <> ']'
        begin
            set @FieldName = '[' + @FieldName + ']'
        end
        
        -- Should we rtrim the field to deal with trailing spaces ... mostly for char fields
        if @RTrim = 1
        begin
            set @field = 'rtrim(' + @FieldName + ')'
        end
        
        -- Create a temp table to hold our examples of each character matched
        if object_id('tempdb..##fieldCharacterSetTemp') is not null
        begin
            drop table ##fieldCharacterSetTemp
        end
        set @sql = 'select * into ##fieldCharacterSetTemp from (select t1.* from ' + @TableSchema + '.' + @TableName + ' t1 inner join ' + @TableSchema + '.' + @TableName + ' t2 on 1 = 1 where 1 = 0) X'
        exec(@sql)
 
        --Loop through the ascii characters low to high
        while @LowCharRange <= @HighCharRange
        begin
            --Build the from and where clauses
            set @sqlbuild = ''
            set @sqlbuild = @sqlbuild + 'from '
            if @TableSchema <> ''
            begin
                set @sqlbuild = @sqlbuild + @TableSchema + '.'
            end
            set @sqlbuild = @sqlbuild + @TableName + nchar(13)
            set @sqlbuild = @sqlbuild + 'where ' + @field
            -- If the character is A-Z or a-z then case sensitive compare
            if (@LowCharRange between 65 and 90) or (@LowCharRange between 97 and 122)
            begin
                set @sqlbuild = @sqlbuild + ' COLLATE Latin1_General_CS_AS'
            end
            set @sqlbuild = @sqlbuild + ' like '
            -- Deal with a few special SQL characters
            if @LowCharRange = 37 -- % character
            begin
                set @sqlbuild = @sqlbuild + '''%\%%'' escape ''\''' + nchar(13)
            end
            else if @LowCharRange = 39 -- ' character
            begin
                set @sqlbuild = @sqlbuild + '''%''''%''' + nchar(13)
            end
            else if @LowCharRange = 95 -- _ character
            begin
                set @sqlbuild = @sqlbuild + '''%\_%'' escape ''\''' + nchar(13)
            end
            else
            begin
                set @sqlbuild = @sqlbuild + '''%' + nchar(@LowCharRange) + '%''' + nchar(13)
            end
 
            -- Buld main counting sql
            set @sql = ''
            set @sql = @sql + 'select @countOut = count(*)' + nchar(13)
            set @sql = @sql + @sqlbuild
 
            --Get count of rows that have the character
            execute sp_executesql @sql,N'@countOut bigint = 0 output',@countOut=@count output
 
            --If any rows have this character add it to the result set
            if @count > 0
            begin
                insert into @chars values (nchar(@LowCharRange))
 
                -- Build the select sql to look at example lines that have this character
                set @selects = ''
                set @selects = @selects + 'select top 1 *' + nchar(13)
                set @selects = @selects + @sqlbuild
                
                insert into @statements values (rtrim(replace(@selects, nchar(13), ' ')))
 
                set @sql = ''
                set @sql = @sql + 'insert into ##fieldCharacterSetTemp' +nchar(13)
                set @sql = @sql + @selects
                
                exec(@sql)
            end
            
            -- Next character
            set @LowCharRange = @LowCharRange + 1
        end
 
        select * from @chars
        select * from @statements
        if object_id('tempdb..##fieldCharacterSetTemp') is not null
        begin
            select * from ##fieldCharacterSetTemp
            drop table ##fieldCharacterSetTemp
        end
    end
end

Convert CSV data into a SQL table

Recently I have been working with some report data that is stored statically in a database in CSV format. I had a need to have that CSV data displayed in a tabular format. I searched around and couldn’t find anything written to do this in SQL, so I decided to write a stored procedure to do this very thing. It will take a CSV and parse it out into a SQL table. For instance if I have report data that looks like this…

"FirstName","LastName","DOB","Children"
"John","Smith","1/5/1980","Bob,Sally,Joe,Chris"
"Jane","Smith","2/25/1982","Bob,Sally,Joe,Chris"
"Bruce","Wayne","5/13/1975",""
"Peter","Parker","5/23/1970",""
"","","",""

It would be transformed into this …

FIRSTNAMELASTNAMEDOBCHILDREN
JohnSmith1/5/1980Bob,Sally,Joe,Chris
JaneSmith2/25/1982Bob,Sally,Joe,Chris
BruceWayne5/13/1975 
PeterParker5/23/1970 
    

Note that in the last column the commas are preserved because they are within the quotation mark text qualifiers. Below is the stored procedure that does this. Feel free to use it, and leave a comment if you found it useful.

-- ==========================================================================================
-- Author:  Davin Studer
-- Create date: 4/5/2011
-- Description: This will take a CSV input
-- and transform it into a table
--
-- Params:
-- @string          - The CSV string
-- @textQualifier   - Character that denotes a string within the CSV
-- @columnDelimiter - Character that denotes different columns ... doesn't have to be a comma
-- @rowOneIsHeader  - Does the first row contain column names?
-- ==========================================================================================
create procedure [dbo].[CSVToTable]
    @string nvarchar(max) = '',
    @textQualifier varchar(1) = '',
    @columnDelimiter varchar(1) = ',',
    @rowOneIsHeader bit = 0
as
begin
    -- set nocount on added to prevent extra result sets from
    -- interfering with select statements.
    set nocount on;
 
    -- We need and input string
    if isnull(@string, '') = ''
    begin
        raiserror ('Please supply an CSV string', 15, 1)
        return
    end
 
    -- We need and column delimiter
    if isnull(@columnDelimiter, '') = ''
    begin
        raiserror ('Please supply a column delimiter.', 15, 1)
        return
    end
  
    -- Make sue the user doesn't pass null as a value
    if isnull(@textQualifier, '') = ''
    begin
        set @textQualifier = ''
    end
 
    -- Make sue the user doesn't pass null as a value
    if isnull(@rowOneIsHeader, '') = ''
    begin
        set @rowOneIsHeader = 0
    end
 
    declare
        @columns int = 1,
        @columnNames nvarchar(max) = '',
        @stop bit = 0,
        @position int = 0,
        @temp nvarchar(1) = '',
        @dataStart int = 0,
        @sql nvarchar(max) = '',
        @qualifierToggle bit = 0,
        @tempString nvarchar(max) = '',
        @delimiterReplacementUTFNumber int = 2603
 
    -- Get rid of the ##tempCSVSplitToTable table if it exists
    if object_id('tempdb..##tempCSVSplitToTable') is not null
    begin
        drop table ##tempCSVSplitToTable
    end
 
    -- Get rid of white space
    set @string = rtrim(ltrim(@string))
 
    -- Set the EOL to char(13)
    set @string = replace(@string, char(13) + char(10), char(13))
    set @string = replace(@string, char(10), char(13))
 
    -- Deal with the delimiter character within the text qualifier characters
    if @textQualifier <> ''
    begin
        while @position <> len(@string)
        begin
            set @temp = substring(@string, @position, 1)
            if @temp = @textQualifier
            begin
                if @qualifierToggle = 0
                begin
                    set @qualifierToggle = 1
                end
                else
                begin
                    set @qualifierToggle = 0
                end
            end
            if @temp = @columnDelimiter and @qualifierToggle = 1
            begin
                set @tempString = @tempString + nchar(@delimiterReplacementUTFNumber) -- replace with UTF delimiter replacement character
            end
            else
            begin
                set @tempString = @tempString + @temp
            end
            set @position = @position + 1
        end
 
        set @string = @tempString
    end
 
    -- Get rid of text qualifier ... we don't need it now
    if @textQualifier <> ''
    begin
        set @string = replace(@string, @textQualifier, '')
    end
 
    -- Get column names
    set @position = 1
    while @stop = 0
    begin
        set @temp = substring(@string, @position, 1)
        if @temp = @columnDelimiter
        begin
            set @columns = @columns + 1
            set @columnNames = @columnNames + ','
        end
        else if @temp = char(13)
        begin
            set @stop = 1
        end
        else
        begin
            set @columnNames = @columnNames + @temp
        end
        set @position = @position + 1
    end
 
    set @dataStart = @position
 
    if @rowOneIsHeader = 0
    begin
        set @dataStart = 1
        set @position = 1
        set @columnNames = ''
        while @position - 1 < @columns
        begin
            set @columnNames = @columnNames + ',Column' + cast(@position as varchar(1))
            set @position = @position + 1
        end
        set @columnNames = substring(@columnNames, 2, len(@columnNames))
    end
 
    -- Build ##tempCSVSplitToTable table
    set @sql = @sql + 'create table ##tempCSVSplitToTable (' + char(13) + '['
    set @stop = 0
    set @position = 1
    while @stop = 0
    begin
        set @temp = substring(@columnNames, @position, 1)
        if @temp <> ','
        begin
            set @sql = @sql + @temp
        end
        else
        begin
            set @sql = @sql + '] nvarchar(max),' + char(13) + '['
        end
   
        set @position = @position + 1
 
        if @position - 1 = len(@columnNames)
        begin
            set @sql = @sql + '] nvarchar(max)'
            set @stop = 1
        end
    end
    set @sql = @sql + ')' + char(13)
    exec (@sql)
 
    -- insert values into ##tempCSVSplitToTable table
    set @position = @dataStart
    while @position - 1 < len(@string)
    begin
        set @stop = 0
        set @sql = 'insert into ##tempCSVSplitToTable ([' + replace(@columnNames, ',', '],[') + ']) values ('''
        while @stop = 0
        begin
            set @temp = substring(@string, @position, 1)
 
            -- end of column
            if @temp = @columnDelimiter
            begin
                set @sql = @sql + ''','''
            end
            -- EOL
            else if @temp = char(13) or datalength(@temp) = 0
            begin
                set @stop = 1
            end
            -- deal with apostrophe in data
            else if @temp = ''''
            begin
                set @sql = @sql + ''''''
            end
            -- column data that isn't an apostrophe
            else
            begin
                set @sql = @sql + @temp
            end
 
            set @position = @position + 1
        end 
        set @sql = @sql + ''')'
 
        -- Get rid of any UTF delimiter replacements that were put in to take the place of the delimiter character within the text qualifier
        if @textQualifier <> ''
        begin
            set @sql = replace(@sql, nchar(@delimiterReplacementUTFNumber), @columnDelimiter)
        end
 
        exec(@sql)
    end
 
    select * from ##tempCSVSplitToTable
 
    -- destroy ##tempCSVSplitToTable table
    if object_id('tempdb..##tempCSVSplitToTable') is not null
    begin
        drop table ##tempCSVSplitToTable
    end
end