Category Archives: SQL

Populate your dimensions with Unknown Members

Often dimensions need a “Unknown” and/or “Not Applicable” members to be inserted for data where the full set of attributes for a fact record do not come from a source system (effectively failed dimension surrogate key lookups). These often have negative integer surrogate keys (while I have seen up to 10 different types of Unknown members in a dimension, this usually suggests some data quality issues, so one or two would normally suffice).

If you have many dimensions or they are quite wide with many data types, this can be a time consuming task. The below script will help you automate this task by creating a set of INSERT statements based on your dimension table meta data.

I contemplated about adding paramaters to determine how many members to generate (ie Unknown, Not Applicable, Early Arriving fact etc) , but thought it best to let you rerun the script and change those values in the @dataTypes  table variable.


SET NOCOUNT ON

DECLARE @crlf CHAR(2)

SET @crlf = char(13) + char(10)

DECLARE @UnknownSurrogateKeyValue AS VARCHAR(255)
DECLARE @SurrogateKeyName AS VARCHAR(255)
DECLARE @DimensionTableNamePrefix AS VARCHAR(255)

–Unknown member surrogate key value:
SET @UnknownSurrogateKeyValue = ‘-1’
–String within dimension surrogate key column name
SET @SurrogateKeyName = ‘key’
–Prefix for Dimension table names
SET @DimensionTableNamePrefix = ‘Dim’

–Create Table for default values
DECLARE @dataTypes AS TABLE
  (
     DATA_TYPE     VARCHAR(50)
     ,MIN_DESC     VARCHAR(255)
     ,MAX_DESC     VARCHAR(255)
     ,DEFAULT_DESC VARCHAR(255)
  )

INSERT INTO @dataTypes
            (DATA_TYPE
             ,MIN_DESC
             ,MAX_DESC
             ,DEFAULT_DESC)
VALUES
–Exact Numerics
( ‘bigint’
  ,‘-9223372036854775808’
  ,‘9223372036854775807’
  ,‘0’ ),
            ( ‘numeric’
              ,‘0’
              ,‘0’
              ,‘0’ ),
            ( ‘bit’
              ,‘0’
              ,‘1’
              ,‘0’ ),
            ( ‘smallint’
              ,‘-32768’
              ,‘32767’
              ,‘0’ ),
            ( ‘decimal’
              ,‘0’
              ,‘0’
              ,‘0’ ),
            ( ‘smallmoney’
              ,‘-214748.3648’
              ,‘214748.3647’
              ,‘0’ ),
            ( ‘int’
              ,‘-2147483648’
              ,‘2147483647’
              ,‘0’ ),
            ( ‘tinyint’
              ,‘0’
              ,‘255’
              ,‘0’ ),
            ( ‘money ‘
              ,‘-922337203685477.5808’
              ,‘922337203685477.5807’
              ,‘0’ )
–Approximate Numerics
,
            ( ‘float’
              ,‘0’
              ,‘0’
              ,‘0’ ),
            ( ‘real’
              ,‘0’
              ,‘0’
              ,‘0’ )
–Binary Strings
,
            ( ‘binary’
              ,‘0’
              ,‘1’
              ,‘0’ ),
            ( ‘varbinary’
              ,‘0’
              ,‘1’
              ,‘0’ )
–Other Data Types
,
            ( ‘sql_variant’
              ,‘0’
              ,‘1’
              ,‘0’ )
–Date and Time
,
            ( ‘date’
              ,‘0001-01-01’
              ,‘9999-12-31’
              ,‘1900-01-01’ ),
            ( ‘datetimeoffset’
              ,‘0001-01-01 00:00:00.0000000 +00:00’
              ,‘9999-12-31 23:59:59.9999999 +00:00’
              ,‘1900-01-01 00:00:00.0000000 +00:00’ ),
            ( ‘datetime2’
              ,‘0001-01-01 00:00:00.0000000’
              ,‘9999-12-31 23:59:59.9999999’
              ,‘1900-01-01 00:00:00.0000000’ ),
            ( ‘smalldatetime’
              ,‘1900-01-01 00:00:00’
              ,‘2079-06-06 23:59:59’
              ,‘1900-01-01 00:00:00’ ),
            ( ‘datetime’
              ,‘1753-01-01 00:00:00.000’
              ,‘9999-12-31 23:59:59.997’
              ,‘1900-01-01 00:00:00.000’ ),
            ( ‘time’
              ,’00:00:00.0000000′
              ,’23:59:59.9999999′
              ,’00:00:00′ )
–Character Strings
,
            ( ‘char’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ ),
            ( ‘varchar’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ ),
            ( ‘text’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ )
–Unicode Character Strings
,
            ( ‘nchar’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ ),
            ( ‘nvarchar’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ ),
            ( ‘ntext’
              ,‘U’
              ,‘Unknown’
              ,‘UNK’ )
–Binary Strings
,
            ( ‘image’
              ,
              ,
              , )
–Other Data Types
,
            ( ‘hierarchyid’
              ,‘/’
              ,‘/’
              ,‘/’ ),
            ( ‘uniqueidentifier’
              ,‘null’
              ,‘null’
              ,‘null’ ),
            ( ‘xml’
              ,
              ,
              , )

SELECT ‘INSERT INTO ‘ + t.TABLE_NAME + ‘ (‘ + @crlf
       –{ column_list }
       + replace(stuff(( SELECT ‘,’ + c.COLUMN_NAME FROM
       INFORMATION_SCHEMA.columns c
       WHERE c.COLUMN_NAME = COLUMN_NAME AND c.TABLE_NAME = t.TABLE_NAME ORDER
       BY
       c.ORDINAL_POSITION FOR XML PATH() ), 1, 1, ), ‘,’, ‘,’) + ‘) ‘ +
       @crlf +
       ‘VALUES ‘
       + @crlf + ‘(‘
       –{ VALUES }
       + replace(stuff(( SELECT ‘,”’ + CASE WHEN c.COLUMN_NAME LIKE ‘%’ +
       @SurrogateKeyName + ‘%’ THEN @UnknownSurrogateKeyValue WHEN
       CHARACTER_MAXIMUM_LENGTH > len(dt.DEFAULT_DESC) THEN dt.MAX_DESC WHEN
       CHARACTER_MAXIMUM_LENGTH < len(dt.DEFAULT_DESC) THEN dt.MIN_DESC ELSE
       DT.DEFAULT_DESC END + ”” FROM INFORMATION_SCHEMA.columns c INNER JOIN
       @dataTypes dt ON
       c.DATA_TYPE = dt.DATA_TYPE WHERE c.COLUMN_NAME = COLUMN_NAME AND
       c.TABLE_NAME =
       t.TABLE_NAME ORDER BY c.ORDINAL_POSITION FOR XML PATH() ), 1, 1, ),
       ‘,’, ‘,’
       ) + ‘ ); ‘
FROM   INFORMATION_SCHEMA.tables t
WHERE  t.TABLE_TYPE = ‘BASE TABLE’
       AND t.TABLE_NAME LIKE @DimensionTableNamePrefix + ‘%’ 


SQL Server Denial

A colleague stumbled across this and it gave me a laugh. Looks like Microsoft’s competitors may know their products better than Microsoft themselves…

Create views for all your tables in one step

I have to admit that I am a little lazy. So if there is something I have to do more than once I’ll try and find a quicker way.

One of these tasks in any DW build is creating views for all your dimension and fact tables to create an abstraction layer between the relational tables and the client tool or SSAS Data Source Views. These views can be useful for all sorts of functionality such simplifying currency conversion or creating calculated fields.

What really becomes a nuisance is when using a RAD approach you can find yourself re-creating these views several times over.

The below script automates this process. For it to work properly you must set SSMS to “Results to Text”:

image

This could be wrapped in a stored proc if you were super keen.

DECLARE @exclCols TABLE (COLUMN_NAME varchar(255))
DECLARE @crlf char(2)
DECLARE @viewPrefix varchar(10)
 
INSERT INTO @exclCols (COLUMN_NAME)
VALUES
-- Metadata columns you want excluded from the views
     ('source_item_id')
    ,('source_line_no')
    ,('load_date')
    ,('update_date')
 
SET @crlf = CHAR(13) + CHAR(10)
-- Set view prefix to 'uvw' or uvw_'
SET @viewPrefix = 'uvw'
 
SELECT 
    CASE 
    -- Set first line of create view statement
    WHEN c0.ORDINAL_POSITION = 
            (SELECT 
                MIN(c1.ORDINAL_POSITION) 
             FROM 
                INFORMATION_SCHEMA.COLUMNS c1 
             WHERE 
                c1.TABLE_NAME = c0.TABLE_NAME 
                    AND 
                c1.COLUMN_NAME NOT IN (SELECT COLUMN_NAME FROM @exclCols)
            ) 
        THEN 
        'CREATE VIEW ' + @viewPrefix + c0.TABLE_NAME + ' AS ' + @crlf + 
        -- Add comments to view
        '/* CREATED BY:   ' + Suser_Sname() + ' */ ' + @crlf + 
        '/* CREATED DATE: ' + CONVERT(varchar(255), GETDATE(), 127) + ' */ ' + @crlf + 
        ' SELECT ' + @crlf + 
        '   ' + COLUMN_NAME
    -- Set last line of create view statement
    WHEN c0.ORDINAL_POSITION = 
            (SELECT 
                MAX(c1.ORDINAL_POSITION) 
             FROM 
                INFORMATION_SCHEMA.COLUMNS c1
             WHERE 
                c1.TABLE_NAME = c0.TABLE_NAME 
                    AND 
                c1.COLUMN_NAME NOT IN (SELECT COLUMN_NAME FROM @exclCols)
            )
        THEN
        '  ,' + c0.COLUMN_NAME + @crlf +
        ' FROM ' + @crlf +
        '  ' + c0.TABLE_NAME + @crlf + 
        'GO;' + @crlf + @crlf 
    -- Set other lines of create view statement
    ELSE
    '  ,' + c0.COLUMN_NAME 
    END 
FROM
    INFORMATION_SCHEMA.COLUMNS c0 
INNER JOIN INFORMATION_SCHEMA.TABLES t0 ON 
    c0.TABLE_CATALOG = t0.TABLE_CATALOG 
        AND 
    c0.TABLE_SCHEMA = t0.TABLE_SCHEMA 
        AND
    c0.TABLE_NAME = t0.TABLE_NAME 
WHERE 
    c0.COLUMN_NAME NOT IN (SELECT COLUMN_NAME FROM @exclCols)
        AND 
    -- Prevents creating views of views
    t0.TABLE_TYPE = 'BASE TABLE'

Running this against AdventureworksDW will result in an output something like:

CREATE VIEW uvwDimAccount AS 
/* CREATED BY:   MYDOMAIN\jsmith */ 
/* CREATED DATE: 2010-08-17T16:47:31.780 */ 
 SELECT 
   AccountKey
  ,ParentAccountKey
  ,AccountCodeAlternateKey
  ,ParentAccountCodeAlternateKey
  ,AccountDescription
  ,AccountType
  ,Operator
  ,CustomMembers
  ,ValueType
  ,CustomMemberOptions
 FROM 
  DimAccount
GO;
 
CREATE VIEW uvwDimCurrency AS 
/* CREATED BY:   MYDOMAIN\jsmith */ 
/* CREATED DATE: 2010-08-17T16:47:31.780 */ 
 SELECT 
   CurrencyKey
  ,CurrencyAlternateKey
  ,CurrencyName
 FROM 
  DimCurrency
GO;

etc…

%d bloggers like this: