CREATE TABLE DIM_AGING_BUCKET
(
AGING_BUCKET_KEY INT NOT NULL,
AGING_BUCKET_DESC NVARCHAR(35),
BEGIN_DAY_RANGE INT,
END_DAY_RANGE INT,
CONSTRAINT PK_DIM_SERVICE_REQUEST_AGING_BUCKET PRIMARY KEY CLUSTERED (AGING_BUCKET_KEY)
)
GO
INSERT INTO DIM_AGING_BUCKET
VALUES
(0, NULL, NULL, NULL),
(1, '0 DAYS', -1, 0),
(2, '1-3 DAYS',1,3),
(3, '4-5 DAYS', 4,5),
(4, '6-15 DAYS', 6,15),
(5, '16-30 DAYS', 16,30),
(6, '31-60 DAYS', 31,60),
(7, '61-90 DAYS', 61,90),
(8, '91-120 DAYS', 91,120),
(9, '121-180 DAYS', 121,180),
(10,'181-360 DAYS', 181,360),
(11,'OVER 360 DAYS',361,50000)
GO
For this example were going to use data from an imaginary table that has staged issue ticket data. This data contains the issue identifier (a degenerate dimension), the natural key for the transaction type, the natural key for the employee assigned to the issue, the natural key of the customer associated to the issue, and the open and close dates of the issue. We can represent this with this sample data:
CREATE TABLE STG_TRANSACTION_RECORDS
(
TRANSACTION_NO INT,
TRANSACTION_TYPE_ID INT,
EMPLOYEE_ID INT,
CUSTOMER_ID INT,
TRANSACTION_OPEN_DATE DATETIME,
TRANSACTION_CLOSE_DATE DATETIME
)
INSERT INTO STG_TRANSACTION_RECORDS(TRANSACTION_NO, TRANSACTION_TYPE_ID, EMPLOYEE_ID, CUSTOMER_ID, TRANSACTION_OPEN_DATE, TRANSACTION_CLOSE_DATE)
VALUES
(1, 2, 5, 7, '1/1/2012','1/5/2012'),
(2, 3, 6, 9, '5/1/2012','6/20/2012'),
(3, 8, 22, 61, '12/22/2012','4/1/2013'),
(4, 7, 11, 13, '7/9/2013','7/22/2013'),
(5, 9, 22, 47, '7/23/2013',NULL)
GO
We want to be able to load this data into a star schema that will utilize our bucket dimension. This schema may look something like this:
Now in order to populate the fact table we would need to join the staging table to the dimensions on the natural keys and retrieve the surrogate keys from these dimensions. The measures (DAYS_OPEN) would simply be a datediff between the open and close dates and (NUMBER_TRANSACTION) will always be a 1 so we can sum on this across the dimensions. This leaves us with one problem, how do we join to DIM_AGING_BUCKET if we have no natural key in either the dimension or the staging table? We in essence are forced to join on a range. This SQL statement will gather up our surrogate keys, calculate our measure and handle the "range join" we need for our bucket dimension:
SELECT
STG.TRANSACTION_NO,
CUST.CUSTOMER_KEY,
AG.AGING_BUCKET_KEY
AGING_BUCKET_KEY,
TYP.TRANSACTION_TYPE_KEY,
EMP.EMPLOYEE_KEY,
OPEND.DAY_KEY AS OPEN_DAY_KEY,
ISNULL(CLOSED.DAY_KEY,0) AS CLOSE_DAY_KEY,
DATEDIFF(DAY, STG.TRANSACTION_OPEN_DATE, CASE WHEN STG.TRANSACTION_CLOSE_DATE IS NULL THEN GETDATE() ELSE STG.TRANSACTION_CLOSE_DATE END) as DAYS_OPEN ,
1 AS NUMBER_TRANSACTION
FROM
STG_TRANSACTION_RECORDS STG
JOIN DIM_TRANS_TYPE TYP ON STG.TRANSACTION_TYPE_ID=TYP.TRANSACTION_TYPE_ID
JOIN DIM_EMPLOYEE EMP ON STG.EMPLOYEE_ID=EMP.EMPOYEE_ID
JOIN DIM_CUSTOMER CUST ON STG.CUSTOMER_ID=CUST.CUSTOMER_ID
JOIN DIM_DAY OPEND ON STG.TRANSACTION_OPEN_DATE =OPEND.FULL_DATE
LEFT JOIN DIM_DAY CLOSED ON STG.TRANSACTION_CLOSE_DATE=CLOSED.FULL_DATE
JOIN DIM_AGING_BUCKET AG ON DATEDIFF(DAY, STG.TRANSACTION_OPEN_DATE, CASE WHEN STG.TRANSACTION_CLOSE_DATE IS NULL THEN GETDATE() ELSE STG.TRANSACTION_CLOSE_DATE END) BETWEEN AG.BEGIN_DAY_RANGE AND AG.END_DAY_RANGE
As long as your bucket ranges do not overlap, a join based on a range should be fine. Now you can group by the buckets in DIM_AGING_BUCKET while summing on the NUMBER_TRANSACTION field to display the number of issues in the bucket.
No comments:
Post a Comment