• subscribe
January 12, 2010 12:00 AM

Using SSAS and SSIS to Build an IIS Web Log Cube

Use these tools to track historical statistics and usage
SQL Server Pro
InstantDoc ID #103424

 

Listing 1. Copy_log.bat

@ECHO OFF

for /f "tokens=2,3,4 delims=/ " %%i in ('date /t') do (

set my_day=%%j

set my_month=%%i

set my_year=%%k

)

set my_year=%my_year:~2,4%

set fileName=ex%my_year%%my_month%%my_day%.log

copy "C:\WINDOWS\system32\LogFiles\W3SVC1234567890\%fileName%" "C:\web\log"

EXIT

 

Listing 2. Delete_log.bat

@ECHO OFF

for /f "tokens=2,3,4 delims=/ " %%i in ('date /t') do (

set my_day=%%j

set my_month=%%i

set my_year=%%k

)

set my_year=%my_year:~2,4%

set fileName=ex%my_year%%my_month%%my_day%.log

DEL "C:\web\log\%fileName%"

EXIT

 

Listing 3. TreLog_FTP.bat

@ECHO OFF

REM 5/09/07 - David Pruden

REM copy web log files from web site to load into tblTreLog

cd C:\

REM delete old web log files

DEL ex*.log

REM connect to www.website.com, get current files

ftp -s:"C:\Program Files\Microsoft SQL Server\MSSQL\Scripts\TreLogFTP.src" 127.0.0.1

copy ex*.log TreLog.log

EXIT

 

Listing 4. TreLogFTP.src

[LOGIN]

[PASSWORD]

cd log

prompt

mget *.log

quit

 

Web Listing 1. Table DDL

(these tables are to be created under the control of the SQL Server database engine)

CREATE TABLE [dbo].[tblTreLog](

      [entry] [varchar](4096) NULL

) ON [PRIMARY]

 

CREATE TABLE [dbo].[tblTreWebLog](

      [logID] [int] IDENTITY(1,1) NOT NULL,

      [logDateTime] [datetime] NULL,

      [hostIP] [varchar](15) NULL,

      [method] [varchar](8) NULL,

      [directory] [varchar](2048) NULL,

      [port] [varchar](3) NULL,

      [sourceIP] [varchar](15) NULL,

      [browser] [varchar](2048) NULL,

      [int] NULL,

      [substatus] [int] NULL,

 CONSTRAINT [PK_tblTreWebLog] PRIMARY KEY CLUSTERED

(

      [logID] ASC

)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]

) ON [PRIMARY]

 

CREATE TABLE [dbo].[tblTreWebLogArchive](

      [logID] [int] IDENTITY(1,1) NOT NULL,

      [logDateTime] [datetime] NULL,

      [hostIP] [varchar](15) NULL,

      [method] [varchar](8) NULL,

      [directory] [varchar](2048) NULL,

      [port] [varchar](3) NULL,

      [sourceIP] [varchar](15) NULL,

      [browser] [varchar](2048) NULL,

      [int] NULL,

      [substatus] [int] NULL,

 CONSTRAINT [PK_tblTreWebLogArchive] PRIMARY KEY CLUSTERED

(

      [logID] ASC

)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]

) ON [PRIMARY]

GO

CREATE NONCLUSTERED INDEX [tblTreWebLogArchive_logDate_IDX] ON [dbo].[tblTreWebLogArchive]

(

      [logDateTime] ASC

)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]

GO

 

 

Listing 5. SSIS Task 2

IF EXISTS (

      SELECT DATEPART(dw,GETDATE()) AS weekday

      WHERE DATEPART(dw,GETDATE()) BETWEEN 2 AND 6

      )

 

BEGIN

      TRUNCATE TABLE tblTreLog

END

 

 

Listing 6. SSIS Task 4

CREATE PROC delete_tblTreLog_lastRow_SP   

AS   

DELETE FROM tblTreLog  

WHERE entry = '' 

OR  entry LIKE '#%'

 

 

Web Listing 2. SSIS Task 6

select   dateadd(hh,-4,convert(datetime,left(entry,19))) as logDateTime,

         right(left(entry,33),13) as hostIP,

         right(left(entry,(charindex('/',entry)-2)),len(left(entry,(charindex('/',entry)-2)))-34) as method,

         case

               when entry like '% - % - %'

               then right(left(entry, charindex (' - ', entry)), len(left(entry, charindex (' - ', entry))) - charindex('/', entry)+2)

               else right(left(entry, charindex (' - ', entry)-3) ,(len(left(entry, charindex (' - ', entry)-3)))-(charindex('/',entry)-2))

         end as directory,

         case

               when entry like '% - % - %'

               then ltrim(rtrim(right(left(entry, charindex(' - ',entry)+5),4)))

               else ltrim(rtrim(right(left(entry,charindex(' - ',entry)),4)))

         end as port,

         case

               when entry like '% - % - %'

               then left(right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 ),charindex(' ',right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 )))

               else left(right(entry, len(entry)-charindex(' - ', entry)-2),charindex(' ',right(entry, len(entry)-charindex(' - ', entry)-2)))

         end as sourceIP,

         left(right(entry, len(entry)-charindex(

         case

               when entry like '% - % - %'

               then left(right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 ),charindex(' ',right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 )))

               else left(right(entry, len(entry)-charindex(' - ', entry)-2),charindex(' ',right(entry, len(entry)-charindex(' - ', entry)-2)))

         end , entry)-

         len(case

               when entry like '% - % - %'

               then left(right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 ),charindex(' ',right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 )))

               else left(right(entry, len(entry)-charindex(' - ', entry)-2),charindex(' ',right(entry, len(entry)-charindex(' - ', entry)-2)))

               end )

         ),

         charindex(' ',right(entry, len(entry)-charindex(

         case

               when entry like '% - % - %'

               then left(right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 ),charindex(' ',right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 )))

               else left(right(entry, len(entry)-charindex(' - ', entry)-2),charindex(' ',right(entry, len(entry)-charindex(' - ', entry)-2)))

         end , entry)-

         len(case

               when entry like '% - % - %'

               then left(right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 ),charindex(' ',right(entry, len(entry) - charindex(' - ',entry,charindex(' - ', entry)+1)-2 )))

               else left(right(entry, len(entry)-charindex(' - ', entry)-2),charindex(' ',right(entry, len(entry)-charindex(' - ', entry)-2)))

         end )

         )))   AS browser,

         left(ltrim(rtrim(right(entry,6))),3) as status,

         ltrim(rtrim(right(ltrim(rtrim(right(entry,6))),2))) as subStatus

from     tblTreLog

order by convert(datetime,left(entry,19))

 

 

Listing 7. SSIS Task 7

DELETE FROM tblTreWebLogArchive

WHERE logDateTime < dateadd(d, -365, getdate())

 

 

Listing 8. SSIS Task 8

INSERT   INTO tblTreWebLogArchive

SELECT   logDateTime,

         hostIP,

         method,

         directory,

         port,

         sourceIP,

         browser,

         status,

         substatus

FROM     tblTreWebLog

 

Web Listing 3. SSAS view

CREATE VIEW vwTreWebLogCube   

AS    

SELECT 

  DATEPART(yy,logDateTime) AS logYear, 

  DATEPART(mm,logDateTime) AS logMonth,  

  CONVERT(DATETIME,CONVERT(VARCHAR(12),logDateTime,101)) AS logDateTime,   

  CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END AS directory,   

  ,   

  1 AS Request   

FROM tblTreWebLogArchive   

WHERE CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/index.html'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/accounting'   

OR CASE     

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/bankinvermont'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/cash'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/debt'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/literacy'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/office'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/pension'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/reporting'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/retirement'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/unclaimed'   

OR CASE    

   WHEN CHARINDEX('/', directory, 2) > 0    

   THEN LEFT(directory, CHARINDEX('/',directory,2)-1)   

   ELSE directory   

  END = '/VendorPortal'

 

 

 



ARTICLE TOOLS

Comments
  • Pruden
    1 year ago
    Jan 31, 2011

    I have recently updated this process to analyze SharePoint logs. I now use Microsoft's Log Parser (http://www.microsoft.com/downloads/en/details.aspx?FamilyID=890cd06b-abf8-4c25-91b2-f8d975cf8c07&displaylang=en) to parse the web log files to a csv which I load then load into the SQL database. It is much cleaner than the previous process for loading the log files.

You must log on before posting a comment.

Are you a new visitor? Register Here