Home EDA
Post
Cancel

EDA

  • Temporal EDA

    • Exploratory Data Analysis (EDA) Process
      • Iterative
      • No specific checklist for EDA questions image
  • SQL functions for EDA

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    
          -- CONVERT Syntax:
          CONVERT (data_type[(length)], expression[,style])
          -- Returns expression based on data_type
    
          --DATEPART Syntax
          DATEPART(datepart, date)
          -- Returns int
    
          -- DATENAME syntax
          DATENAME(datepart, date)
          -- Returns nvarchar
    
          -- DATEDIFF Syntax
          DATEDIFF(datepart, startdate, enddate)
          -- Retrurns int; can't use datepart weekday value
    
          -- datepart value s= year, quarter, month, dayofyear, day, week, weekday, hour,
          -- minute, second, microsecond, nanosecond
    
    1
    2
    3
    4
    5
    
          -- CONVERT
          SELECT
              TOP 1 PickUpDate,
              CONVERT(DATE, PickupDate) AS DateOly
          FROM YellowTripData
    
    1
    2
    3
    4
    5
    6
    7
    
          -- DATEPART
          SELECT
              TOP 3 COUNT(ID) AS NumberofRides,
              DATEPART(HOUR, PickupDate) AS Hour
          FROM YellowTripData
          GROUP BY DATEPART(HOUR, PickupDate)
          ORDER BY COUNT(ID) DESC
    
          -- DATENAME
          SELECT
              TOP 3 ROUND(
                  SUM(FareAmount),
                  0
              ) as TotalFareAmt,
              DATENAME(WEEKDAY, PickupDate) AS DayofWeek
          FROM YellowTripData
          GROUP BY DATENAME(WEEKDAY, PickupDate)
          ORDER BY SUM(FareAmount) DESC;
    
    1
    2
    3
    4
    5
    6
    7
    
          --DATEDIFF
          SELECT
              AVG(
              DATEDIFF(SECOND, PickupDate, DropOffDate)/ 60
              ) AS AvgRideLengthInMin
          FROM YellowTripData
          WHERE DATENAME(WEEKDAY, PickupDate) = 'Sunday';
    

Variables for datetime data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
```sql
    -- DECLARE variable and assign initial value
    DECLARE @StartTime as time = '08:00 AM'


    -- DECLARE variabel and then SET value
    DECLARE @StarTime AS time
    SET @StartTIme = '08:00  AM'

    --  DECLARE variable then SET value
    DECLARE @BeginDate as date
    SET
        @BeginDate = (
            SELECT TOP 1 PickupDate
            FROM YellowTripData
            ORDER BY PickupDate ASC
        )
```
  • CASTing

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    
          -- CAST syntax
          CAST (expression AS data_type [(length)])
          -- Returns expression based on data_type
    
          -- DECLARE datetime variabel
          -- SET value to @BeginDate and @StartTime while CASTing
          DECLARE @StartDateTime as datetime
          SET @StartDateTime = CAST(@BeginDate as datetime) + CAST(@StartTime as datetime)
    
          -- DECLARE table variable with two columns
          DECLARE @TaxiRideDates TABLE (
              StartDate date,
              EndDate date
          )
    
          -- INSERT static values into table variable
          INSERT INTO @TaxiRideDates (StartDate, EndDate)
          SELECT '3/1/2018', '3/2/2018'
    
          -- INSERT query result
          INSERT INTO @TaxiRideDates(StartDate, EndDate)
          SELECT DISTINCT
              CAST(PickuPDate as date),
              CAST(DropOffDate as date)
              FROM YellowTripData;
    

Date manipulation

  • GETDATE

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    
          SELECT GETDATE()
    
          DECLARE @CurrentDateTime AS datetime
          SET @CurrentDateTime = GETDATE()
          SELECT @CurrentDateTIme
    
    
          -- DATEADD Syntax:
          DATEADD(datepart, number, date)
          -- Returns expression based on data_type
    
          -- Oneday after 2/27/2019
          SELECT DATEADD(day,1,'2/27/2019')
    
  • DATEADD and GETDATE

    1
    2
    3
    4
    5
    6
    7
    
          --Yesterday
          SELECT DATEADD(d,-1,GETDATE())
    
          -- Yesterday's Taxi Passenger Count
          SELECT SUM(PassengerCount)
          FROM YellowTripData
          WHERE CAST(PickupDate as date) = DATEADD(d,-1,GETDATE())
    
  • DATEDIFF?

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    
          SELECT DATEDIFF(day, '2/27/2019','2/28/2019')
    
          SELECT DATEDIFF(year, '12/31/2017', '1/1/2019')
    
          -- First Day of Current Week
          SELECT DATEADD(week, DATEDIFF(week, 0, GETDATE()),0)
    
          -- First step
          GETDATE()
    
          -- How many weeks between today and 1/1/1900?
          SELECT DATEDIFF(week,0,GETDATE())
    
          -- Add zero to the 6218nd week
          SELECT DATEADD(week, DATEDIFF(week,0,GETDATE()),0)
    
This post is licensed under CC BY 4.0 by the author.