Temporal EDA
- Exploratory Data Analysis (EDA) Process
- Iterative
- No specific checklist for EDA questions
- Exploratory Data Analysis (EDA) Process
SQL functions for EDA
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-- CONVERT Syntax: CONVERT (data_type[(length)], expression[,style]) -- Returns expression based on data_type --DATEPART Syntax DATEPART(datepart, date) -- Returns int -- DATENAME syntax DATENAME(datepart, date) -- Returns nvarchar -- DATEDIFF Syntax DATEDIFF(datepart, startdate, enddate) -- Retrurns int; can't use datepart weekday value -- datepart value s= year, quarter, month, dayofyear, day, week, weekday, hour, -- minute, second, microsecond, nanosecond
1 2 3 4 5
-- CONVERT SELECT TOP 1 PickUpDate, CONVERT(DATE, PickupDate) AS DateOly FROM YellowTripData
1 2 3 4 5 6 7
-- DATEPART SELECT TOP 3 COUNT(ID) AS NumberofRides, DATEPART(HOUR, PickupDate) AS Hour FROM YellowTripData GROUP BY DATEPART(HOUR, PickupDate) ORDER BY COUNT(ID) DESC
-- DATENAME SELECT TOP 3 ROUND( SUM(FareAmount), 0 ) as TotalFareAmt, DATENAME(WEEKDAY, PickupDate) AS DayofWeek FROM YellowTripData GROUP BY DATENAME(WEEKDAY, PickupDate) ORDER BY SUM(FareAmount) DESC;
1 2 3 4 5 6 7
--DATEDIFF SELECT AVG( DATEDIFF(SECOND, PickupDate, DropOffDate)/ 60 ) AS AvgRideLengthInMin FROM YellowTripData WHERE DATENAME(WEEKDAY, PickupDate) = 'Sunday';
Variables for datetime data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
```sql
-- DECLARE variable and assign initial value
DECLARE @StartTime as time = '08:00 AM'
-- DECLARE variabel and then SET value
DECLARE @StarTime AS time
SET @StartTIme = '08:00 AM'
-- DECLARE variable then SET value
DECLARE @BeginDate as date
SET
@BeginDate = (
SELECT TOP 1 PickupDate
FROM YellowTripData
ORDER BY PickupDate ASC
)
```
CASTing
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
-- CAST syntax CAST (expression AS data_type [(length)]) -- Returns expression based on data_type -- DECLARE datetime variabel -- SET value to @BeginDate and @StartTime while CASTing DECLARE @StartDateTime as datetime SET @StartDateTime = CAST(@BeginDate as datetime) + CAST(@StartTime as datetime) -- DECLARE table variable with two columns DECLARE @TaxiRideDates TABLE ( StartDate date, EndDate date ) -- INSERT static values into table variable INSERT INTO @TaxiRideDates (StartDate, EndDate) SELECT '3/1/2018', '3/2/2018' -- INSERT query result INSERT INTO @TaxiRideDates(StartDate, EndDate) SELECT DISTINCT CAST(PickuPDate as date), CAST(DropOffDate as date) FROM YellowTripData;
Date manipulation
GETDATE
1 2 3 4 5 6 7 8 9 10 11 12 13
SELECT GETDATE() DECLARE @CurrentDateTime AS datetime SET @CurrentDateTime = GETDATE() SELECT @CurrentDateTIme -- DATEADD Syntax: DATEADD(datepart, number, date) -- Returns expression based on data_type -- Oneday after 2/27/2019 SELECT DATEADD(day,1,'2/27/2019')
DATEADD and GETDATE
1 2 3 4 5 6 7
--Yesterday SELECT DATEADD(d,-1,GETDATE()) -- Yesterday's Taxi Passenger Count SELECT SUM(PassengerCount) FROM YellowTripData WHERE CAST(PickupDate as date) = DATEADD(d,-1,GETDATE())
DATEDIFF?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
SELECT DATEDIFF(day, '2/27/2019','2/28/2019') SELECT DATEDIFF(year, '12/31/2017', '1/1/2019') -- First Day of Current Week SELECT DATEADD(week, DATEDIFF(week, 0, GETDATE()),0) -- First step GETDATE() -- How many weeks between today and 1/1/1900? SELECT DATEDIFF(week,0,GETDATE()) -- Add zero to the 6218nd week SELECT DATEADD(week, DATEDIFF(week,0,GETDATE()),0)