Dates and Times

John Karlen
4/25/17

Dataset and Package

# install.packages('nycflights13')
# install.packages('lubridate')
# install.packages('dplyr')
library(nycflights13)
library(lubridate)
library(dplyr)

Parsing and Formats

year, month, day

# The function name spells out the order of the components in the input string.
# ymd(): year, month, day (no separators needed)
ymd("20100604")
[1] "2010-06-04"
# mdy(): month, day, year
mdy("06-04-2011")
[1] "2011-06-04"
# dmy(): day, month, year
dmy("04/06/2011")
[1] "2011-06-04"

Parsing and Formats

year, month, day, hour, minute, second, datetime

# ymd_hms(): date followed by hour, minute, second; the result prints in UTC
ymd_hms("2015-09-09 14:00:00")
[1] "2015-09-09 14:00:00 UTC"
# as_datetime() applied to a string with a "T" separator and a trailing "Z"
as_datetime('2015-09-03T16:37:00Z')
[1] "2015-09-03 16:37:00 UTC"

Parsing and Formats (DateTime)

# Parse a full timestamp, then pull individual components back out of it.
johns_bday <- ymd_hms("1991-01-20T00:20:00Z")
# date() keeps only the calendar-date part
date(johns_bday)
[1] "1991-01-20"
# hour() returns the hour component as a number
hour(johns_bday)
[1] 0

Flights!

take a look at the flights data set

# Print the column names of flights on a single line, separated by spaces.
flights %>% names() %>% cat()
year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum origin dest air_time distance hour minute time_hour

Mutate a Date!

# Add a Date column (`date`) and a date-time column (`date_time`) to flights.
# make_date() / make_datetime() build the values directly from the numeric
# year/month/day/hour/minute columns, so nothing has to be pasted into a
# string and parsed back. make_datetime() defaults to UTC, as ymd_hm() does.
flights <- flights %>%
    mutate(
        date = make_date(year, month, day),
        date_time = make_datetime(year, month, day, hour, minute)
    )
flights %>% select(date_time)
# A tibble: 336,776 × 1
             date_time
                <dttm>
1  2013-01-01 05:15:00
2  2013-01-01 05:29:00
3  2013-01-01 05:40:00
4  2013-01-01 05:45:00
5  2013-01-01 06:00:00
6  2013-01-01 05:58:00
7  2013-01-01 06:00:00
8  2013-01-01 06:00:00
9  2013-01-01 06:00:00
10 2013-01-01 06:00:00
# ... with 336,766 more rows

Sort by Date

# Show the six earliest values of the date column.
flights %>%
    select(date) %>%
    arrange(date) %>%
    head()
# A tibble: 6 × 1
        date
      <date>
1 2013-01-01
2 2013-01-01
3 2013-01-01
4 2013-01-01
5 2013-01-01
6 2013-01-01

Applications

What was the time of the first flight in 2013?

# List date_time in ascending order; the first row is the earliest flight.
flights %>%
    select(date_time) %>%
    arrange(date_time)
# A tibble: 336,776 × 1
             date_time
                <dttm>
1  2013-01-01 05:15:00
2  2013-01-01 05:29:00
3  2013-01-01 05:40:00
4  2013-01-01 05:45:00
5  2013-01-01 05:58:00
6  2013-01-01 05:59:00
7  2013-01-01 06:00:00
8  2013-01-01 06:00:00
9  2013-01-01 06:00:00
10 2013-01-01 06:00:00
# ... with 336,766 more rows

Accessor Functions

what day of the week was Dana born on?

danas_bday <- ymd("1992-06-15")
# month() returns the month as a number
month(danas_bday)
[1] 6
# wday() returns the day of the week as a number (Sunday is 1, so 2 is Monday)
wday(danas_bday)
[1] 2
# label = TRUE returns the weekday as an ordered factor instead of a number.
# Spell out TRUE: the shorthand T is an ordinary variable that can be reassigned.
wday(danas_bday, label = TRUE)
[1] Mon
Levels: Sun < Mon < Tues < Wed < Thurs < Fri < Sat

First Flight of each Month

aggregator functions!

# First flight of each month: tail number and date_time.
# Sort by the full date_time, not just the calendar date. Sorting by date
# alone leaves rows within a day in their original order, so the first row
# kept per month was not necessarily the earliest date_time.
# NOTE(review): any printed table produced with the earlier arrange(date)
# ordering must be regenerated.
flights %>%
    arrange(date_time) %>%
    group_by(month(date_time)) %>%
    slice(1) %>%
    ungroup() %>%
    select(date_time, tailnum)
# A tibble: 12 × 2
             date_time tailnum
                <dttm>   <chr>
1  2013-01-01 05:15:00  N14228
2  2013-02-01 05:00:00  N197UW
3  2013-03-01 21:59:00  N706JB
4  2013-04-01 05:00:00  N566UW
5  2013-05-01 16:55:00  N628VA
6  2013-06-01 23:59:00  N618JB
7  2013-07-01 20:29:00  N653JB
8  2013-08-01 21:30:00  N618JB
9  2013-09-01 23:59:00  N663JB
10 2013-10-01 05:00:00  N538UW
11 2013-11-01 23:59:00  N568JB
12 2013-12-01 23:59:00  N715JB

Applications

What was the tail number of the 20th flight in September?

# 20th flight in September, ordered by date_time.
# Restrict to September first, then sort by the full date_time so that the
# 20th row is the 20th flight in time order (sorting by date alone leaves
# flights within a day in their original row order).
# NOTE(review): any printed result produced with the earlier arrange(date)
# ordering must be regenerated.
flights %>%
    filter(month(date_time) == 9) %>%
    arrange(date_time) %>%
    slice(20) %>%
    select(date_time, tailnum)
# A tibble: 1 × 2
            date_time tailnum
               <dttm>   <chr>
1 2013-09-01 06:10:00  N320US

Time Series

get all flights of a certain plane, “N14228”

# Time series for a single aircraft: keep only N14228's rows and list them in
# date_time order.
flights %>%
    filter(tailnum == "N14228") %>%
    select(date_time, tailnum) %>%
    arrange(date_time)
# A tibble: 111 × 2
             date_time tailnum
                <dttm>   <chr>
1  2013-01-01 05:15:00  N14228
2  2013-01-08 14:40:00  N14228
3  2013-01-09 07:00:00  N14228
4  2013-01-09 11:44:00  N14228
5  2013-01-13 08:24:00  N14228
6  2013-01-16 17:30:00  N14228
7  2013-01-22 18:08:00  N14228
8  2013-01-23 10:56:00  N14228
9  2013-01-23 15:29:00  N14228
10 2013-01-25 07:20:00  N14228
# ... with 101 more rows

Applications

What day of the week was N0EGMQ's 100th flight?

# Take the 100th row of N0EGMQ's flights in date_time order and append the
# numeric day of the week for that row.
flights %>%
    filter(tailnum == "N0EGMQ") %>%
    arrange(date_time) %>%
    select(date_time) %>%
    slice(100) %>%
    mutate(wday(date_time))
# A tibble: 1 × 2
            date_time `wday(date_time)`
               <dttm>             <dbl>
1 2013-04-04 20:55:00                 5

Today!

what if we want the date today?

# today() returns the current date, so the printed value depends on when this
# is run.
# NOTE(review): this variable shares its name with the date() accessor used
# earlier; a more specific name would be clearer.
date <- today()

date
[1] "2017-06-21"

Intervals!

how old is John?

johns_bday <- ymd("1991-01-20")

# Subtracting one date from another gives a time difference, printed in days.
today() - johns_bday
Time difference of 9649 days

Durations!

how old is John?

johns_bday <- ymd("1991-01-20")
# as.duration() converts the time difference into a duration, which prints as
# a number of seconds with an approximate value in years.
johns_duration <- as.duration(today() - johns_bday)

johns_duration
[1] "833673600s (~26.42 years)"

Durations!

how old is John… in weeks?

johns_bday <- ymd("1991-01-20")

johns_duration <- as.duration(today() - johns_bday)

# Dividing by a one-week duration expresses the age as a plain number of weeks.
johns_duration / dweeks(1)
[1] 1378.429

Plotting Dates!

you can combine lubridate with R's plotting functionality

# Histogram of flights per weekday. Breaks at 0.5, 1.5, ..., 7.5 give each of
# the weekday numbers 1-7 its own bin.
hist(wday(flights$date), breaks = 0:7 + 0.5)

plot of chunk unnamed-chunk-20

Plotting Dates! (ggplot)

you can combine lubridate with ggplot too

library(ggplot2)
# Mean flight distance per calendar date, drawn as a line.
flights %>%
    group_by(date) %>%
    summarise(avg_dist = mean(distance)) %>%
    ggplot(mapping = aes(x = date, y = avg_dist)) +
    geom_line()

plot of chunk unnamed-chunk-21

Plotting Moar Dates!

Let's look at one month

# Same daily-average line plot, restricted to flights whose date is in July.
flights %>%
    filter(month(date) == 7) %>%
    group_by(date) %>%
    summarise(avg_dist = mean(distance)) %>%
    ggplot(mapping = aes(x = date, y = avg_dist)) +
    geom_line()

plot of chunk unnamed-chunk-22

Applications

Show me a plot of average flight distance vs day of the week (1-7)

# hint:
# wday() turns a date into its weekday number (1 is Sunday)
wday(johns_bday)
[1] 1

Plotting Even Moar Dates!

Let's look at how distance depends on weekday

# Mean flight distance for each weekday number (1-7), one point per weekday.
flights %>%
    group_by(weekday = wday(date)) %>%
    summarise(avg_dist = mean(distance)) %>%
    ggplot(mapping = aes(x = weekday, y = avg_dist)) +
    geom_point()

plot of chunk unnamed-chunk-24

Applications

Give me the smallest non-zero time between consecutive flights of the same plane

# Gap between each flight and the same plane's next flight, smallest first.
# - Rows without a tail number are dropped: they do not identify a plane, and
#   grouping them together would compare unrelated flights.
# - lead() is evaluated per tailnum group, so a plane's last flight gets
#   NA instead of picking up the first flight of the next plane.
# - The gap is computed in seconds explicitly so the unit does not depend on
#   the data; the `> 0` filter also discards the NA gaps.
flights %>%
    filter(!is.na(tailnum)) %>%
    arrange(tailnum, date_time) %>%
    group_by(tailnum) %>%
    mutate(next_time = lead(date_time)) %>%
    ungroup() %>%
    select(tailnum, date_time, next_time) %>%
    mutate(
        diff_between_flights = difftime(next_time, date_time, units = "secs")
    ) %>%
    filter(diff_between_flights > 0) %>%
    arrange(diff_between_flights)
# A tibble: 332,323 × 4
   tailnum           date_time           next_time diff_between_flights
     <chr>              <dttm>              <dttm>               <time>
1   N11164 2013-05-09 19:30:00 2013-05-09 19:31:00              60 secs
2   N11551 2013-05-11 11:59:00 2013-05-11 12:00:00              60 secs
3   N13964 2013-12-29 17:34:00 2013-12-29 17:35:00              60 secs
4   N309JB 2013-02-09 22:49:00 2013-02-09 22:50:00              60 secs
5   N713EV 2013-11-27 10:56:00 2013-11-27 10:57:00              60 secs
6   N750EV 2013-09-02 15:59:00 2013-09-02 16:00:00              60 secs
7     <NA> 2013-02-08 13:59:00 2013-02-08 14:00:00              60 secs
8     <NA> 2013-02-08 14:45:00 2013-02-08 14:46:00              60 secs
9     <NA> 2013-02-08 14:55:00 2013-02-08 14:56:00              60 secs
10    <NA> 2013-02-08 15:05:00 2013-02-08 15:06:00              60 secs
# ... with 332,313 more rows