diff --git a/CRAN-RELEASE b/CRAN-RELEASE deleted file mode 100644 index ada7f73..0000000 --- a/CRAN-RELEASE +++ /dev/null @@ -1,2 +0,0 @@ -This package was submitted to CRAN on 2020-01-07. -Once it is accepted, delete this file and tag the release (commit 11164a1210). diff --git a/DESCRIPTION b/DESCRIPTION index bf365d5..aa2a993 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: schrute Title: The Entire Transcript from the Office in Tidy Format -Version: 0.1.1 +Version: 0.2.0 Authors@R: c(person(given = "Brad", family = "Lindblad", @@ -36,4 +36,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 6.1.1 +RoxygenNote: 7.0.2 diff --git a/NEWS.md b/NEWS.md index 4e270f2..af2f302 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# schrute 0.2.0 +* Added IMDB ratings, votes and air dates + # schrute 0.1.1 * Minor bug fixes * Added writer and director feature columns diff --git a/R/data-theoffice.R b/R/data-theoffice.R index e0eb335..d98e817 100644 --- a/R/data-theoffice.R +++ b/R/data-theoffice.R @@ -1,7 +1,7 @@ #' The entire script transcriptions from The Office #' #' -#' @format A tibble with 55130 observations of 9 variables: +#' @format A tibble with 55130 observations of 12 variables: #' \describe{ #' \item{index}{row index} #' \item{season}{season number} @@ -12,6 +12,9 @@ #' \item{character}{name of the character saying the line} #' \item{text}{words spoken by that actor} #' \item{text_w_direction}{words spoken by that actor with stage direction included} +#' \item{imdb_rating}{rating from imdb} +#' \item{total_votes}{total votes for episode on imdb} +#' \item{air_date}{date the episode originally aired} #'} #' @source \url{https://transcripts.foreverdreaming.org} "theoffice" diff --git a/README.md b/README.md index fb6829b..f11de9a 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,6 @@ downloads](https://cranlogs.r-pkg.org/badges/schrute)](https://cran.r-project.or Analyze and have fun with the text 
from the best series of all time -Also available in python as the [schrutepy package](https://github.com/bradlindblad/schrutepy) - ## Installation You can install the released version of schrute from @@ -42,16 +40,19 @@ library(tibble) tibble::glimpse(schrute::theoffice) #> Observations: 55,130 -#> Variables: 9 -#> $ index 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, … -#> $ season "01", "01", "01", "01", "01", "01", "01", "01",… -#> $ episode "01", "01", "01", "01", "01", "01", "01", "01",… -#> $ episode_name "Pilot", "Pilot", "Pilot", "Pilot", "Pilot", "P… -#> $ director "Ken Kwapis", "Ken Kwapis", "Ken Kwapis", "Ken … -#> $ writer "Ricky Gervais;Stephen Merchant;Greg Daniels", … -#> $ character "Michael", "Jim", "Michael", "Jim", "Michael", … -#> $ text "All right Jim. Your quarterlies look very good… -#> $ text_w_direction "All right Jim. Your quarterlies look very good… +#> Variables: 12 +#> $ index 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1… +#> $ season "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode_name "Pilot", "Pilot", "Pilot", "Pilot", "Pilot", "Pilot"… +#> $ director "Ken Kwapis", "Ken Kwapis", "Ken Kwapis", "Ken Kwapi… +#> $ writer "Ricky Gervais;Stephen Merchant;Greg Daniels", "Rick… +#> $ character "Michael", "Jim", "Michael", "Jim", "Michael", "Mich… +#> $ text "All right Jim. Your quarterlies look very good. How… +#> $ text_w_direction "All right Jim. Your quarterlies look very good. 
How… +#> $ imdb_rating 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.… +#> $ total_votes 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706… +#> $ air_date 2005-03-24, 2005-03-24, 2005-03-24, 2005-03-24, 2005… ``` Or view the short vignette with: diff --git a/cran-comments.md b/cran-comments.md index 3f2d1a3..0564618 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,8 +1,7 @@ -## Resubmission comments -* Minor bug fix +* added 3 new fields to dataset ## Test environments -* local Linux Mint 19.3, R 3.6.1 +* local Linux Mint 19.3, R 3.6.3 * ubuntu 14.04 (on travis-ci), R 3.6.1 * win-builder (devel and release) diff --git a/data-raw/get_data.R b/data-raw/get_data.R index 1d6f84a..04a1672 100644 --- a/data-raw/get_data.R +++ b/data-raw/get_data.R @@ -47,6 +47,13 @@ theoffice <- final %>% director,writer, character,text,text_w_direction) +imdb <- read.csv('https://github.com/raw/rfordatascience/tidytuesday/master/data/2020/2020-03-17/office_ratings.csv') %>% + dplyr::mutate(season = ifelse(nchar(season) < 2, paste0("0", season), season)) %>% + dplyr::mutate(episode = ifelse(nchar(episode) < 2, paste0("0", episode), episode)) + +theoffice <- theoffice %>% + dplyr::left_join(imdb, by = c('season', 'episode')) %>% + dplyr::select(-title) usethis::use_data(theoffice, overwrite = TRUE) diff --git a/data/theoffice.rda b/data/theoffice.rda index 996fe67..d438398 100644 Binary files a/data/theoffice.rda and b/data/theoffice.rda differ diff --git a/docs/404.html b/docs/404.html index 6324f9a..ef865d1 100644 --- a/docs/404.html +++ b/docs/404.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index b610322..f5fdeff 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 07ad34a..fa3203b 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git 
a/docs/articles/index.html b/docs/articles/index.html index 34db91b..b9e8063 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/articles/theoffice.html b/docs/articles/theoffice.html index 14ac747..cd087e9 100644 --- a/docs/articles/theoffice.html +++ b/docs/articles/theoffice.html @@ -38,7 +38,7 @@ schrute - 0.1.1 + 0.2.0 @@ -111,16 +111,19 @@

Take a peek at the format:

+#> Variables: 12 +#> $ index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1… +#> $ season <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode_name <chr> "Pilot", "Pilot", "Pilot", "Pilot", "Pilot", "Pilot"… +#> $ director <chr> "Ken Kwapis", "Ken Kwapis", "Ken Kwapis", "Ken Kwapi… +#> $ writer <chr> "Ricky Gervais;Stephen Merchant;Greg Daniels", "Rick… +#> $ character <chr> "Michael", "Jim", "Michael", "Jim", "Michael", "Mich… +#> $ text <chr> "All right Jim. Your quarterlies look very good. How… +#> $ text_w_direction <chr> "All right Jim. Your quarterlies look very good. How… +#> $ imdb_rating <dbl> 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.… +#> $ total_votes <int> 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706… +#> $ air_date <fct> 2005-03-24, 2005-03-24, 2005-03-24, 2005-03-24, 2005…
 mydata %>%
   dplyr::filter(season == '01') %>%
   dplyr::filter(episode == '01') %>%
@@ -137,6 +140,9 @@ 

character text text_w_direction +imdb_rating +total_votes +air_date @@ -149,6 +155,9 @@

Michael All right Jim. Your quarterlies look very good. How are things at the library? All right Jim. Your quarterlies look very good. How are things at the library? +7.6 +3706 +2005-03-24 2 @@ -160,6 +169,9 @@

Jim Oh, I told you. I couldn’t close it. So… Oh, I told you. I couldn’t close it. So… +7.6 +3706 +2005-03-24 3 @@ -171,6 +183,9 @@

Michael So you’ve come to the master for guidance? Is this what you’re saying, grasshopper? So you’ve come to the master for guidance? Is this what you’re saying, grasshopper? +7.6 +3706 +2005-03-24 @@ -178,7 +193,7 @@

We can tokenize all of the lines with a few lines from the tidytext package:

-

This increases our data set to 570566 records, where each record contains a word from the script.

+

This increases our data set to 570450 records, where each record contains a word from the script.

 token.mydata %>%
   dplyr::filter(season == '01') %>%
   dplyr::filter(episode == '01') %>%
@@ -194,6 +209,9 @@ 

writer character text_w_direction +imdb_rating +total_votes +air_date word @@ -206,6 +224,9 @@

Ricky Gervais;Stephen Merchant;Greg Daniels Michael All right Jim. Your quarterlies look very good. How are things at the library? +7.6 +3706 +2005-03-24 all @@ -217,6 +238,9 @@

Ricky Gervais;Stephen Merchant;Greg Daniels Michael All right Jim. Your quarterlies look very good. How are things at the library? +7.6 +3706 +2005-03-24 right @@ -228,6 +252,9 @@

Ricky Gervais;Stephen Merchant;Greg Daniels Michael All right Jim. Your quarterlies look very good. How are things at the library? +7.6 +3706 +2005-03-24 jim @@ -240,20 +267,20 @@

And then see what the most common words are:

+#> # … with 18,936 more rows

tidy.token.mydata %>%
   dplyr::count(word, sort = TRUE) %>%
   dplyr::filter(n > 400) %>%
diff --git a/docs/articles/theoffice_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/theoffice_files/figure-html/unnamed-chunk-9-1.png
index 09916a8..bc4b988 100644
Binary files a/docs/articles/theoffice_files/figure-html/unnamed-chunk-9-1.png and b/docs/articles/theoffice_files/figure-html/unnamed-chunk-9-1.png differ
diff --git a/docs/authors.html b/docs/authors.html
index 2a88b8b..c7d4e2c 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -76,7 +76,7 @@
       
       
         schrute
-        0.1.1
+        0.2.0
       
     
@@ -133,13 +133,13 @@

Citation

Lindblad B (2020). schrute: The Entire Transcript from the Office in Tidy Format. -R package version 0.1.1, https://bradlindblad.github.io/schrute/. +R package version 0.2.0, https://bradlindblad.github.io/schrute/.

@Manual{,
   title = {{schrute}: The Entire Transcript from the Office in Tidy Format},
   author = {Brad Lindblad},
   year = {2020},
-  note = {R package version 0.1.1},
+  note = {R package version 0.2.0},
   url = {https://bradlindblad.github.io/schrute/},
 }
diff --git a/docs/index.html b/docs/index.html index 5c06140..4171e41 100644 --- a/docs/index.html +++ b/docs/index.html @@ -40,7 +40,7 @@ schrute - 0.1.1 + 0.2.0

@@ -113,16 +113,19 @@

tibble::glimpse(schrute::theoffice) #> Observations: 55,130 -#> Variables: 9 -#> $ index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, … -#> $ season <chr> "01", "01", "01", "01", "01", "01", "01", "01",… -#> $ episode <chr> "01", "01", "01", "01", "01", "01", "01", "01",… -#> $ episode_name <chr> "Pilot", "Pilot", "Pilot", "Pilot", "Pilot", "P… -#> $ director <chr> "Ken Kwapis", "Ken Kwapis", "Ken Kwapis", "Ken … -#> $ writer <chr> "Ricky Gervais;Stephen Merchant;Greg Daniels", … -#> $ character <chr> "Michael", "Jim", "Michael", "Jim", "Michael", … -#> $ text <chr> "All right Jim. Your quarterlies look very good… -#> $ text_w_direction <chr> "All right Jim. Your quarterlies look very good… +#> Variables: 12 +#> $ index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1… +#> $ season <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode <chr> "01", "01", "01", "01", "01", "01", "01", "01", "01"… +#> $ episode_name <chr> "Pilot", "Pilot", "Pilot", "Pilot", "Pilot", "Pilot"… +#> $ director <chr> "Ken Kwapis", "Ken Kwapis", "Ken Kwapis", "Ken Kwapi… +#> $ writer <chr> "Ricky Gervais;Stephen Merchant;Greg Daniels", "Rick… +#> $ character <chr> "Michael", "Jim", "Michael", "Jim", "Michael", "Mich… +#> $ text <chr> "All right Jim. Your quarterlies look very good. How… +#> $ text_w_direction <chr> "All right Jim. Your quarterlies look very good. How… +#> $ imdb_rating <dbl> 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.6, 7.… +#> $ total_votes <int> 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706, 3706… +#> $ air_date <fct> 2005-03-24, 2005-03-24, 2005-03-24, 2005-03-24, 2005…

Or view the short vignette with:

vignette("theoffice")
diff --git a/docs/news/index.html b/docs/news/index.html index 3e02600..55add2d 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/reference/index.html b/docs/reference/index.html index 37fae00..62ecdf6 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -76,7 +76,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/reference/pipe.html b/docs/reference/pipe.html index d4ee522..076f1e6 100644 --- a/docs/reference/pipe.html +++ b/docs/reference/pipe.html @@ -77,7 +77,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/reference/schrute.html b/docs/reference/schrute.html index c533eae..02aa05e 100644 --- a/docs/reference/schrute.html +++ b/docs/reference/schrute.html @@ -77,7 +77,7 @@ schrute - 0.1.1 + 0.2.0 diff --git a/docs/reference/theoffice.html b/docs/reference/theoffice.html index 4bb032e..0a994dc 100644 --- a/docs/reference/theoffice.html +++ b/docs/reference/theoffice.html @@ -77,7 +77,7 @@ schrute - 0.1.1 + 0.2.0 @@ -142,7 +142,7 @@

The entire script transcriptions from The Office

Format

-

A tibble with 55130 observations of 9 variables:

+

A tibble with 55130 observations of 12 variables:

index

row index

season

season number

episode

episode number

@@ -152,6 +152,9 @@

Formatcharacter

name of the character saying the line

text

words spoken by that actor

text_w_direction

words spoken by that actor with stage direction included

+
imdb_rating

rating from imdb

+
total_votes

total votes for episode on imdb

+
air_date

date the episode originally aired

Source

diff --git a/man/schrute.Rd b/man/schrute.Rd index e64b134..e5a5c0c 100644 --- a/man/schrute.Rd +++ b/man/schrute.Rd @@ -3,7 +3,6 @@ \docType{package} \name{schrute} \alias{schrute} -\alias{schrute-package} \title{schrute: The Entire Transcript from The Office in Tidy Format.} \description{ Analyze and have fun with the text from the best series of all time diff --git a/man/theoffice.Rd b/man/theoffice.Rd index 55d8995..138fb96 100644 --- a/man/theoffice.Rd +++ b/man/theoffice.Rd @@ -4,7 +4,7 @@ \name{theoffice} \alias{theoffice} \title{The entire script transcriptions from The Office} -\format{A tibble with 55130 observations of 9 variables: +\format{A tibble with 55130 observations of 12 variables: \describe{ \item{index}{row index} \item{season}{season number} @@ -15,6 +15,9 @@ \item{character}{name of the character saying the line} \item{text}{words spoken by that actor} \item{text_w_direction}{words spoken by that actor with stage direction included} +\item{imdb_rating}{rating from imdb} +\item{total_votes}{total votes for episode on imdb} +\item{air_date}{date the episode originally aired} }} \source{ \url{https://transcripts.foreverdreaming.org}