Example: Top 1,000 movies from the Internet Movie Database
example-imdb.Rmd
(This vignette was used as an example during my Compstat 2024 talk.)
Read in the data
# Load the movie table from a CSV file (the path is relative, so it is
# resolved against the current working directory).
imdb <- read.csv(file = "imdb1000.csv")

# Print a compact per-column preview: name, type, and leading values.
imdb |> dplyr::glimpse()
#> Rows: 1,000
#> Columns: 14
#> $ title <chr> "The Shawshank Redemption", "The Godfather", "The Dark Knight…
#> $ year <int> 1994, 1972, 2008, 1974, 1957, 2003, 1994, 1993, 2010, 1999, 2…
#> $ runtime <int> 142, 175, 152, 202, 96, 201, 154, 195, 148, 139, 178, 142, 16…
#> $ genre <chr> "Drama", "Crime, Drama", "Action, Crime, Drama", "Crime, Dram…
#> $ rating <dbl> 9.3, 9.2, 9.0, 9.0, 9.0, 8.9, 8.9, 8.9, 8.8, 8.8, 8.8, 8.8, 8…
#> $ overview <chr> "Two imprisoned men bond over a number of years, finding sola…
#> $ director <chr> "Frank Darabont", "Francis Ford Coppola", "Christopher Nolan"…
#> $ star1 <chr> "Tim Robbins", "Marlon Brando", "Christian Bale", "Al Pacino"…
#> $ star2 <chr> "Morgan Freeman", "Al Pacino", "Heath Ledger", "Robert De Nir…
#> $ star3 <chr> "Bob Gunton", "James Caan", "Aaron Eckhart", "Robert Duvall",…
#> $ star4 <chr> "William Sadler", "Diane Keaton", "Michael Caine", "Diane Kea…
#> $ votes <int> 2343110, 1620367, 2303232, 1129952, 689845, 1642758, 1826188,…
#> $ genre1 <chr> "Drama", "Crime", "Action", "Crime", "Crime", "Action", "Crim…
#> $ genre2 <chr> "Drama", "Drama", "Crime", "Drama", "Drama", "Adventure", "Dr…
Explore
library(plotscaper)

# Build the figure step by step: start a schema from the data, append one
# plot per pipeline step, and finish with render().
create_schema(imdb) |>
  add_scatterplot(
    c("runtime", "votes"),
    # NOTE(review): `queries` presumably names extra columns reported when a
    # point is queried -- confirm against the plotscaper documentation.
    list(queries = "title")
  ) |>
  add_barplot("director") |>
  add_histogram("runtime") |>
  add_fluctplot(c("genre1", "genre2")) |>
  render()