Example: Top 1,000 movies from the Internet Movie Database • plotscaper

(This vignette was used as an example during my COMPSTAT 2024 talk.)

Install package

# Install plotscaper from the bartonicek/plotscaper GitHub repository
# (requires the devtools package and network access).
devtools::install_github(repo = "bartonicek/plotscaper")

Read in the data


# Load the movie data from a CSV file (path is relative to the working
# directory), then print a compact column-by-column overview of it.
imdb <- read.csv(file = "imdb1000.csv")
imdb |> dplyr::glimpse()
#> Rows: 1,000
#> Columns: 14
#> $ title    <chr> "The Shawshank Redemption", "The Godfather", "The Dark Knight…
#> $ year     <int> 1994, 1972, 2008, 1974, 1957, 2003, 1994, 1993, 2010, 1999, 2…
#> $ runtime  <int> 142, 175, 152, 202, 96, 201, 154, 195, 148, 139, 178, 142, 16…
#> $ genre    <chr> "Drama", "Crime, Drama", "Action, Crime, Drama", "Crime, Dram…
#> $ rating   <dbl> 9.3, 9.2, 9.0, 9.0, 9.0, 8.9, 8.9, 8.9, 8.8, 8.8, 8.8, 8.8, 8…
#> $ overview <chr> "Two imprisoned men bond over a number of years, finding sola…
#> $ director <chr> "Frank Darabont", "Francis Ford Coppola", "Christopher Nolan"…
#> $ star1    <chr> "Tim Robbins", "Marlon Brando", "Christian Bale", "Al Pacino"…
#> $ star2    <chr> "Morgan Freeman", "Al Pacino", "Heath Ledger", "Robert De Nir…
#> $ star3    <chr> "Bob Gunton", "James Caan", "Aaron Eckhart", "Robert Duvall",…
#> $ star4    <chr> "William Sadler", "Diane Keaton", "Michael Caine", "Diane Kea…
#> $ votes    <int> 2343110, 1620367, 2303232, 1129952, 689845, 1642758, 1826188,…
#> $ genre1   <chr> "Drama", "Crime", "Action", "Crime", "Crime", "Action", "Crim…
#> $ genre2   <chr> "Drama", "Drama", "Crime", "Drama", "Drama", "Adventure", "Dr…

Explore

library(plotscaper)

# Build the figure step by step: start from a schema based on the data, add
# one plot at a time, and render the result at the end.
schema <- create_schema(imdb)

# Scatterplot of runtime and votes; `queries = "title"` is passed as a plot
# option (presumably so that the movie title can be looked up for a point -
# confirm against the plotscaper documentation).
schema <- add_scatterplot(
  schema,
  c("runtime", "votes"),
  list(queries = "title")
)

# Barplot of director, histogram of runtime, and a fluctuation plot of the
# two genre columns.
schema <- add_barplot(schema, "director")
schema <- add_histogram(schema, "runtime")
schema <- add_fluctplot(schema, c("genre1", "genre2"))

render(schema)