Skip to content

Latest commit

 

History

History
89 lines (80 loc) · 2.18 KB

README.md

File metadata and controls

89 lines (80 loc) · 2.18 KB

imdb-stats

License: MIT

Loads some of the TSV files of https://datasets.imdbws.com/ into a MySQL server.

Edit load-data.sh to set your MySQL info and then run it:

./load-data.sh

This will require around 500MB in your /tmp/ directory. After the data load is done, you can experiment with ad hoc queries. Examples:

select
    r.averageRating, r.numVotes, b.startYear, b.primaryTitle
from
    title_ratings r, title_basics b
where
    r.tconst = b.tconst and
    b.titleType = 'movie' and
    b.startYear > 1998 and
    r.numVotes > 100000 and
    r.averageRating > 6
order by r.averageRating desc
limit 50;
select
    b.startYear, avg(averageRating), count(*)
from
    title_ratings r, title_basics b
where
    r.tconst = b.tconst and
    b.titleType = 'movie' and
    r.numVotes > 1000
group by b.startYear
order by b.startYear;
select
    count(*)
from
    title_ratings r, title_basics b
where
    r.tconst = b.tconst and
    b.titleType = 'movie' and
    r.numVotes > 10;
select
    b.startYear, avg(b.runtimeMinutes), count(*)
from
    title_ratings r, title_basics b
where
    r.tconst = b.tconst and
    b.titleType = 'movie' and
    r.numVotes > 1000 and
    r.averageRating > 7 and
    b.startYear > year(CURDATE()) - 30 and
    b.runtimeMinutes is not null
group by b.startYear
order by b.startYear;
select
    round(r.averageRating), avg(b.runtimeMinutes), count(*)
from
    title_ratings r, title_basics b
where
    r.tconst = b.tconst and
    b.titleType = 'movie' and
    r.numVotes > 100 and
    b.startYear > year(CURDATE()) - 15 and
    b.runtimeMinutes is not null
group by round(r.averageRating)
order by round(r.averageRating);

License

MIT