Commit e78b13f1 authored by richardARPANET's avatar richardARPANET

adds get_title_auxiliary

parent 462160c5
Pipeline #56 failed with stage
in 5 minutes and 30 seconds
......@@ -3,10 +3,10 @@
Release History
---------------
5.4.6 (unreleased)
5.5.9 (unreleased)
++++++++++++++++++
- Nothing changed yet.
- Adds ``get_title_auxiliary`` method.
5.4.5 (2018-04-29)
......
......@@ -7,3 +7,7 @@ create-wheel = yes
[tool:pytest]
addopts = -x -s -v
norecursedirs = .git
[flake8]
exclude = .git,__pycache__,legacy,build,dist,.tox
max-complexity = 13
import re
from datetime import date
from dateutil.parser import parse
from dataclasses import dataclass
REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
from .objects import (
Title, TitleEpisode, TitleEpisodes, Name, TitleName, Image, TitleRelease
)
@dataclass
class Title:
    """Value object holding the descriptive fields of a single title."""

    # Required descriptive fields.
    title: str
    type: str
    year: int

    # Required tuples of genres and of people attached to the title.
    genres: tuple
    writers: tuple
    creators: tuple
    credits: tuple
    directors: tuple

    # Optional fields; the defaults stand for "not available".
    rating_count: int = 0
    rating: float = None
    plot_outline: str = None
    release_date: date = None
    releases: tuple = ()
@dataclass
class TitleRelease:
    """A single release of a title: the release date and its region code."""

    date: date
    region: str
@dataclass
class TitleName:
    """A person attached to a title, with the job and category they hold."""

    name: str
    job: str
    category: str
    imdb_id: str
REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
class ImdbFacade(object):
......@@ -43,44 +15,54 @@ class ImdbFacade(object):
self._client = client
def get_title(self, imdb_id):
    """Build a ``Title`` for ``imdb_id`` together with its episodes.

    The shared title fields come from ``_get_title_data``. The episode
    listing is built with ``TitleEpisodes``; when that lookup raises
    ``LookupError`` the title is given an empty tuple of episodes.
    """
    title_data, _ = self._get_title_data(imdb_id=imdb_id)
    try:
        episodes = TitleEpisodes(facade=self, imdb_id=imdb_id)
    except LookupError:
        episodes = ()
    return Title(episodes=episodes, **title_data)
def get_title_episode(self, imdb_id):
    """Build a ``TitleEpisode`` for ``imdb_id``.

    Season and episode numbers are read from the auxiliary title data;
    if either key is absent, both are reported as ``None``.
    """
    title_data, aux = self._get_title_data(imdb_id=imdb_id)
    try:
        season, episode = aux['season'], aux['episode']
    except KeyError:
        season = episode = None
    return TitleEpisode(season=season, episode=episode, **title_data)
def get_name(self, imdb_id):
    """Build a ``Name`` (a person) for ``imdb_id``.

    The image and the mini biographies are optional in the client
    response: a missing key yields ``image=None`` and ``bios=()``.
    The other fields read here are required and raise ``KeyError``
    when absent.
    """
    name_data = self._client.get_name(imdb_id=imdb_id)
    base = name_data['base']

    name = base['name']
    # From here on use the id as reported by the service itself.
    imdb_id = self._parse_id(base['id'])

    try:
        image_data = base['image']
        image = Image(
            url=image_data['url'],
            height=image_data['height'],
            width=image_data['width'],
        )
    except KeyError:
        image = None

    gender = base['gender'].lower()
    date_of_birth = parse(base['birthDate']).date()
    birth_place = base['birthPlace']

    try:
        bios = tuple(b['text'] for b in base['miniBios'])
    except KeyError:
        bios = ()

    filmography_data = self._client.get_name_filmography(imdb_id)
    filmography = tuple(
        self._parse_id(f['id']) for f in filmography_data['filmography']
    )

    return Name(
        name=name, imdb_id=imdb_id, date_of_birth=date_of_birth,
        gender=gender, birth_place=birth_place, bios=bios, image=image,
        filmography=filmography,
    )
def _get_writers(self, top_crew_data):
......@@ -89,17 +71,28 @@ class ImdbFacade(object):
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=REGEX_IMDB_ID.findall(i['id'])[0]
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['writers']
)
def _get_stars(self, principals_data):
return tuple(
TitleName(
name=i['name'],
job=i.get('job'),
characters=tuple(i.get('characters', ())),
category=i.get('category'),
imdb_id=self._parse_id(i['id'])
) for i in principals_data
)
def _get_creators(self, top_crew_data):
return tuple(
TitleName(
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=REGEX_IMDB_ID.findall(i['id'])[0]
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['writers']
if i.get('job') == 'creator'
)
......@@ -110,7 +103,7 @@ class ImdbFacade(object):
name=i['name'],
job=i.get('job'),
category=i.get('category'),
imdb_id=REGEX_IMDB_ID.findall(i['id'])[0]
imdb_id=self._parse_id(i['id'])
) for i in top_crew_data['directors']
)
......@@ -122,6 +115,85 @@ class ImdbFacade(object):
name=item['name'],
category=item.get('category'),
job=item.get('job'),
imdb_id=REGEX_IMDB_ID.findall(item['id'])[0]
imdb_id=self._parse_id(item['id'])
))
return tuple(credits)
def _parse_id(self, string):
    """Return the first IMDb id found in ``string``.

    Raises ``IndexError`` when ``REGEX_IMDB_ID`` matches nothing.
    """
    matches = REGEX_IMDB_ID.findall(string)
    return matches[0]
def _get_title_data(self, imdb_id):
    """Collect the fields shared by ``Title`` and ``TitleEpisode``.

    Returns a 2-tuple ``(title_data, title_aux_data)``: ``title_data`` is
    a dict of keyword arguments for the title dataclasses, and
    ``title_aux_data`` is the raw auxiliary response so callers can read
    extra keys (e.g. season/episode numbers) without a second request.

    Optional pieces fall back to a neutral value instead of raising:
    rating, plot outline, certification and image become ``None``,
    rating count becomes ``0`` and releases become ``()``.
    """
    base_title_data = self._client.get_title(imdb_id=imdb_id)
    top_crew_data = self._client.get_title_top_crew(imdb_id=imdb_id)
    title_aux_data = self._client.get_title_auxiliary(imdb_id=imdb_id)
    credits_data = self._client.get_title_credits(imdb_id=imdb_id)

    title = base_title_data['base']['title']
    year = base_title_data['base'].get('year')
    try:
        rating = float(base_title_data['ratings']['rating'])
    except KeyError:
        rating = None
    type_ = base_title_data['base']['titleType'].lower()

    try:
        releases_data = self._client.get_title_releases(imdb_id=imdb_id)
    except LookupError:
        releases = ()
    else:
        releases = tuple(
            TitleRelease(date=parse(r['date']).date(), region=r['region'])
            for r in releases_data['releases']
        )
    # The first listed release supplies the headline date. An empty
    # listing is treated like a failed lookup rather than an IndexError.
    release_date = releases[0].date if releases else None

    try:
        rating_count = base_title_data['ratings']['ratingCount']
    except KeyError:
        rating_count = 0
    try:
        plot_outline = base_title_data['plot']['outline']['text']
    except KeyError:
        plot_outline = None

    writers = self._get_writers(top_crew_data)
    directors = self._get_directors(top_crew_data)
    creators = self._get_creators(top_crew_data)
    genres = tuple(g.lower() for g in title_aux_data['genres'])
    credits = self._get_credits(credits_data)

    try:
        certification = title_aux_data['certificate']['certificate']
    except (KeyError, TypeError):
        # TypeError: the 'certificate' entry is present but None.
        # KeyError: the entry (or its inner key) is missing altogether.
        certification = None

    stars = self._get_stars(title_aux_data['principals'])

    try:
        image_data = title_aux_data['image']
        image = Image(
            url=image_data['url'],
            height=image_data['height'],
            width=image_data['width'],
        )
    except KeyError:
        image = None

    return dict(
        imdb_id=imdb_id,
        title=title,
        year=year,
        rating=rating,
        type=type_,
        release_date=release_date,
        releases=releases,
        plot_outline=plot_outline,
        rating_count=rating_count,
        writers=writers,
        directors=directors,
        creators=creators,
        genres=genres,
        credits=credits,
        certification=certification,
        image=image,
        stars=stars,
    ), title_aux_data
......@@ -3,15 +3,14 @@ from __future__ import absolute_import, unicode_literals
import re
import json
from datetime import date
import tempfile
import logging
import requests
from six import text_type
from six.moves import http_client as httplib
from six.moves.urllib.parse import (
urlencode, urljoin, quote, unquote, urlparse
)
from six.moves.urllib.parse import urlencode, urljoin, quote, unquote
from .constants import BASE_URI, SEARCH_BASE_URI
from .auth import Auth
......@@ -56,6 +55,7 @@ class Imdb(Auth):
def __init__(self, locale=None, exclude_episodes=False, session=None):
self.locale = locale or 'en_US'
self.region = self.locale.split('_')[-1]
self.exclude_episodes = exclude_episodes
self.session = session or requests.Session()
self._cachedir = tempfile.gettempdir()
......@@ -95,18 +95,32 @@ class Imdb(Auth):
def get_title_auxiliary(self, imdb_id):
    """Fetch the title-auxiliary resource for ``imdb_id``.

    The request uses the client's own region and today's date rather
    than fixed values. A ``LookupError`` from the request is handed to
    ``_title_not_found``. When ``exclude_episodes`` is enabled and the
    resource is a TV episode, ``LookupError`` is raised.
    """
    logger.info('called get_title_auxiliary %s', imdb_id)
    self.validate_imdb_id(imdb_id)
    self._redirection_title_check(imdb_id)
    path = '/template/imdb-ios-writable/title-auxiliary-v31.jstl/render'
    try:
        resource = self._get(
            url=urljoin(BASE_URI, path),
            params={
                'inlineBannerAdWeblabOn': 'false',
                'minwidth': '320',
                'osVersion': '11.3.0',
                'region': self.region,
                'tconst': imdb_id,
                'today': date.today().strftime('%Y-%m-%d'),
            }
        )
    except LookupError:
        # NOTE(review): presumably raises; otherwise ``resource`` is
        # unbound below - confirm against ``_title_not_found``.
        self._title_not_found()

    if (
        self.exclude_episodes is True and
        resource['titleType'] == 'tvEpisode'
    ):
        raise LookupError(
            'Title not found. Title was an episode and '
            '"exclude_episodes" is set to true'
        )
    return resource
def _simple_get_method(self, method, path):
......@@ -282,16 +296,17 @@ class Imdb(Auth):
return self._get(url=url)['resource']
def _get(self, url, query=None, params=None):
path = urlparse(url).path
if params:
path += '?' + urlencode(params)
headers = {'Accept-Language': self.locale}
headers.update(self.get_auth_headers(path))
if params:
full_url = '{0}?{1}'.format(url, urlencode(params))
else:
full_url = url
headers.update(self.get_auth_headers(full_url))
resp = self.session.get(url, headers=headers, params=params)
if not resp.ok:
if resp.status_code == httplib.NOT_FOUND:
raise LookupError('Resource {0} not found'.format(path))
raise LookupError('Resource {0} not found'.format(url))
else:
msg = '{0} {1}'.format(resp.status_code, resp.text)
raise ImdbAPIError(msg)
......
from collections.abc import Sequence
from datetime import date
from dataclasses import dataclass
@dataclass
class Image:
    """An image reference: its URL plus pixel width and height."""

    url: str
    width: int
    height: int
class TitleEpisodes(Sequence):
    """Lazy, read-only sequence of a title's episodes.

    Only the episode ids are collected up front. Each item access asks
    the facade for the full episode, so every indexing or iteration step
    triggers a ``facade.get_title_episode`` call.
    """

    def __init__(self, facade, imdb_id):
        """Collect the episode ids of ``imdb_id``, season by season.

        Any ``LookupError`` raised by the client's ``get_title_episodes``
        propagates to the caller.
        """
        self._facade = facade
        episodes = facade._client.get_title_episodes(imdb_id=imdb_id)
        self._episode_imdb_ids = [
            facade._parse_id(episode['id'])
            for season in episodes['seasons']
            for episode in season['episodes']
        ]
        self._count = len(self._episode_imdb_ids)

    def __len__(self):
        return self._count

    def __bool__(self):
        return self._count > 0

    def __getitem__(self, index):
        """Return the episode at ``index``, or a tuple for a slice."""
        if isinstance(index, slice):
            # A slice selects several ids; resolve each one individually
            # instead of handing the id list to the facade.
            return tuple(
                self._facade.get_title_episode(imdb_id=episode_id)
                for episode_id in self._episode_imdb_ids[index]
            )
        imdb_id = self._episode_imdb_ids[index]
        return self._facade.get_title_episode(imdb_id=imdb_id)
@dataclass
class Title:
    """A title with its people, image and episode listing."""

    # Identification and classification.
    imdb_id: str
    title: str
    type: str
    certification: str
    year: int
    genres: tuple

    # People attached to the title; each is a tuple.
    writers: tuple
    creators: tuple
    credits: tuple
    directors: tuple
    stars: tuple

    image: Image
    episodes: TitleEpisodes

    # Optional fields; the defaults stand for "not available".
    rating_count: int = 0
    rating: float = None
    plot_outline: str = None
    release_date: date = None
    releases: tuple = ()

    def __repr__(self):
        # Deliberately short: only the id and the display title.
        return f'Title(imdb_id={self.imdb_id}, title={self.title})'
@dataclass
class TitleEpisode:
    """A single episode: the title fields plus season and episode numbers."""

    # Identification and position within the series.
    imdb_id: str
    title: str
    type: str
    season: int
    episode: int
    certification: str
    year: int
    genres: tuple

    # People attached to the episode; each is a tuple.
    writers: tuple
    creators: tuple
    credits: tuple
    directors: tuple
    stars: tuple

    image: Image

    # Optional fields; the defaults stand for "not available".
    rating_count: int = 0
    rating: float = None
    plot_outline: str = None
    release_date: date = None
    releases: tuple = ()
@dataclass
class TitleRelease:
    """One release of a title: when it happened and in which region."""

    date: date
    region: str
@dataclass
class TitleName:
    """A person attached to a title.

    ``job`` and ``characters`` are optional and default to ``None`` and
    an empty tuple respectively.
    """

    name: str
    category: str
    imdb_id: str
    job: str = None
    characters: tuple = ()
@dataclass
class Name:
    """A person: identity, biography details, image and filmography."""

    name: str
    imdb_id: str
    image: Image
    birth_place: str
    gender: str
    # Tuple of biography texts.
    bios: tuple
    date_of_birth: date
    # Tuple of IMDb ids of the titles the person is credited on.
    filmography: tuple
......@@ -3,6 +3,7 @@ from datetime import date
import pytest
from imdbpie import Imdb, ImdbFacade
from imdbpie.objects import TitleEpisode, Title, Name, TitleName
@pytest.fixture(scope='module')
......@@ -17,58 +18,183 @@ def facade(client):
return ImdbFacade(client=client)
def test_init(facade):
    """The ``facade`` fixture provides an ``ImdbFacade`` instance."""
    facade_cls = ImdbFacade
    assert isinstance(facade, facade_cls)
def test_get_title(facade):
title = facade.get_title(imdb_id='tt0096697')
def test_get_title_tv_show(facade):
    """A TV series yields a ``Title`` whose episodes behave as a sequence."""
    show = facade.get_title(imdb_id='tt0096697')
    assert isinstance(show, Title)
    _check_title(title=show, facade=facade)
    assert show.type == 'tvseries'
    assert str(show) == 'Title(imdb_id=tt0096697, title=The Simpsons)'

    # Inspect only the first six episodes; every item access goes
    # through ``facade.get_title_episode``.
    for position, episode in enumerate(show.episodes, start=1):
        assert isinstance(episode, TitleEpisode)
        assert episode.imdb_id
        assert isinstance(episode.season, int)
        assert isinstance(episode.episode, int)
        _check_title(title=episode, facade=facade)
        if position > 5:
            break

    # Sequence operations
    first_episode = show.episodes[0]
    assert first_episode.season == 1
    assert show.episodes[0].episode == 1
    assert show.episodes
    assert len(show.episodes)
    assert show.episodes[-1].imdb_id
    assert show.episodes[10].imdb_id
def test_get_title_movie(facade):
    """A movie yields a ``Title`` of type ``movie`` without episodes."""
    movie_imdb_id = 'tt0468569'
    movie = facade.get_title(imdb_id=movie_imdb_id)
    assert isinstance(movie, Title)
    _check_title(title=movie, facade=facade)
    assert movie.type == 'movie'
    assert len(movie.episodes) == 0
@pytest.mark.parametrize('imdb_id', [
    'tt0795176',
    'tt7983794',
])
def test_get_title_documentary(facade, imdb_id):
    """Documentaries load as a mini-series or a movie; episodes are optional."""
    documentary = facade.get_title(imdb_id=imdb_id)
    assert isinstance(documentary, Title)
    _check_title(title=documentary, facade=facade)
    assert documentary.type in ('tvminiseries', 'movie')

    # Inspect at most the first six episodes, if there are any.
    for position, episode in enumerate(documentary.episodes, start=1):
        assert episode
        assert episode.imdb_id
        assert isinstance(episode.season, int)
        assert isinstance(episode.episode, int)
        _check_title(title=episode, facade=facade)
        if position > 5:
            break
@pytest.mark.parametrize('imdb_id', [
    'nm0000151',
    'nm0588033',
    'nm0047800',
    'nm1799952',
])
def test_get_name(facade, imdb_id):
    """``get_name`` returns a ``Name`` whose fields have the expected types."""
    person = facade.get_name(imdb_id=imdb_id)
    assert isinstance(person, Name)

    # The image is optional; check its fields only when present.
    picture = person.image
    if picture:
        assert isinstance(picture.url, str)
        assert isinstance(picture.width, int)
        assert isinstance(picture.height, int)

    assert person.imdb_id == imdb_id
    assert isinstance(person.date_of_birth, date)
    assert isinstance(person.bios, tuple)
    assert person.gender in ('male', 'female')
    assert isinstance(person.birth_place, str)
    assert isinstance(person.name, str)

    for bio_text in person.bios:
        assert isinstance(bio_text, str)
    for title_imdb_id in person.filmography:
        facade._client.validate_imdb_id(title_imdb_id)
def test_get_title_episode(facade):
    """``get_title_episode`` returns a ``TitleEpisode`` with integer numbering."""
    expected_id = 'tt4847050'
    episode = facade.get_title_episode(imdb_id=expected_id)
    assert isinstance(episode, TitleEpisode)
    assert episode.imdb_id == expected_id
    for number in (episode.season, episode.episode):
        assert isinstance(number, int)
def test_search_for_name(facade):
pass
def test_search_for_title(facade):
pass
def _check_title(title, facade):
    """Assert that ``title`` carries well-formed title fields.

    Used for both ``Title`` and ``TitleEpisode`` objects; ``facade`` is
    only needed for its client's ``validate_imdb_id``.
    """
    assert isinstance(title.title, str)
    assert isinstance(title.type, str)
    assert isinstance(title.year, int)
    assert isinstance(title.rating_count, int)
    assert isinstance(title.rating, float)
    assert isinstance(title.release_date, date)
    # The facade falls back to ``None`` when no plot outline is available,
    # so the type is only checked when an outline is present.
    if title.plot_outline:
        assert isinstance(title.plot_outline, str)

    assert title.releases
    for release in title.releases:
        assert isinstance(release.date, date)
        assert isinstance(release.region, str)

    _check_title_names(title.writers, facade)

    # Creators are checked separately: their job must be exactly 'creator'.
    assert title.creators
    assert isinstance(title.creators, tuple)
    for name in title.creators:
        assert isinstance(name, TitleName)
        assert isinstance(name.name, str)
        assert name.job == 'creator'
        assert isinstance(name.imdb_id, str)
        facade._client.validate_imdb_id(name.imdb_id)

    _check_title_names(title.directors, facade)
    _check_title_names(title.credits, facade)

    assert title.genres
    assert isinstance(title.stars, tuple)
    for name in title.stars:
        assert isinstance(name.name, str)
        assert not name.job
        assert isinstance(name.characters, tuple)
        for character_name in name.characters:
            assert isinstance(character_name, str)
        assert name.category
        assert isinstance(name.imdb_id, str)
        facade._client.validate_imdb_id(name.imdb_id)

    assert isinstance(title.genres, tuple)
    for genre in title.genres:
        assert isinstance(genre, str)

    assert isinstance(title.certification, str)
    assert title.image
    assert isinstance(title.image.url, str)
    assert isinstance(title.image.width, int)
    assert isinstance(title.image.height, int)


def _check_title_names(names, facade):
    """Assert ``names`` is a non-empty tuple of valid ``TitleName`` items."""
    assert names
    assert isinstance(names, tuple)
    for name in names:
        assert isinstance(name, TitleName)
        assert isinstance(name.name, str)
        if name.job is not None:
            assert isinstance(name.job, str)
        assert isinstance(name.imdb_id, str)
        facade._client.validate_imdb_id(name.imdb_id)
......@@ -217,10 +217,30 @@ def test_get_title_releases(client):
def test_get_title_auxiliary(client):
    """The auxiliary resource exposes exactly the expected top-level keys."""
    expected_keys = [
        'certificate', 'filmingLocations', 'metacriticInfo', 'plot',
        'principals', 'rating', 'numberOfVotes', 'canRate', 'topRank',
        'userRating', 'alternateTitlesSample', 'alternateTitlesCount',
        'hasAlternateVersions', 'originalTitle', 'runningTimes',
        'spokenLanguages', 'origins', 'similaritiesCount', 'releaseDetails',
        'soundtracks', 'genres', 'reviewsTeaser', 'reviewsCount',
        'hasContentGuide', 'hasSynopsis', 'hasCriticsReviews',
        'criticsReviewers', 'crazyCreditsTeaser', 'awards', 'photos',
        'heroImages', 'seasonsInfo', 'productionStatus', 'directors',
        'writers', 'videos', 'adWidgets', 'id', 'image',
        'runningTimeInMinutes', 'title', 'titleType', 'year'
    ]

    resource = client.get_title_auxiliary('tt0111161')

    assert resource
    # Order is irrelevant; compare the sorted key lists.
    actual_keys = sorted(resource.keys())
    assert actual_keys == sorted(expected_keys)
def test_get_title_auxiliary_raises_when_exclude_episodes_enabled():
    """With ``exclude_episodes=True`` an episode id raises ``LookupError``."""
    episode_imdb_id = 'tt3181538'
    episode_free_client = Imdb(exclude_episodes=True)
    with pytest.raises(LookupError):
        episode_free_client.get_title_auxiliary(episode_imdb_id)
def test_get_title_versions(client):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.