Commit ac8e2b25 authored by richardARPANET

fix incorrect results

parent e78b13f1
Pipeline #84 canceled with stage
in 14 minutes and 24 seconds
test:
script:
before_script:
- apt-get update
- apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev
- pip install tox
- tox
- git clone https://github.com/pyenv/pyenv.git ~/.pyenv
- export PYENV_ROOT="$HOME/.pyenv"
- export PATH="$PYENV_ROOT/bin:$PATH"
# CI job: install CPython 3.5.4 with pyenv and run the py35-normal tox env.
test:python35:
  script:
    - pyenv install 3.5.4
    # Load pyenv's shell integration; `pyenv shell` needs it to switch versions.
    - eval "$(pyenv init -)"
    - pyenv shell 3.5.4
    - tox -e py35-normal
# CI job: install CPython 3.6.4 with pyenv and run the py36-normal tox env.
test:python36:
  script:
    - pyenv install 3.6.4
    # Load pyenv's shell integration; `pyenv shell` needs it to switch versions.
    - eval "$(pyenv init -)"
    - pyenv shell 3.6.4
    - tox -e py36-normal
# CI job: install CPython 3.4.7 with pyenv and run the py34-normal tox env.
test:python34:
  script:
    - pyenv install 3.4.7
    # Load pyenv's shell integration; `pyenv shell` needs it to switch versions.
    - eval "$(pyenv init -)"
    - pyenv shell 3.4.7
    - tox -e py34-normal
......@@ -5,4 +5,4 @@ boto==2.48.0
python-dateutil==2.6.1
diskcache==2.9.0
setuptools>=39.0.1
dataclasses==0.5
attrs==18.1.0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import re
from dateutil.parser import parse
from .imdbpie import Imdb
from .objects import (
Title, TitleEpisode, TitleEpisodes, Name, TitleName, Image, TitleRelease
Title, TitleEpisodes, Name, TitleName, Image, TitleRelease,
TitleSearchResult, NameSearchResult,
)
REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
......@@ -11,26 +16,24 @@ REGEX_IMDB_ID = re.compile(r'([a-zA-Z]{2}[0-9]{7})')
class ImdbFacade(object):
def __init__(self, client):
self._client = client
def __init__(self, client=None):
self._client = client or Imdb()
def get_title(self, imdb_id):
title_data, _ = self._get_title_data(imdb_id=imdb_id)
title_data, title_aux_data = self._get_title_data(imdb_id=imdb_id)
try:
episodes = TitleEpisodes(facade=self, imdb_id=imdb_id)
except LookupError:
episodes = ()
return Title(episodes=episodes, **title_data)
def get_title_episode(self, imdb_id):
title_data, title_aux_data = self._get_title_data(imdb_id=imdb_id)
try:
season = title_aux_data['season']
episode = title_aux_data['episode']
except KeyError:
season = None
episode = None
return TitleEpisode(season=season, episode=episode, **title_data)
return Title(
season=season, episode=episode, episodes=episodes, **title_data
)
def get_name(self, imdb_id):
name_data = self._client.get_name(imdb_id=imdb_id)
......@@ -65,6 +68,29 @@ class ImdbFacade(object):
filmography=filmography,
)
def search_for_name(self, query):
    """Search for names matching ``query``.

    Every hit yielded by the underlying client's ``search_for_name`` is
    wrapped in a ``NameSearchResult`` built from its ``'imdb_id'`` and
    ``'name'`` entries.

    Returns:
        A tuple of ``NameSearchResult`` in the order the client returned
        the hits (empty when there are none).
    """
    return tuple(
        NameSearchResult(imdb_id=hit['imdb_id'], name=hit['name'])
        for hit in self._client.search_for_name(query)
    )
def search_for_title(self, query):
    """Search for titles matching ``query``.

    Every hit yielded by the underlying client's ``search_for_title`` is
    wrapped in a ``TitleSearchResult``. The hit's ``'year'`` entry is
    converted with ``int`` when truthy and becomes ``None`` otherwise.

    Returns:
        A tuple of ``TitleSearchResult`` in the order the client returned
        the hits (empty when there are none).
    """
    def _to_result(hit):
        # Convert the year before touching the other keys, so a bad year
        # surfaces ahead of any missing-key error.
        raw_year = hit['year']
        year = int(raw_year) if raw_year else None
        return TitleSearchResult(
            imdb_id=hit['imdb_id'],
            title=hit['title'],
            type=hit['type'],
            year=year,
        )

    return tuple(
        _to_result(hit) for hit in self._client.search_for_title(query)
    )
def _get_writers(self, top_crew_data):
return tuple(
TitleName(
......
......@@ -148,16 +148,20 @@ class Imdb(Auth):
else:
response.raise_for_status()
def _suggest_search(self, query):
    """Fetch the raw suggestion payload for ``query``.

    The request path is ``/suggests/<char>/<quoted query>.json``, where
    ``<char>`` is whatever ``_query_first_alpha_num`` returns for the
    unquoted query. The path is joined onto ``SEARCH_BASE_URI`` and the
    quoted query is also forwarded to ``_get`` as ``query``.

    Returns:
        Whatever ``self._get`` returns for that URL.
    """
    encoded = quote(query)
    prefix_char = self._query_first_alpha_num(query)
    path = '/suggests/{0}/{1}.json'.format(prefix_char, encoded)
    return self._get(url=urljoin(SEARCH_BASE_URI, path), query=encoded)
def search_for_name(self, name):
logger.info('called search_for_name %s', name)
name = re.sub(r'\W+', '_', name).strip('_')
query = quote(name)
first_alphanum_char = self._query_first_alpha_num(name)
url = (
'{0}/suggests/{1}/{2}.json'.format(SEARCH_BASE_URI,
first_alphanum_char, query)
)
search_results = self._get(url=url, query=query)
search_results = self._suggest_search(name)
results = []
for result in search_results.get('d', ()):
if not result['id'].startswith('nm'):
......@@ -173,15 +177,12 @@ class Imdb(Auth):
def search_for_title(self, title):
logger.info('called search_for_title %s', title)
title = re.sub(r'\W+', '_', title).strip('_')
query = quote(title)
first_alphanum_char = self._query_first_alpha_num(title)
url = (
'{0}/suggests/{1}/{2}.json'.format(SEARCH_BASE_URI,
first_alphanum_char, query)
)
search_results = self._get(url=url, query=query)
search_results = self._suggest_search(title)
results = []
for result in search_results.get('d', ()):
if not result['id'].startswith('tt'):
# ignore non-title results
continue
result_item = {
'title': result['l'],
'year': text_type(result['y']) if result.get('y') else None,
......@@ -292,7 +293,7 @@ class Imdb(Auth):
return False
def _get_resource(self, path):
    """Fetch ``path`` relative to ``BASE_URI`` and return its ``'resource'`` entry.

    The URL is built with ``urljoin``. The earlier string-formatted URL
    was overwritten before use and has been dropped.

    Raises:
        KeyError: if the fetched payload has no ``'resource'`` key.
    """
    url = urljoin(BASE_URI, path)
    return self._get(url=url)['resource']
def _get(self, url, query=None, params=None):
......
from collections.abc import Sequence
from datetime import date
from dataclasses import dataclass
import attr
@dataclass
@attr.s
class Image:
url: str
width: int
height: int
url = attr.ib()
width = attr.ib()
height = attr.ib()
class TitleEpisodes(Sequence):
......@@ -33,79 +32,72 @@ class TitleEpisodes(Sequence):
def __getitem__(self, index):
imdb_id = self._episode_imdb_ids[index]
return self._facade.get_title_episode(imdb_id=imdb_id)
return self._facade.get_title(imdb_id=imdb_id)
@dataclass
@attr.s
class Title:
imdb_id: str
title: str
type: str
certification: str
year: int
genres: tuple
writers: tuple
creators: tuple
credits: tuple
directors: tuple
stars: tuple
image: Image
episodes: TitleEpisodes
rating_count: int = 0
rating: float = None
plot_outline: str = None
release_date: date = None
releases: tuple = ()
imdb_id = attr.ib()
title = attr.ib()
type = attr.ib()
certification = attr.ib()
year = attr.ib()
genres = attr.ib()
writers = attr.ib()
creators = attr.ib()
credits = attr.ib()
directors = attr.ib()
stars = attr.ib()
image = attr.ib()
episodes = attr.ib()
rating_count = attr.ib(default=0)
releases = attr.ib(default=())
season = attr.ib(default=None)
episode = attr.ib(default=None)
rating = attr.ib(default=None)
plot_outline = attr.ib(default=None)
release_date = attr.ib(default=None)
def __repr__(self):
return 'Title(imdb_id={0}, title={1})'.format(self.imdb_id, self.title)
@dataclass
class TitleEpisode:
imdb_id: str
title: str
type: str
season: int
episode: int
certification: str
year: int
genres: tuple
writers: tuple
creators: tuple
credits: tuple
directors: tuple
stars: tuple
image: Image
rating_count: int = 0
rating: float = None
plot_outline: str = None
release_date: date = None
releases: tuple = ()
@dataclass
# One hit from a title search. All four fields are required (no defaults).
# ImdbFacade.search_for_title fills `year` with an int, or None when the
# client reported no year for the hit.
@attr.s
class TitleSearchResult:
imdb_id = attr.ib()
title = attr.ib()
type = attr.ib()
year = attr.ib()
# One hit from a name search. Both fields are required (no defaults).
# ImdbFacade.search_for_name fills them from the client hit's 'imdb_id' and
# 'name' entries.
@attr.s
class NameSearchResult:
imdb_id = attr.ib()
name = attr.ib()
@attr.s
class TitleRelease:
date: date
region: str
date = attr.ib()
region = attr.ib()
@dataclass
@attr.s
class TitleName:
name: str
category: str
imdb_id: str
job: str = None
characters: tuple = ()
name = attr.ib()
category = attr.ib()
imdb_id = attr.ib()
job = attr.ib(default=None)
characters = attr.ib(default=())
@dataclass
@attr.s
class Name:
name: str
imdb_id: str
image: Image
birth_place: str
gender: str
bios: tuple
date_of_birth: date
filmography: tuple
name = attr.ib()
imdb_id = attr.ib()
image = attr.ib()
birth_place = attr.ib()
gender = attr.ib()
bios = attr.ib()
date_of_birth = attr.ib()
filmography = attr.ib()
......@@ -2,84 +2,86 @@ from datetime import date
import pytest
from imdbpie import Imdb, ImdbFacade
from imdbpie.objects import TitleEpisode, Title, Name, TitleName
from imdbpie import ImdbFacade
from imdbpie.objects import (
Title, Name, TitleName, TitleSearchResult, NameSearchResult
)
@pytest.fixture(scope='module')
def client():
client = Imdb(locale='en_US')
yield client
client.clear_cached_credentials()
@pytest.fixture(scope='module')
def facade(client):
return ImdbFacade(client=client)
def test_get_title_tv_show(facade):
tv_show_imdb_id = 'tt0096697'
title = facade.get_title(imdb_id=tv_show_imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type == 'tvseries'
assert str(title) == 'Title(imdb_id=tt0096697, title=The Simpsons)'
num_checked = 0
for episode in title.episodes:
assert isinstance(episode, TitleEpisode)
assert episode.imdb_id
assert isinstance(episode.season, int)
assert isinstance(episode.episode, int)
_check_title(title=episode, facade=facade)
num_checked += 1
if num_checked > 5:
break
# Sequence operations
assert title.episodes[0].season == 1
assert title.episodes[0].episode == 1
assert title.episodes
assert len(title.episodes)
assert title.episodes[-1].imdb_id
assert title.episodes[10].imdb_id
def test_get_title_movie(facade):
tv_show_imdb_id = 'tt0468569'
title = facade.get_title(imdb_id=tv_show_imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type == 'movie'
assert len(title.episodes) == 0
@pytest.mark.parametrize('imdb_id', [
'tt0795176',
'tt7983794',
])
def test_get_title_documentary(facade, imdb_id):
title = facade.get_title(imdb_id=imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type in ('tvminiseries', 'movie')
num_checked = 0
for episode in title.episodes:
assert episode
assert episode.imdb_id
assert isinstance(episode.season, int)
assert isinstance(episode.episode, int)
_check_title(title=episode, facade=facade)
num_checked += 1
if num_checked > 5:
break
def facade():
return ImdbFacade()
class TestGetTitle(object):
def test_tv_show(self, facade):
tv_show_imdb_id = 'tt0096697'
title = facade.get_title(imdb_id=tv_show_imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type == 'tvseries'
num_checked = 0
for episode in title.episodes:
assert isinstance(episode, Title)
assert episode.imdb_id
assert isinstance(episode.season, int)
assert isinstance(episode.episode, int)
_check_title(title=episode, facade=facade)
num_checked += 1
if num_checked > 5:
break
# Sequence operations
assert title.episodes[0].season == 1
assert title.episodes[0].episode == 1
assert title.episodes
assert len(title.episodes)
assert title.episodes[-1].imdb_id
assert title.episodes[10].imdb_id
def test_movie(self, facade):
tv_show_imdb_id = 'tt0468569'
title = facade.get_title(imdb_id=tv_show_imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type == 'movie'
assert len(title.episodes) == 0
@pytest.mark.parametrize('imdb_id', [
'tt0795176',
'tt7983794',
])
def test_get_title_documentary(self, facade, imdb_id):
title = facade.get_title(imdb_id=imdb_id)
assert isinstance(title, Title)
_check_title(title=title, facade=facade)
assert title.type in ('tvminiseries', 'movie')
num_checked = 0
for episode in title.episodes:
assert episode
assert episode.imdb_id
assert isinstance(episode.season, int)
assert isinstance(episode.episode, int)
_check_title(title=episode, facade=facade)
num_checked += 1
if num_checked > 5:
break
def test_tv_episode(self, facade):
episode_imdb_id = 'tt4847050'
title = facade.get_title(imdb_id=episode_imdb_id)
assert isinstance(title, Title)
assert title.imdb_id == episode_imdb_id
assert len(title.episodes) == 0
assert isinstance(title.season, int)
assert isinstance(title.episode, int)
@pytest.mark.parametrize('imdb_id', [
......@@ -109,22 +111,26 @@ def test_get_name(facade, imdb_id):
facade._client.validate_imdb_id(imdb_id)
def test_get_title_episode(facade):
episode_imdb_id = 'tt4847050'
title = facade.get_title_episode(imdb_id=episode_imdb_id)
assert isinstance(title, TitleEpisode)
assert title.imdb_id == episode_imdb_id
assert isinstance(title.season, int)
assert isinstance(title.episode, int)
def test_search_for_name(facade):
    """Each hit for a name query is a NameSearchResult with an ``nm`` id."""
    # The stale placeholder `pass` that preceded the real body is removed.
    results = facade.search_for_name('Tom Hanks')
    assert results
    for result in results:
        assert isinstance(result, NameSearchResult)
        assert result.imdb_id.startswith('nm')
        assert isinstance(result.name, str)
def test_search_for_title(facade):
    """Each hit for a title query is a TitleSearchResult with a ``tt`` id."""
    # The stale placeholder `pass` that preceded the real body is removed.
    results = facade.search_for_title('The Dark Knight')
    assert results
    for result in results:
        assert isinstance(result, TitleSearchResult)
        assert result.imdb_id.startswith('tt')
        assert isinstance(result.title, str)
        # `year` may be None; only check the type when a year is present.
        if result.year:
            assert isinstance(result.year, int)
def _check_title(title, facade):
......
......@@ -89,6 +89,11 @@ def test_search_for_title_searching_title(client):
assert expected_top_results == results[:2]
def test_search_for_title_returns_no_results_if_name_query(client):
    """A person's name passed to the title search must yield no hits."""
    name_query = 'Grigoriy Dobrygin'
    assert len(client.search_for_title(name_query)) == 0
@pytest.mark.parametrize('query', [
'Mission: Impossible',
'Honey, I Shrunk the Kids',
......@@ -118,6 +123,11 @@ def test_search_for_name(client):
sorted(results, key=itemgetter('imdb_id')))
def test_search_for_name_returns_no_results_if_title_query(client):
    """A title passed to the name search must yield no hits."""
    title_query = 'Mission Impossible'
    assert len(client.search_for_name(title_query)) == 0
def test_search_for_title_no_results(client):
    """A query that matches nothing returns exactly an empty list."""
    nonsense_query = '898582da396c93d5589e0'
    found = client.search_for_title(nonsense_query)
    assert [] == found
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment