Screen Link:
My Code:
# import libraries
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint
from time import time
from warnings import warn
# Redeclaring the lists to store data in
names = []
years = []
imdb_ratings = []
metascores = []
votes = []
pages = [str(i) for i in range(1,3)]
years_url = [str(i) for i in range(2010,2018)]
# Preparing the monitoring of the loop
start_time = time()
requests = 0
# For every year in the interval 2000-2017
for year_url in years_url:
# For every page in the interval 1-4
for page in pages:
# Make a get request
response = get('http://www.imdb.com/search/title?release_date=' + year_url + '&sort=num_votes,desc&page=' + page, headers = headers)
# Pause the loop
sleep(randint(8,15))
# Monitor the requests
requests += 1
elapsed_time = time() - start_time
print('Request:{}; Frequency: {} requests/s'.format(requests, requests/elapsed_time))
clear_output(wait = True)
# Throw a warning for non-200 status codes
if response.status_code != 200:
warn('Request: {}; Status code: {}'.format(requests, response.status_code))
# Break the loop if the number of requests is greater than expected
if requests > 10:
warn('Number of requests was greater than expected.')
break
# Parse the content of the request with BeautifulSoup
page_html = BeautifulSoup(response.text, 'html.parser')
# Select all the 50 movie containers from a single page
mv_containers = page_html.find_all('div', class_ = 'lister-item mode-advanced')
# For every movie of these 50
for container in mv_containers:
# If the movie has a Metascore, then:
if container.find('div', class_ = 'ratings-metascore') is not None:
# Scrape the name
name = container.h3.a.text
names.append(name)
# Scrape the year
year = container.h3.find('span', class_ = 'lister-item-year').text
years.append(year)
# Scrape the IMDB rating
imdb = float(container.strong.text)
imdb_ratings.append(imdb)
# Scrape the Metascore
m_score = container.find('span', class_ = 'metascore').text
metascores.append(int(m_score))
# Scrape the number of votes
vote = container.find('span', attrs = {'name':'nv'})['data-value']
votes.append(int(vote))
What I expected to happen:
as per the tutorial:
Request:72; Frequency: 0.07928964663062842 requests/s
What actually happened:
NameError Traceback (most recent call last)
<ipython-input-96-383b3ebfb335> in <module>()
46
47 # Make a get request
---> 48 response = get('http://www.imdb.com/search/title?release_date=' + year_url +
49 '&sort=num_votes,desc&page=' + page, headers = headers)
50 # Pause the loop
NameError: name 'get' is not defined
i tried changing
response = get() to response = requests.get() plus tinker with the code but nothing seems to be working.
everything before the this part, worked as expected.
I am learning little by little.
Thanks for your help.
Regards