import requests
from bs4 import BeautifulSoup
from time import time

# This program compares the time it takes to fetch a single page
# with the time it takes to fetch a page and enumerate every link
# on it (a minimal Web "crawl").  It demonstrates how an I/O
# bottleneck builds up while waiting for responses from Web servers.


def fetch_webpage(url, timeout=10):
    """Fetch *url* and return the ``requests.Response``.

    A GET request carries no body, so the original ``data=None``
    argument was dropped.  *timeout* (seconds, keyword with a default
    so existing callers are unaffected) prevents the program from
    hanging indefinitely on an unresponsive server.
    """
    return requests.get(url, timeout=timeout)


def single_fetch_time(url):
    """Return the wall-clock time to fetch *url*, formatted '<s>.ss'."""
    t0 = time()
    fetch_webpage(url)
    t1 = time()
    return f'{t1 - t0:.2f}'


def crawl_time(url):
    """Return the wall-clock time (string, 2 decimals) to fetch *url*
    and print the href of every ``<a>`` tag found on the page.

    The fetch itself is included in the measurement so the result is
    directly comparable with ``single_fetch_time()``; the original
    version started the clock only after the download had finished,
    so the "crawl" timing contained no network I/O at all.
    """
    t0 = time()
    req = fetch_webpage(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Typo fixed in the user-facing message: "crawlling" -> "crawling".
    print(f'\nLinks found crawling {url}:')
    for link in soup.find_all('a'):
        print(link.get('href'))
    t1 = time()
    return f'{t1 - t0:.2f}'


def display_time(url, action, time_str):
    """Pretty-print one timing result framed by '=' rules sized to fit."""
    output = f'Time to {action} {url}: {time_str}s'
    n = len(output)
    print('\n' + '=' * n)
    print(output)
    print('=' * n)


def main():
    """Time a bare fetch, then a fetch-plus-link-listing, of one URL."""
    url = 'https://www.google.com'
    sf_time = single_fetch_time(url)
    display_time(url, 'fetch', sf_time)
    c_time = crawl_time(url)
    display_time(url, 'crawl', c_time)


if __name__ == '__main__':
    main()