# concurrency_in_python/io_bottlenecks/main.py
import requests
from bs4 import BeautifulSoup
from time import time
# This program is going to compare the time it takes
# to fetch a single page to the time it takes to
# follow all links on a page using a Web crawler.
# It will demonstrate how an I/O Bottleneck can build
# up while waiting for responses from Web servers
def fetch_webpage(url):
    """Perform a blocking HTTP GET for *url* and return the Response.

    Fixes: the original passed ``data=None``, which is meaningless for a GET
    request, and set no timeout, so a stalled server would block forever —
    a bounded timeout keeps the I/O-bottleneck demo from hanging.

    :param url: absolute URL to fetch
    :return: ``requests.Response`` for the completed request
    :raises requests.exceptions.Timeout: if the server does not respond in time
    """
    return requests.get(url, timeout=10)
def single_fetch_time(url):
    """Time one fetch of *url* and return the elapsed seconds as a string.

    :param url: page to fetch once
    :return: wall-clock duration formatted with two decimal places
    """
    start = time()
    fetch_webpage(url)
    elapsed = time() - start
    return f'{elapsed:.2f}'
def crawl_time(url):
    """Fetch *url*, print every anchor href found, and return elapsed seconds.

    Fixes: the timer previously started *after* ``fetch_webpage`` returned,
    so the network wait — the I/O bottleneck this program is meant to
    demonstrate — was excluded from the measurement; the timer now wraps the
    fetch as well. Also corrects the "crawlling" typo in the output.

    :param url: page whose links are enumerated
    :return: wall-clock duration formatted with two decimal places
    """
    t0 = time()
    # Include the blocking network round-trip in the measured interval.
    req = fetch_webpage(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    print(f'\nLinks found crawling {url}:')
    for link in soup.find_all('a'):
        print(link.get('href'))
    t1 = time()
    return f'{t1 - t0:.2f}'
def display_time(url, action, time_str):
    """Print a timing report for *action* on *url*, framed by '=' rules.

    :param url: the URL that was acted upon
    :param action: verb describing what was timed (e.g. 'fetch', 'crawl')
    :param time_str: pre-formatted duration in seconds
    """
    message = f'Time to {action} {url}: {time_str}s'
    rule = '=' * len(message)
    # Leading newline separates this banner from any crawl output above it.
    print(f'\n{rule}\n{message}\n{rule}')
def main():
    """Compare the cost of one fetch against crawling all links on a page."""
    target = 'https://www.google.com'
    display_time(target, 'fetch', single_fetch_time(target))
    display_time(target, 'crawl', crawl_time(target))


if __name__ == '__main__':
    main()