52 lines
1.2 KiB
Python
52 lines
1.2 KiB
Python
|
from time import perf_counter, time

import requests
from bs4 import BeautifulSoup
|
||
|
|
||
|
# This program compares the time it takes to fetch a single
# page with the time it takes to follow all of the links on a
# page using a web crawler. It demonstrates how an I/O
# bottleneck can build up while waiting for responses from
# web servers.
|
||
|
|
||
|
|
||
|
def fetch_webpage(url, timeout=10):
    """Fetch *url* with HTTP GET and return the requests Response.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before raising
            ``requests.Timeout``. Without it, a stalled server would
            hang the program forever (requests waits indefinitely by
            default).

    Returns:
        The ``requests.Response`` object for the request.
    """
    # The original passed `data=None`: a GET request carries no body,
    # so that argument was a no-op and has been dropped.
    return requests.get(url, timeout=timeout)
|
||
|
|
||
|
|
||
|
def single_fetch_time(url):
    """Return the time taken to fetch *url* once, as a string with
    two decimal places (seconds).
    """
    # perf_counter() is monotonic and high-resolution; time() can jump
    # backwards or forwards if the system clock is adjusted mid-measurement.
    t0 = perf_counter()
    fetch_webpage(url)
    t1 = perf_counter()
    return f'{t1 - t0:.2f}'
|
||
|
|
||
|
|
||
|
def crawl_time(url):
    """Fetch *url*, print every anchor href found on the page, and
    return the time spent extracting/printing the links as a string
    with two decimal places (seconds).

    NOTE: the timed section deliberately excludes the network fetch
    and the HTML parse — only the link-iteration loop is measured.
    """
    req = fetch_webpage(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # perf_counter() is the correct monotonic clock for interval timing.
    t0 = perf_counter()
    # Fixed user-facing typo: "crawlling" -> "crawling".
    print(f'\nLinks found crawling {url}:')
    for link in soup.find_all('a'):
        print(link.get('href'))
    t1 = perf_counter()
    return f'{t1 - t0:.2f}'
|
||
|
|
||
|
|
||
|
def display_time(url, action, time_str):
|
||
|
output = f'Time to {action} {url}: {time_str}s'
|
||
|
n = len(output)
|
||
|
print('\n' + '=' * n)
|
||
|
print(output)
|
||
|
print('=' * n)
|
||
|
|
||
|
|
||
|
def main():
    """Time a single fetch and then a link crawl of one site,
    printing a framed report after each measurement.
    """
    target = 'https://www.google.com'
    # Run the two measurements in order, reporting each as it finishes.
    for action, timer in (('fetch', single_fetch_time),
                          ('crawl', crawl_time)):
        display_time(target, action, timer(target))
|
||
|
|
||
|
|
||
|
# Run only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == '__main__':
    main()
|