upload file

This commit is contained in:
Andrew Macmillan 2021-01-30 13:41:44 +00:00
parent 41260cf420
commit 10df6f8625
2 changed files with 105 additions and 0 deletions

51
io_bottlenecks/main.py Normal file
View File

@ -0,0 +1,51 @@
from time import perf_counter
from time import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# This program is going to compare the time it takes
# to fetch a single page to the time it takes to
# follow all links on a page using a Web crawler.
# It will demonstrate how an I/O Bottleneck can build
# up while waiting for responses from Web servers
def fetch_webpage(url, timeout=10):
    """Download ``url`` with an HTTP GET request.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, so without one a silent server
            would block the caller indefinitely.

    Returns:
        The response object produced by ``requests.get``.
    """
    return requests.get(url, timeout=timeout)
def single_fetch_time(url):
    """Measure how long a single fetch of ``url`` takes.

    Args:
        url: Address of the page to fetch.

    Returns:
        The elapsed time in seconds, formatted as a string with two decimals.
    """
    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way a wall-clock reading can.
    started = perf_counter()
    fetch_webpage(url)
    elapsed = perf_counter() - started
    return f'{elapsed:.2f}'
def crawl_time(url):
    """Time a one-level crawl: fetch ``url``, then follow every link on it.

    The timer covers the initial fetch and one fetch per link, so the result
    reflects the time spent waiting on network I/O rather than only the time
    spent printing the links.

    Args:
        url: Address of the page whose links are followed.

    Returns:
        The elapsed time in seconds, formatted as a string with two decimals.
    """
    started = perf_counter()
    page = fetch_webpage(url)
    soup = BeautifulSoup(page.text, 'html.parser')
    print(f'\nLinks found crawlling {url}:')
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href:
            # Anchors without an href have nothing to follow.
            continue
        print(href)
        # Resolve relative links against the page they were found on.
        target = urljoin(url, href)
        if not target.startswith(('http://', 'https://')):
            # Skip schemes that cannot be fetched over HTTP (mailto:, javascript:, ...).
            continue
        try:
            fetch_webpage(target)
        except requests.RequestException as exc:
            # One unreachable link should not abort the whole crawl.
            print(f'Could not fetch {target}: {exc}')
    elapsed = perf_counter() - started
    return f'{elapsed:.2f}'
def display_time(url, action, time_str):
    """Print a timing result framed above and below by a rule of '=' signs.

    Args:
        url: Address the timing refers to.
        action: Verb describing what was timed (for example 'fetch').
        time_str: Already-formatted duration in seconds.
    """
    message = f'Time to {action} {url}: {time_str}s'
    # The rule is exactly as wide as the message it frames.
    rule = '=' * len(message)
    print('\n' + rule, message, rule, sep='\n')
def main():
    """Time a single fetch and then a crawl of the same URL, printing both."""
    url = 'https://www.google.com'
    # Each entry pairs the label shown in the report with the timing routine.
    measurements = (
        ('fetch', single_fetch_time),
        ('crawl', crawl_time),
    )
    for action, measure in measurements:
        display_time(url, action, measure(url))


# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,54 @@
from threading import Thread
from random import randint
import time
# Having multiple Threads running that update a shared resource
# without proper Thread synchronisation is risky.
#
# In this example, 2 worker Threads both try to complete a job
# updating the counter variable. One is trying to increment it
# to > 1000, while the other is trying to decrement it to < -1000
#
# The program is non-deterministic and unreliable because the
# OS Task Scheduler is constantly switching between Threads,
# with neither taking precedence. This behaviour can lead to
# switching infinitely back-and-forth between the Threads, plus
# we have no possible way of predicting the output of the program
# Shared state: both worker functions below mutate this global, and no lock
# guards the updates.
counter = 1
def worker_a():
    """Keep incrementing the shared ``counter`` until it reaches 1000.

    After every increment the new value is printed and the worker sleeps for
    a randomly chosen 0 or 1 seconds. No synchronisation is performed on the
    shared counter.
    """
    global counter
    while True:
        if counter >= 1000:
            break
        counter = counter + 1
        print(f'Worker A incremented counter by 1, counter = {counter}')
        pause = randint(0, 1)
        time.sleep(pause)
def worker_b():
    """Keep decrementing the shared ``counter`` until it reaches -1000.

    After every decrement the new value is printed and the worker sleeps for
    a randomly chosen 0 or 1 seconds. No synchronisation is performed on the
    shared counter.
    """
    global counter
    while True:
        if counter <= -1000:
            break
        counter = counter - 1
        print(f'Worker B decremented counter by 1, counter = {counter}')
        pause = randint(0, 1)
        time.sleep(pause)
def start_threads(t1, t2):
    """Start both threads, then block until each of them has finished.

    Args:
        t1: First thread; started and joined first.
        t2: Second thread; started and joined second.
    """
    workers = (t1, t2)
    # Start everything before joining anything so the two threads overlap.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def main():
    """Run worker_a and worker_b in two threads and print the elapsed time.

    The threads are handed to ``start_threads``; the elapsed time is printed
    once that call returns.
    """
    # perf_counter is monotonic, so the measured interval is not affected by
    # system clock adjustments the way time.time() readings are.
    started = time.perf_counter()
    thread1 = Thread(target=worker_a)
    thread2 = Thread(target=worker_b)
    start_threads(thread1, thread2)
    elapsed = time.perf_counter() - started
    print(f'Execution time: {elapsed:.2f}')


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()