diff --git a/io_bottlenecks/main.py b/io_bottlenecks/main.py
new file mode 100644
index 0000000..c478777
--- /dev/null
+++ b/io_bottlenecks/main.py
@@ -0,0 +1,84 @@
+from time import time
+from urllib.parse import urljoin, urlparse
+
+import requests
+from bs4 import BeautifulSoup
+
+# This program is going to compare the time it takes
+# to fetch a single page to the time it takes to
+# follow all links on a page using a Web crawler.
+# It will demonstrate how an I/O Bottleneck can build
+# up while waiting for responses from Web servers
+
+# Seconds to wait for a server before giving up on a request;
+# requests waits indefinitely when no timeout is given
+REQUEST_TIMEOUT = 10
+
+
+def fetch_webpage(url, timeout=REQUEST_TIMEOUT):
+    """Fetch url with an HTTP GET and return the response.
+
+    timeout is the number of seconds to wait for the server;
+    without one a request to a silent server can block forever.
+    """
+    return requests.get(url, timeout=timeout)
+
+
+def single_fetch_time(url):
+    """Return the seconds taken to fetch url, as a 2-decimal string."""
+    t0 = time()
+    fetch_webpage(url)
+    t1 = time()
+    return f'{t1-t0:.2f}'
+
+
+def crawl_time(url):
+    """Return the seconds taken to crawl url, as a 2-decimal string.
+
+    The crawl fetches url, prints the href of every anchor found on
+    it and follows each http(s) link, so the measured time includes
+    the wait for every Web server's response.
+    """
+    t0 = time()
+    page = fetch_webpage(url)
+    soup = BeautifulSoup(page.text, 'html.parser')
+    print(f'\nLinks found crawlling {url}:')
+    for link in soup.find_all('a'):
+        href = link.get('href')
+        print(href)
+        if not href:
+            continue
+        # Resolve relative hrefs against the page they were found on
+        target = urljoin(url, href)
+        # mailto:, javascript: and similar targets cannot be fetched
+        if urlparse(target).scheme not in ('http', 'https'):
+            continue
+        try:
+            fetch_webpage(target)
+        except requests.RequestException as err:
+            # One dead link must not abort the whole measurement
+            print(f'Could not follow {target}: {err}')
+    t1 = time()
+    return f'{t1-t0:.2f}'
+
+
+def display_time(url, action, time_str):
+    """Print a banner reporting how long action took for url."""
+    output = f'Time to {action} {url}: {time_str}s'
+    n = len(output)
+    print('\n' + '=' * n)
+    print(output)
+    print('=' * n)
+
+
+def main():
+    """Time a single fetch and a full crawl of the same page."""
+    url = 'https://www.google.com'
+    sf_time = single_fetch_time(url)
+    display_time(url, 'fetch', sf_time)
+    c_time = crawl_time(url)
+    display_time(url, 'crawl', c_time)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/non_deterministic_threading/main.py b/non_deterministic_threading/main.py
new file mode 100644
index 0000000..825eafb
--- /dev/null
+++ b/non_deterministic_threading/main.py
@@ -0,0 +1,61 @@
+from threading import Thread
+from random import randint
+import time
+
+# Having multiple Threads running that update a shared resource
+# without proper Thread synchronisation is risky.
+#
+# In this example, 2 worker Threads both try to complete a job
+# updating the counter variable. One is trying to increment it
+# to 1000, while the other is trying to decrement it to -1000
+#
+# The program is non-deterministic and unreliable because the
+# OS Task Scheduler is constantly switching between Threads,
+# with neither taking precedence. This behaviour can lead to
+# switching infinitely back-and-forth between the Threads, plus
+# we have no possible way of predicting the output of the program
+
+# Shared between both workers and deliberately left unguarded
+counter = 1
+
+
+def worker_a():
+    """Keep incrementing the shared counter while it is below 1000."""
+    global counter
+    while counter < 1000:
+        counter += 1
+        print(f'Worker A incremented counter by 1, counter = {counter}')
+        # Sleep 0 or 1 seconds so the Threads interleave unpredictably
+        time.sleep(randint(0, 1))
+
+
+def worker_b():
+    """Keep decrementing the shared counter while it is above -1000."""
+    global counter
+    while counter > -1000:
+        counter -= 1
+        print(f'Worker B decremented counter by 1, counter = {counter}')
+        # Sleep 0 or 1 seconds so the Threads interleave unpredictably
+        time.sleep(randint(0, 1))
+
+
+def start_threads(t1, t2):
+    """Start both Threads, then block until both have finished."""
+    t1.start()
+    t2.start()
+    t1.join()
+    t2.join()
+
+
+def main():
+    """Run both workers concurrently and print the elapsed time."""
+    t0 = time.time()
+    thread1 = Thread(target=worker_a)
+    thread2 = Thread(target=worker_b)
+    start_threads(thread1, thread2)
+    t1 = time.time()
+    print(f'Execution time: {t1-t0:.2f}')
+
+
+if __name__ == '__main__':
+    main()