upload file
This commit is contained in:
parent
41260cf420
commit
10df6f8625
|
@ -0,0 +1,51 @@
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
# This program is going to compare the time it takes
|
||||||
|
# to fetch a single page to the time it takes to
|
||||||
|
# follow all links on a page using a Web crawler.
|
||||||
|
# It will demonstrate how an I/O Bottleneck can build
|
||||||
|
# up while waiting for responses from Web servers
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_webpage(url):
    """Perform a plain HTTP GET of *url* and return the requests.Response.

    Fix: the original passed ``data=None`` — ``data`` is the request-body
    parameter (meant for POST/PUT) and has no effect on a GET, so it is
    dropped to avoid implying a body is sent.
    """
    return requests.get(url)
|
||||||
|
|
||||||
|
|
||||||
|
def single_fetch_time(url):
    """Time one fetch of *url*; return elapsed seconds as a '%.2f' string."""
    start = time()
    fetch_webpage(url)
    elapsed = time() - start
    return f'{elapsed:.2f}'
|
||||||
|
|
||||||
|
|
||||||
|
def crawl_time(url):
    """Fetch *url*, print the href of every <a> tag on the page, and
    return the time spent iterating the links as a '%.2f' seconds string.

    Note: the timer starts after the page is fetched and parsed, so the
    returned value covers only the link-printing loop.
    """
    req = fetch_webpage(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    t0 = time()
    # Fix: user-facing message misspelled "crawling" as "crawlling".
    print(f'\nLinks found crawling {url}:')
    for link in soup.find_all('a'):
        print(link.get('href'))
    t1 = time()
    return f'{t1-t0:.2f}'
|
||||||
|
|
||||||
|
|
||||||
|
def display_time(url, action, time_str):
    """Print a summary line framed above and below by '=' banners."""
    message = f'Time to {action} {url}: {time_str}s'
    border = '=' * len(message)
    print('\n' + border)
    print(message)
    print(border)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Compare the cost of one page fetch against crawling its links."""
    target = 'https://www.google.com'
    display_time(target, 'fetch', single_fetch_time(target))
    display_time(target, 'crawl', crawl_time(target))
|
||||||
|
|
||||||
|
|
||||||
|
# Run the comparison only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,54 @@
|
||||||
|
from threading import Thread
|
||||||
|
from random import randint
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Having multiple Threads running that update a shared resource
|
||||||
|
# without proper Thread synchronisation is risky.
|
||||||
|
#
|
||||||
|
# In this example, 2 worker Threads both try to complete a job
|
||||||
|
# updating the counter variable. One is trying to increment it
|
||||||
|
# to > 1000, while the other is trying to decrement it to < -1000
|
||||||
|
#
|
||||||
|
# The program is non-deterministic and unreliable because the
|
||||||
|
# OS Task Scheduler is constantly switching between Threads,
|
||||||
|
# with neither taking precedence. This behaviour can lead to
|
||||||
|
# switching infinitely back-and-forth between the Threads, plus
|
||||||
|
# we have no possible way of predicting the output of the program
|
||||||
|
|
||||||
|
# Shared mutable state that both worker threads update without a lock —
# deliberately unsynchronised to demonstrate the race described above.
counter = 1
|
||||||
|
|
||||||
|
|
||||||
|
def worker_a():
    """Repeatedly increment the shared counter until it reaches 1000."""
    global counter
    while True:
        if counter >= 1000:
            break
        counter += 1
        print(f'Worker A incremented counter by 1, counter = {counter}')
        # Random 0–1 s pause gives the scheduler room to interleave threads.
        time.sleep(randint(0, 1))
|
||||||
|
|
||||||
|
|
||||||
|
def worker_b():
    """Repeatedly decrement the shared counter until it reaches -1000."""
    global counter
    while True:
        if counter <= -1000:
            break
        counter -= 1
        print(f'Worker B decremented counter by 1, counter = {counter}')
        # Random 0–1 s pause gives the scheduler room to interleave threads.
        time.sleep(randint(0, 1))
|
||||||
|
|
||||||
|
|
||||||
|
def start_threads(t1, t2):
    """Start both threads, then block until each one has finished."""
    for worker in (t1, t2):
        worker.start()
    for worker in (t1, t2):
        worker.join()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Launch both racing workers and report total wall-clock run time."""
    t_start = time.time()
    a = Thread(target=worker_a, args=())
    b = Thread(target=worker_b, args=())
    start_threads(a, b)
    t_end = time.time()
    print(f'Execution time: {t_end-t_start:.2f}')
|
||||||
|
|
||||||
|
|
||||||
|
# Run the thread-race demo only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue