upload file
This commit is contained in:
parent
41260cf420
commit
10df6f8625
|
@ -0,0 +1,51 @@
|
|||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from time import time
|
||||
|
||||
# This program is going to compare the time it takes
|
||||
# to fetch a single page to the time it takes to
|
||||
# follow all links on a page using a Web crawler.
|
||||
# It will demonstrate how an I/O Bottleneck can build
|
||||
# up while waiting for responses from Web servers
|
||||
|
||||
|
||||
def fetch_webpage(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the ``requests`` response.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            server would hang the whole program.

    Returns:
        The ``requests.Response`` for the GET request.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    return requests.get(url, timeout=timeout)
|
||||
|
||||
|
||||
def single_fetch_time(url):
    """Return the time taken by one fetch of *url*.

    Args:
        url: Address of the page to download.

    Returns:
        The elapsed seconds formatted as a string with two decimals.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments the way a
    # wall-clock time() difference can.
    from time import perf_counter

    t0 = perf_counter()
    fetch_webpage(url)
    elapsed = perf_counter() - t0
    return f'{elapsed:.2f}'
|
||||
|
||||
|
||||
def crawl_time(url):
    """Follow every link on the page at *url* and return the time taken.

    The page is fetched and parsed first; the timed section then prints
    each anchor's ``href`` and fetches the page it points to, one after
    another, so the result reflects the accumulated wait for every
    server response.

    Args:
        url: Address of the page whose links are crawled.

    Returns:
        The elapsed seconds formatted as a string with two decimals.
    """
    from time import perf_counter
    from urllib.parse import urljoin

    req = fetch_webpage(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    t0 = perf_counter()
    print(f'\nLinks found crawlling {url}:')
    for link in soup.find_all('a'):
        href = link.get('href')
        print(href)
        if not href:
            # Anchor without a target: nothing to follow.
            continue
        # Resolve relative links against the page they were found on.
        target = urljoin(url, href)
        if not target.startswith(('http://', 'https://')):
            # Skip mailto:, javascript: and similar non-web schemes.
            continue
        try:
            fetch_webpage(target)
        except requests.RequestException as exc:
            # One unreachable link should not abort the whole crawl.
            print(f'  (could not fetch {target}: {exc})')
    t1 = perf_counter()
    return f'{t1-t0:.2f}'
|
||||
|
||||
|
||||
def display_time(url, action, time_str):
    """Print a timing report framed by rules of '=' characters.

    Args:
        url: The address that was timed.
        action: Verb describing what was timed (e.g. 'fetch').
        time_str: The elapsed seconds, already formatted as text.
    """
    banner = f'Time to {action} {url}: {time_str}s'
    rule = '=' * len(banner)
    # Blank line, rule, message, rule -- each on its own line.
    print(f'\n{rule}', banner, rule, sep='\n')
|
||||
|
||||
|
||||
def main():
    """Time a single fetch and then a crawl of one page, reporting each."""
    target = 'https://www.google.com'
    # Run each measurement and report it before starting the next one.
    for action, measure in (('fetch', single_fetch_time),
                            ('crawl', crawl_time)):
        display_time(target, action, measure(target))


# Only run the comparison when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,54 @@
|
|||
from threading import Thread
|
||||
from random import randint
|
||||
import time
|
||||
|
||||
# Having multiple Threads running that update a shared resource
|
||||
# without proper Thread synchronisation is risky.
|
||||
#
|
||||
# In this example, 2 worker Threads both try to complete a job
|
||||
# updating the counter variable. One is trying to increment it
|
||||
# to > 1000, while the other is trying to decrement it to < -1000
|
||||
#
|
||||
# The program is non-deterministic and unreliable because the
|
||||
# OS Task Scheduler is constantly switching between Threads,
|
||||
# with neither taking precedence. This behaviour can lead to
|
||||
# switching infinitely back-and-forth between the Threads, plus
|
||||
# we have no possible way of predicting the output of the program
|
||||
|
||||
# Shared state: both worker functions modify this global, with no locking.
counter = 1
|
||||
|
||||
|
||||
def worker_a():
    """Push the shared ``counter`` upwards until it reaches 1000.

    Each pass adds one, reports the new value, then sleeps for 0 or 1
    second. No lock is taken around the update: the access is left
    unsynchronised on purpose to show the resulting race.
    """
    global counter
    while True:
        if counter >= 1000:
            break
        counter = counter + 1
        print(f'Worker A incremented counter by 1, counter = {counter}')
        pause = randint(0, 1)
        time.sleep(pause)
|
||||
|
||||
|
||||
def worker_b():
    """Pull the shared ``counter`` downwards until it reaches -1000.

    Each pass subtracts one, reports the new value, then sleeps for 0 or
    1 second. No lock is taken around the update: the access is left
    unsynchronised on purpose to show the resulting race.
    """
    global counter
    while True:
        if counter <= -1000:
            break
        counter = counter - 1
        print(f'Worker B decremented counter by 1, counter = {counter}')
        pause = randint(0, 1)
        time.sleep(pause)
|
||||
|
||||
|
||||
def start_threads(t1, t2):
    """Start both threads, then block until both have finished.

    Args:
        t1: First thread; started and joined first.
        t2: Second thread; started and joined second.
    """
    workers = (t1, t2)
    # Launch everything before waiting so the two run concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
def main():
    """Run both workers concurrently and print the total elapsed time."""
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments.
    t0 = time.perf_counter()
    thread1 = Thread(target=worker_a)
    thread2 = Thread(target=worker_b)
    start_threads(thread1, thread2)
    t1 = time.perf_counter()
    print(f'Execution time: {t1-t0:.2f}')


# Only launch the demo when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue