# Source: pywork/scrape3.py at master · hattwick/pywork · GitHub
# scraping example modified from "Web Scraping with Python"
# written for Python3

try:
	# NOTE(review): the standard library has no module named `request` and
	# nothing in this file uses the name; guarded so that its absence does
	# not abort the script. Probably a leftover of `urllib.request` - confirm.
	import request
except ImportError:
	request = None

try:
	# Python 3 locations of urlopen and its exception types.
	from urllib.error import HTTPError
	from urllib.error import URLError
	from urllib.request import urlopen
except ImportError:
	# Python 2 fallback.
	from urllib2 import HTTPError
	from urllib2 import URLError
	from urllib2 import urlopen

from bs4 import BeautifulSoup


def getTitle(url):
	"""Fetch *url* and return the first <h1> tag inside its <body>.

	Args:
		url: Address of the page to download.

	Returns:
		The <h1> element found by BeautifulSoup under <body>, or None when
		the page cannot be fetched (the server answered with an HTTP error
		or could not be reached at all) or when looking up the heading
		raises AttributeError (for example, the document has no <body>).
	"""
	try:
		html = urlopen(url)
	except (HTTPError, URLError):
		# HTTPError: the server replied with an error status.
		# URLError: the request never got a reply (bad host, missing file...).
		return None
	try:
		bsObj = BeautifulSoup(html.read(), 'html.parser')
		title = bsObj.body.h1
	except AttributeError:
		# bsObj.body is None when the document has no <body> element.
		return None
	finally:
		# Release the connection whether or not parsing succeeded.
		html.close()
	return title

# The same page is requested twice below: once through getTitle() and once
# directly, to demonstrate explicit error handling.
PAGE_URL = "http://www.pythonscraping.com/pages/page1.html"

title = getTitle(PAGE_URL)

# First account for http and url errors
# html stays None when the request fails, so the parsing step further down
# can be skipped instead of crashing on an unbound name.
html = None
try:
	html = urlopen(PAGE_URL)
except HTTPError:
	print("HTTP error detected.")
except URLError:
	print("Server not found.")
else:
	print("All conditions passed")

print('//////')
if html is not None:
	try:
		bsObj = BeautifulSoup(html.read(), 'html.parser')
	finally:
		# Release the connection once the body has been read.
		html.close()
	print(bsObj.h1)