-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape3.py
More file actions
36 lines (30 loc) · 818 Bytes
/
scrape3.py
File metadata and controls
36 lines (30 loc) · 818 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# scraping example modified from "Web Scraping with Python"
# written for Python3
from urllib import request
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup
def getTitle(url):
    """Download *url* and return the first ``<h1>`` inside its ``<body>``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The result of ``bsObj.body.h1`` for the parsed page, or ``None`` when
        the page cannot be fetched (HTTP error status, unreachable server,
        missing resource) or when that attribute lookup raises
        ``AttributeError`` (presumably a document without a ``<body>``).
    """
    try:
        # The context manager closes the response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except URLError:
        # HTTPError is a subclass of URLError, so this single clause covers
        # both error statuses and "server not found" failures.
        return None

    try:
        bsObj = BeautifulSoup(markup, 'html.parser')
        return bsObj.body.h1
    except AttributeError:
        return None
# Page fetched by both demonstrations below.
PAGE_URL = "http://www.pythonscraping.com/pages/page1.html"

# Helper-based fetch; the result is kept in `title` but not printed.
title = getTitle(PAGE_URL)

# First account for HTTP and URL errors. HTTPError is tested before URLError
# because it is the more specific (sub)class.
try:
    html = urlopen(PAGE_URL)
except HTTPError:
    print("HTTP error detected.")
except URLError:
    print("Server not found.")
else:
    print("All conditions passed")
    print('//////')
    # Parse only on success: `html` is unbound when the request failed, so
    # reading it unconditionally would raise NameError after the message.
    with html:
        bsObj = BeautifulSoup(html.read(), 'html.parser')
    print(bsObj.h1)