-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path20_project.py
More file actions
108 lines (94 loc) · 3.73 KB
/
20_project.py
File metadata and controls
108 lines (94 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def create_soup(url, timeout=10):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled server would
            block the script indefinitely.

    Returns:
        A ``BeautifulSoup`` document built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: The server did not respond within
            *timeout* seconds.
    """
    # A desktop-browser User-Agent is sent with every request; presumably the
    # target sites reject or alter responses for the default client string.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    return BeautifulSoup(res.text, "lxml")
def print_news(index, title, link):
    """Print one news entry as a numbered title followed by its link.

    Args:
        index: Zero-based position of the entry; shown one-based.
        title: Headline text.
        link: URL of the article.
    """
    ordinal = index + 1
    print(f"{ordinal}. {title}")
    print(f" (링크 : {link})")
def scrape_weather():
    """Print today's weather scraped from a Naver search result page.

    The search query in the URL is the percent-encoded form of "서울 날씨"
    (Seoul weather). Every value is read from the page before any of the
    report lines are printed, so a missing element aborts the report right
    after the heading.
    """
    print("오늘의 날씨")
    page = create_soup(
        "https://search.naver.com/search.naver?where=nexearch&sm=tab_jum&query=%EC%84%9C%EC%9A%B8+%EB%82%A0%EC%94%A8"
    )

    def text_of(tag_name, css_class):
        # Text of the first <tag_name> element whose class attribute matches.
        return page.find(tag_name, attrs={"class": css_class}).get_text()

    # Sky condition summary
    summary = text_of("p", "cast_txt")

    # Temperatures: current (unit word removed), daily minimum and maximum
    now = text_of("p", "info_temperature").replace("도씨", "")
    low = text_of("span", "min")
    high = text_of("span", "max")

    # Chance of rain for the morning and the afternoon
    rain_am = text_of("span", "point_time morning").strip()
    rain_pm = text_of("span", "point_time afternoon").strip()

    # Fine dust (first <dd>) and ultra-fine dust (second <dd>)
    dust_values = page.find("dl", attrs={"class": "indicator"}).find_all("dd")
    pm10 = dust_values[0].get_text()
    pm25 = dust_values[1].get_text()

    # Report
    print(summary)
    print(f"현재 {now} (최저 {low} / 최고 {high})")
    print(f"오전 {rain_am} / 오후 {rain_pm}")
    print()
    print(f"미세먼지 {pm10}")
    print(f"초미세먼지 {pm25}")
    print()
def scrape_headline_news():
    """Print the first three headline articles from the Naver News front page.

    Each entry is printed through ``print_news`` as a numbered title with its
    link, and a blank line follows the list.
    """
    print("[헤드라인 뉴스]")
    url = "https://news.naver.com"
    soup = create_soup(url)
    news_list = soup.find("ul", attrs={"class": "hdline_article_list"}).find_all(
        "li", limit=3
    )
    for index, news in enumerate(news_list):
        # Look the anchor up once; it supplies both the title and the href.
        anchor = news.find("a")
        title = anchor.get_text().strip()
        # urljoin resolves a relative href against the site root and leaves an
        # already-absolute href untouched, where plain concatenation would
        # produce a malformed URL.
        link = urljoin(url, anchor["href"])
        print_news(index, title, link)
    print()
def scrape_it_news():
    """Print the first three entries of the Naver IT news list.

    Each entry is printed through ``print_news`` with the anchor's text as the
    title and its ``href`` as the link; a blank line follows the list.
    """
    print("[IT 뉴스]")
    page = create_soup(
        "https://news.naver.com/main/list.nhn?mode=LS2D&mid=shm&sid1=105&sid2=230"
    )
    headline_list = page.find("ul", attrs={"class": "type06_headline"})
    # Only the first three items are fetched.
    for position, item in enumerate(headline_list.find_all("li", limit=3)):
        # When the item contains an <img>, the second anchor is used instead
        # of the first.
        which = 1 if item.find("img") else 0
        anchor = item.find_all("a")[which]
        print_news(position, anchor.get_text().strip(), anchor["href"])
    print()
def scrape_english():
    """Print today's English conversation from the Hackers site.

    All ``<div>`` elements whose id starts with ``conv_kor_t`` are collected.
    The second half of the matches is printed as the English passage and the
    first half as the Korean passage.
    """
    print("[오늘의 영어회화]")
    page = create_soup(
        "https://www.hackers.co.kr/?c=s_eng/eng_contents/I_others_english&keywd=haceng_submain_lnb_eng_I_others_english&logger_kw=haceng_submain_lnb_eng_I_others_english"
    )
    lines = page.find_all("div", attrs={"id": re.compile("^conv_kor_t")})

    # Split at the midpoint: with 8 sentences, indices 4-7 are the English
    # ones and indices 0-3 the Korean ones.
    half = len(lines) // 2
    english, korean = lines[half:], lines[:half]

    print("(영어 지문)")
    for line in english:
        print(line.get_text().strip())
    print()
    print("(한글 지문)")
    for line in korean:
        print(line.get_text().strip())
if __name__ == "__main__":
    # Run every report section in order: today's weather, headline news,
    # IT news, then the English conversation.
    for section in (
        scrape_weather,
        scrape_headline_news,
        scrape_it_news,
        scrape_english,
    ):
        section()