[데이터분석]SpartaCodingClub
Python 첫째주_파이썬 기초
dowon
2023. 1. 4. 19:38
-라이브러리 설치하기
!pip install bs4 requests
-뉴스 크롤링하기
import requests
from bs4 import BeautifulSoup
# Request the Naver news search page for "삼성전자" while presenting a
# browser-like User-Agent, then locate the first headline link in the HTML.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/73.0.3683.86 Safari/537.36'
    ),
}
data = requests.get(
    'https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query=삼성전자',
    headers=headers,
)
soup = BeautifulSoup(data.text, 'html.parser')
a = soup.select_one('#sp_nws1 > div.news_wrap.api_ani_send > div > a')
# Bare expressions: these only show a value when run interactively
# (presumably as notebook cells — the last one is what gets displayed).
a['href']
a.get_text()
-키워드를 바꿔가며 뉴스 보기
def get_news(keyword, timeout=10):
    """Print the title and link of every Naver news search hit for *keyword*.

    Args:
        keyword: Search term sent as the ``query`` parameter.
        timeout: Seconds to wait for the HTTP response; without it a stalled
            connection would block forever.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # Let requests build the query string so that characters such as '&', '#'
    # or spaces inside the keyword are escaped instead of corrupting the URL.
    params = {'where': 'news', 'ie': 'utf8', 'sm': 'nws_hty', 'query': keyword}
    data = requests.get(
        'https://search.naver.com/search.naver',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(data.text, 'html.parser')
    lis = soup.select('#main_pack > section > div > div.group_news > ul > li')
    for li in lis:
        a = li.select_one('a.news_tit')
        if a is None:
            # select_one returns None when the <li> has no headline anchor;
            # skip it rather than crash on a.text.
            continue
        print(a.text, a['href'])
::키워드 입력해보기
# Run the scraper once for each sample company name, in the same order.
for keyword in ('삼성전자', '현대자동차'):
    get_news(keyword)
-엑셀 다루기
::엑셀 읽기 : 첫 줄(제목) 제외하고 전체 데이터 읽어보기
import openpyxl
# Open the sample workbook and print the first three columns of each data row.
wb = openpyxl.load_workbook('샘플파일.xlsx')
sheet = wb['Sheet']
# Start at row 2 so the title row is skipped without building and slicing
# a list of every row first.
new_rows = list(sheet.iter_rows(min_row=2))
for row in new_rows:
    print(row[0].value, row[1].value, row[2].value)
::스크래핑 결과를 news라는 폴더를 만들어 엑셀 파일로 저장하기
def make_news_excel(keyword, output_dir='.', timeout=10):
    """Scrape Naver news results for *keyword* and save them to an .xlsx file.

    Each headline becomes one row of ``[title, link]``. The file is named
    ``<YYYY-MM-DD>_<keyword>.xlsx`` using today's date.

    Args:
        keyword: Search term sent as the ``query`` parameter; also used in the
            output file name.
        output_dir: Directory the workbook is written to. It is created when
            missing. Defaults to the current directory.
        timeout: Seconds to wait for the HTTP response before giving up.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    # Imported here because the file never imports these names at module level;
    # the bare ``Workbook`` / ``datetime`` references raised NameError.
    import os
    from datetime import datetime

    wb = openpyxl.Workbook()
    sheet = wb.active

    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
    # Let requests escape the keyword so '&', '#' or spaces cannot break the URL.
    params = {'where': 'news', 'ie': 'utf8', 'sm': 'nws_hty', 'query': keyword}
    data = requests.get(
        'https://search.naver.com/search.naver',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(data.text, 'html.parser')
    lis = soup.select('#main_pack > section > div > div.group_news > ul > li')

    for li in lis:
        a = li.select_one('a.news_tit')
        if a is None:
            # No headline anchor in this <li>; skip it rather than crash.
            continue
        row = [a.text, a['href']]
        sheet.append(row)

    today = datetime.today().strftime("%Y-%m-%d")
    os.makedirs(output_dir, exist_ok=True)
    wb.save(os.path.join(output_dir, f"{today}_{keyword}.xlsx"))
    wb.close()
::for문을 사용하여 회사별 파일 만들기
# Build one spreadsheet per company keyword.
keywords = ['삼성전자', 'LG전자', '현대자동차', 'SK']
for keyword in keywords:
    make_news_excel(keyword)
::이미지 다운로드하기
import openpyxl
import urllib.request
# Download the daily chart image for every stock listed in the workbook.
# Column 0 supplies the output file name and column 1 the code used in the URL.
wb = openpyxl.load_workbook('관리종목.xlsx')
sheet = wb['종목']
# Start at row 2 so the title row is skipped.
new_rows = list(sheet.iter_rows(min_row=2))
for row in new_rows:
    if row[0].value is None or row[1].value is None:
        # A blank row would otherwise request ".../None.png" and save "None.png".
        continue
    # NOTE(review): if the codes are stored as numbers rather than text, any
    # leading zeros would be missing from the URL — confirm the column format.
    url = f'https://ssl.pstatic.net/imgfinance/chart/item/area/day/{row[1].value}.png'
    urllib.request.urlretrieve(url, f"{row[0].value}.png")
아직 첫째 주라 그런지 뭐가 뭔지 잘 모르겠다
그래도 열심히 해야지