[데이터분석]SpartaCodingClub

Python 첫째주_파이썬 기초

dowon 2023. 1. 4. 19:38

-라이브러리 설치하기

!pip install bs4 requests

-뉴스 크롤링하기

import requests
from bs4 import BeautifulSoup

# Request the Naver news search page for '삼성전자', sending a desktop-browser
# User-Agent header along with the request.
headers = {
  'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
}
search_url = 'https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query=삼성전자'
data = requests.get(search_url, headers=headers)

# Parse the response body and take the first anchor matching the CSS selector.
soup = BeautifulSoup(data.text, 'html.parser')
a = soup.select_one('#sp_nws1 > div.news_wrap.api_ani_send > div > a')

# Bare expressions: the link target and the link text of that anchor
# (presumably meant to be inspected in a notebook cell).
a['href']
a.text

-키워드를 바꿔가며 뉴스 보기

def get_news(keyword):
  """Print the title and link of every Naver news search result for a keyword.

  Args:
    keyword: Search term. It is sent through ``params`` so that characters
      such as ``&``, ``#``, ``=`` or spaces are URL-encoded instead of
      corrupting the query string.

  Returns:
    None. Each result is printed as ``<title> <url>`` on its own line.
  """
  headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
  params = {'where': 'news', 'ie': 'utf8', 'sm': 'nws_hty', 'query': keyword}
  data = requests.get('https://search.naver.com/search.naver', params=params, headers=headers)

  soup = BeautifulSoup(data.text, 'html.parser')
  lis = soup.select('#main_pack > section > div > div.group_news > ul > li')

  for li in lis:
    a = li.select_one('a.news_tit')
    # select_one returns None when the <li> holds no title anchor; skip it
    # rather than crash on a.text.
    if a is None:
      continue
    print(a.text, a['href'])

::키워드 입력해보기

# Run the news lookup once per company name.
for company in ('삼성전자', '현대자동차'):
  get_news(company)

-엑셀 다루기

::엑셀 읽기 : 첫 줄(제목) 제외하고 전체 데이터 읽어보기

import openpyxl
# Open the sample workbook and select the worksheet named 'Sheet'.
wb = openpyxl.load_workbook('샘플파일.xlsx')
sheet = wb['Sheet']

# Materialize all rows, then drop the first one (the title row).
all_rows = list(sheet.rows)
new_rows = all_rows[1:]

# Print the values of the first three cells of every remaining row.
for row in new_rows:
  first, second, third = row[0], row[1], row[2]
  print(first.value, second.value, third.value)

::스크래핑 결과를 엑셀파일로 저장 (아래 코드는 현재 폴더에 저장하므로, news라는 폴더를 만들어 모으려면 저장 경로 앞에 news/ 를 붙인다)

def make_news_excel(keyword):
  """Scrape Naver news results for a keyword and save them to an Excel file.

  Each result becomes one row ``[title, url]`` in the active sheet of a new
  workbook, which is saved as ``<YYYY-MM-DD>_<keyword>.xlsx`` in the current
  working directory.

  Args:
    keyword: Search term. It is sent through ``params`` so that characters
      such as ``&``, ``#``, ``=`` or spaces are URL-encoded instead of
      corrupting the query string. It is also used verbatim in the file name.

  Returns:
    None.
  """
  # Local import: the surrounding file never imports datetime.
  from datetime import datetime

  # The file imports the openpyxl module, not the bare Workbook name.
  wb = openpyxl.Workbook()
  sheet = wb.active

  headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
  params = {'where': 'news', 'ie': 'utf8', 'sm': 'nws_hty', 'query': keyword}
  data = requests.get('https://search.naver.com/search.naver', params=params, headers=headers)

  soup = BeautifulSoup(data.text, 'html.parser')
  lis = soup.select('#main_pack > section > div > div.group_news > ul > li')

  for li in lis:
    a = li.select_one('a.news_tit')
    # select_one returns None when the <li> holds no title anchor; skip it
    # rather than crash on a.text.
    if a is None:
      continue
    sheet.append([a.text, a['href']])

  # Date prefix for the file name, computed once after all rows are collected.
  today = datetime.today().strftime("%Y-%m-%d")

  wb.save(f"{today}_{keyword}.xlsx")
  wb.close()

::for문을 사용하여 회사별 파일 만들기

# Company names to scrape; one Excel file is produced per entry.
keywords = [
  '삼성전자',
  'LG전자',
  '현대자동차',
  'SK',
]

for company in keywords:
  make_news_excel(company)

::이미지 다운로드하기

import openpyxl
import urllib.request

# Open the workbook and select the worksheet named '종목'.
wb = openpyxl.load_workbook('관리종목.xlsx')
sheet = wb['종목']

# Every row except the first (the title row).
new_rows = list(sheet.rows)[1:]

for row in new_rows:
  # The first cell names the output file and the second cell fills the chart
  # URL (presumably the item name and its stock code; verify against the sheet).
  file_stem, chart_id = row[0].value, row[1].value
  url = f'https://ssl.pstatic.net/imgfinance/chart/item/area/day/{chart_id}.png'
  urllib.request.urlretrieve(url, f"{file_stem}.png")

 

아직 첫째주라 그런지 뭐가 뭔지는 잘 모르겠다

그래도 열심히 해야지