반응형

네이버 인기검색어 크롤링

    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    from html2text import html2text
    import pprint

    if __name__ == "__main__":
        url = urlopen("https://www.naver.com/")
        bs = BeautifulSoup(url,'html.parser')
        rank = bs.find_all('span',class_='ah_k')
        list = []
        dic = {}
        for val in rank:
            list.append(html2text(str(val)).replace("\n","",2))
        for n in range(0,20):
            dic[n+1] = list[n]
        dic = pprint..

2020. 10. 19.

네이버 증권뉴스 크롤링(2)

    import pandas as pd
    import re
    import requests
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    from urllib.request import HTTPError
    from urllib import parse
    from tqdm import trange

    data = pd.read_csv("이지케어텍_url.csv")
    data = data['url']
    hh = []
    ii = []
    for i in trange(len(data)):
        url = data[i]
        req = requests.get(url)
        bs = BeautifulSoup(req.content, "html.parser")
        div = bs.find_all..

2020. 7. 27.

네이버 증권뉴스 크롤링(1)

    import time
    import pandas as pd
    import re
    import requests
    from bs4 import BeautifulSoup
    from urllib.request import urlopen
    from urllib.request import HTTPError
    from urllib import parse
    import chardet
    import math
    import numpy as np

    def craw():
        news = bs.find_all('dl', {'class' : 'newsList'})
        news_list = []
        news_list_2 = []
        title_list = []
        title_list_2 = []
        url_list = []
        url_list_2 = []
        for new in..

2020. 7. 27.

이전 1 다음

반응형