본문 바로가기
반응형
네이버 인기검색어 크롤링 from urllib.request import urlopen from bs4 import BeautifulSoup from html2text import html2text import pprint if __name__ == "__main__": url = urlopen("https://www.naver.com/") bs = BeautifulSoup(url,'html.parser') rank = bs.find_all('span',class_='ah_k') list = [] dic = {} for val in rank: list.append(html2text(str(val)).replace("\n","",2)) for n in range(0,20): dic[n+1] = list[n] dic = pprint.. 2020. 10. 19.
트위터 크롤링 # import packages import time import datetime import GetOldTweets3 as got import logging import logging.handlers import requests from bs4 import BeautifulSoup from multiprocessing import Pool import pandas as pd import os # 트윗 수집하는 함수 정의 # def get_tweets(start_date, end_date, keyword, keyword2): def get_tweets(start_date, end_date, keyword): # 범위 끝을 포함하게 만듦 end_date = (datetime.datetime.strptime.. 2020. 7. 28.
네이버 증권뉴스 크롤링(2) import pandas as pd import re import requests from bs4 import BeautifulSoup from urllib.request import urlopen from urllib.request import HTTPError from urllib import parse from tqdm import trange data = pd.read_csv("이지케어텍_url.csv") data = data['url'] hh = [] ii = [] for i in trange(len(data)): url = data[i] req = requests.get(url) bs = BeautifulSoup(req.content, "html.parser") div = bs.find_all.. 2020. 7. 27.
네이버 증권뉴스 크롤링(1) import time import pandas as pd import re import requests from bs4 import BeautifulSoup from urllib.request import urlopen from urllib.request import HTTPError from urllib import parse import chardet import math import numpy as np def craw(): news = bs.find_all('dl', {'class' : 'newsList'}) news_list = [] news_list_2 = [] title_list = [] title_list_2 = [] url_list = [] url_list_2 = [] for new in.. 2020. 7. 27.
벅스 일간차트 크롤링 from urllib.request import urlopen from bs4 import BeautifulSoup from html2text import html2text import sys import pprint if __name__ == "__main__": f = open("bugs_chart.txt", "w") url = urlopen("http://music.bugs.co.kr/chart/track/day/total") bs = BeautifulSoup(url,'html.parser') artist_list=[] # 가수 리스트 생성 title_list = [] # 제목 리스트 생성 daily_chart = {} # 가수 + 제목 딕셔너리 artist = bs.find_all('p', cla.. 2020. 2. 8.
반응형