반응형
def download_image(url, save_path, timeout=10):
    """Download the resource at ``url`` and write it to ``save_path``.

    The body is streamed to disk in 1 KiB chunks. Nothing is written when the
    server answers with a status code other than 200.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the crawl indefinitely.

    Raises:
        requests.RequestException: If the request fails or times out.
        OSError: If ``save_path`` cannot be opened for writing.
    """
    # A streamed response keeps its connection checked out until the body is
    # fully consumed or the response is closed. Closing it via the context
    # manager also covers the non-200 path, where the body is never read.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
def crawl_images(url, save_folder, timeout=10):
    """Download every ``<img>`` referenced by the page at ``url``.

    Each image is saved in ``save_folder`` under the last path component of
    its URL with ``.jpg`` appended. SVG images are skipped. Nothing happens
    when the page itself does not answer with status 200.

    Args:
        url: Address of the HTML page to scan.
        save_folder: Existing directory that receives the downloaded files.
        timeout: Seconds to wait for the page before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import urlparse

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    for img_tag in soup.find_all('img'):
        src = img_tag.get('src')
        if not src:
            continue

        # Resolve relative sources against the page address.
        img_url = urljoin(url, src)
        parsed = urlparse(img_url)

        # Inline ``data:`` images and other non-HTTP schemes cannot be
        # fetched with requests; skip them instead of raising.
        if parsed.scheme not in ('http', 'https'):
            continue

        # Test the path only, case-insensitively, so that "logo.SVG" and
        # "logo.svg?v=2" are recognised as SVG as well.
        if parsed.path.lower().endswith('svg'):
            continue

        # Build the file name from the path alone: a query string would put
        # characters such as '?' into the name, which Windows rejects.
        img_name = os.path.basename(parsed.path) or 'image'
        img_save_path = os.path.join(save_folder, img_name + ".jpg")

        try:
            download_image(img_url, img_save_path)
        except (requests.RequestException, OSError):
            # Best effort: one broken image must not abort the remaining
            # images on this page.
            continue
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin
from tqdm import tqdm
# Read the search term from stdin; it is also used as the output folder name.
query = input()
num_images = 200  # NOTE(review): not enforced anywhere in this script -- confirm intent

# Let requests build the query string so that spaces, '&', '#' and non-ASCII
# characters in the search term are percent-encoded correctly.
response = requests.get(
    "https://www.google.com/search",
    params={"q": query, "tbm": "isch"},
    timeout=10,
)
soup = BeautifulSoup(response.content, "html.parser")

# Collect the target address of the first link inside every <div>.
link_list = []
for div in soup.find_all("div"):
    anchor = div.find("a")
    if anchor is None:
        continue
    href = anchor.get("href")
    if not href:
        continue

    # Keep only hrefs that embed an absolute https address.
    start = href.find("https")
    if start == -1:
        continue
    target = href[start:]

    # NOTE(review): the original also cut the link at the first '(' --
    # kept for compatibility, confirm whether it is intended.
    paren = target.find("(")
    if paren != -1:
        target = target[:paren]

    # Drop the tracking parameters that follow the target ("&sa=...").
    target = target.split('&sa')[0]
    if target:
        link_list.append(target)

# Nested <div>s share the same first link; crawl every page only once,
# keeping the order in which the links were found.
link_list = list(dict.fromkeys(link_list))

save_folder = "C:/Users/user/code/" + query  # folder the images are saved into
os.makedirs(save_folder, exist_ok=True)

for page_url in tqdm(link_list):
    try:
        crawl_images(page_url, save_folder)
    except Exception as exc:
        # Best effort: report the failed page and continue with the next one.
        # ``Exception`` (not a bare except) keeps Ctrl+C working.
        tqdm.write(f"Skipped {page_url}: {exc}")
반응형
'-------------코딩------------- > Python 기초 코딩' 카테고리의 다른 글
show bbox(coco) (0) | 2023.08.02 |
---|---|
python (json 파일 읽기) (0) | 2022.04.24 |
시계열 데이터 정규화 (0) | 2021.09.07 |
pyplot 시계열 데이터 그래프 그리기 (0) | 2021.09.06 |
AttributeError: module transformers.models.big_bird has no attribute BigBirdTokenizer (0) | 2021.06.28 |
댓글