
Image Crawling

by 탶선 2023. 8. 2.
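
This post walks through a small script that searches Google Images for a query, follows the result links, and downloads every image found on each linked page with requests and BeautifulSoup.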
def download_image(url, save_path):
    # Stream the image and write it to disk in 1 KB chunks.
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
    else:
        pass  # print(f"Failed to download image from {url}")

def crawl_images(url, save_folder):
    # Fetch a page, collect every <img> tag, and download each image it references.
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        for img_tag in img_tags:
            img_url = img_tag.get('src')

            if img_url:
                if img_url.endswith('.svg'):   # skip vector graphics
                    continue
                img_url = urljoin(url, img_url)                # resolve relative src paths
                img_name = os.path.basename(img_url) + ".jpg"  # save everything with a .jpg suffix
                img_save_path = os.path.join(save_folder, img_name)
                download_image(img_url, img_save_path)
        # print("Image crawling completed.")
    else:
        pass  # print(f"Failed to access {url}")

import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin
from tqdm import tqdm

query = input()    # search term to look up on Google Images
num_images = 200   # note: defined here but never used below

url = f"https://www.google.com/search?q={query}&tbm=isch"   # Google image-search results page

response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

link_list = []   # result-page URLs extracted from the search results

for div in soup.find_all("div"):
    try:
        href = div.find("a")["href"]
        s = href.find('https')   # start of the embedded target URL
        e = href.find('(')       # -1 when the href contains no '('

        # 'https' and ')' at the same index normally means both are missing
        # (both find() calls return -1), i.e. this href is not a result link.
        if s == href.find(')'):
            continue
        link_list.append(href[s:e].split('&sa')[0])   # strip trailing &sa=... tracking parameters
    except (TypeError, KeyError):
        # The div has no <a> tag, or its <a> has no href attribute.
        continue
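
The slicing in the loop above relies on the (unofficial, and occasionally changing) shape of the hrefs on a Google results page, which typically embed the target address followed by tracking parameters such as &sa=.... A small illustration with a made-up href shows what the indexing extracts:

sample_href = "/url?q=https://example.com/some-page&sa=U&ved=abc123"   # made-up example
s = sample_href.find('https')
e = sample_href.find('(')                    # -1 here, since there is no '('
print(sample_href[s:e].split('&sa')[0])      # -> https://example.com/some-page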


save_folder = "C:/Users/user/code/" + query   # folder where the downloaded images are saved
if not os.path.exists(save_folder):
    os.makedirs(save_folder)

for url in tqdm(link_list):
    try:
        crawl_images(url, save_folder)
    except Exception:
        # Skip result pages that fail to load or parse for any reason.
        continue
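
One caveat: Google often serves a reduced or blocked page to the default requests User-Agent, so the search request above may return few or no usable links. A common workaround, sketched here with an illustrative browser-like header value that is not from the original post, is to send explicit headers:

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
response = requests.get(f"https://www.google.com/search?q={query}&tbm=isch",
                        headers=headers, timeout=10)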