Image crawling
def download_image(url, save_path):
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
    else:
        pass  # print(f"Failed to download image from {url}")

def crawl_images(url, save_folder):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, ..
2023. 8. 2.
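The excerpt cuts off mid-call. A minimal sketch of how crawl_images might continue, assuming the target page exposes images as <img src=...> tags; the parser choice, the filename scheme, and the urljoin handling are assumptions, not from the original post:

import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

def crawl_images(url, save_folder):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")  # parser choice is an assumption
        os.makedirs(save_folder, exist_ok=True)
        for idx, img in enumerate(soup.find_all("img")):
            src = img.get("src")
            if not src:
                continue
            img_url = urljoin(url, src)  # resolve relative image paths
            save_path = os.path.join(save_folder, f"img_{idx}.jpg")  # naming scheme is hypothetical
            download_image(img_url, save_path)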
show bbox(coco)
import os
import cv2
import json

base_path = ""
json_path = base_path + '/' + "_annotations.coco.json"
with open(json_path, "r") as json_file:
    st_python = json.load(json_file)

red_color = (0, 0, 255)
tmp = 0
for i in range(len(st_python['images'])):
    file_name = st_python['images'][i]['file_name']
    img_id = st_python['images'][i]['id']
    img = cv2.imread(base_path + '/' + file_name)
    while True:
        bbox = st_..
2023. 8. 2.
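A minimal sketch of how the drawing step might continue, assuming standard COCO annotations ([x, y, width, height] boxes matched to images by image_id); it replaces the original's while True / tmp bookkeeping with a simple filter over the annotation list, and the display calls are assumptions:

for ann in st_python['annotations']:
    if ann['image_id'] == img_id:
        x, y, w, h = map(int, ann['bbox'])  # COCO stores [x, y, width, height]
        cv2.rectangle(img, (x, y), (x + w, y + h), red_color, 2)
cv2.imshow(file_name, img)
cv2.waitKey(0)
cv2.destroyAllWindows()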
python (reading JSON files)
import json
import os

path = os.path.join('경로')  # '경로' is a placeholder meaning "path"
path_list = os.listdir(path)
for i in path_list:
    file_path = path + '/' + i
    with open(file_path, "r", encoding='utf-8') as json_file:
        json_data = json.load(json_file)
2022. 4. 24.
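os.listdir returns every entry in the directory, so a stray non-JSON file would break json.load. A small filtered variant (an extension of the snippet above, not from the original post):

import json
import os

path = '경로'  # directory containing JSON files
json_data_list = []
for name in os.listdir(path):
    if name.endswith('.json'):  # skip non-JSON entries
        with open(os.path.join(path, name), 'r', encoding='utf-8') as f:
            json_data_list.append(json.load(f))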
Time-series data normalization (min-max)
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

attack = pd.read_csv("./data/haicon/attack.csv")
cols = []
for i in range(1, 87):
    if(i in [2,8,9,10,17,18,19,20,22,26,29,34,36,38,39,46,48,49,52,55,58,61,63,64,69,79,82,84,85]):
        continue
    if i ..
2021. 9. 7.
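The excerpt ends before the scaling step. A minimal sketch of the min-max normalization itself, assuming cols ends up holding the names of the sensor columns to scale (whatever the truncated loop builds):

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()  # rescales each selected column to the [0, 1] range
attack[cols] = scaler.fit_transform(attack[cols])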
Drawing time-series graphs with pyplot
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import os

path = "./data/haicon/train/"
tmp = os.listdir(path)
file_list = list()
for i in tmp:
    if(i[-4:] == ".csv"):
        file_list.append(i)
file_list
..
2021. 9. 6.
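The plotting code itself is cut off. A minimal sketch of one way to plot each file, assuming the HAICon CSVs carry a "time" column plus numeric sensor columns (both column assumptions are mine, not from the post):

for name in file_list:
    df = pd.read_csv(path + name)
    df["time"] = pd.to_datetime(df["time"])  # assumed timestamp column
    plt.figure(figsize=(12, 4))
    plt.plot(df["time"], df.iloc[:, 1])  # first sensor column, as an example
    plt.title(name)
    plt.show()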
AttributeError: module transformers.models.big_bird has no attribute BigBirdTokenizer
import torch
!pip install transformers==3.3.0
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
# !pip install transformers[torch]
# !pip install transformers[tf-cpu]
# !pip install transformers[flax]
!pip install sentencepiece
# from transformers import BertTokenizer
# from tr..
2021. 6. 28.
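For context: BigBirdTokenizer is sentencepiece-based and first shipped in transformers 4.3, so on older pins the attribute does not exist, and a missing sentencepiece install produces a similar failure. A sketch of loading it on a recent version (the checkpoint name is an example, not from the post):

# pip install "transformers>=4.3" sentencepiece
# (restart the runtime after installing, then:)
from transformers import BigBirdTokenizer

tokenizer = BigBirdTokenizer.from_pretrained("google/bigbird-roberta-base")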
Checking duplicate words in a list (with a dictionary)
temp = ['a', 'a', 'b', 'c']
w_count = {}
for lst in temp:
    try:
        w_count[lst] += 1
    except KeyError:
        w_count[lst] = 1
print(w_count)  # {'a': 2, 'b': 1, 'c': 1}
2021. 6. 19.
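The standard library covers this case directly; the same count with collections.Counter:

from collections import Counter

temp = ['a', 'a', 'b', 'c']
w_count = Counter(temp)
print(dict(w_count))  # {'a': 2, 'b': 1, 'c': 1}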
Profanity detection program
This is a protected post.
2021. 5. 28.
Extracting only the numbers
re.findall(r"\d+", expression) matches digits as whole groups; re.findall(r"\d", expression) matches them one character at a time.

import re

expression = "100-200*300-500+20"
number = re.findall(r"\d+", expression)
print('number :', number)
# number : ['100', '200', '300', '500', '20']

number = re.findall(r"\d", expression)
print('number :', number)
# number : ['1', '0', '0', '2', '0', '0', '3', '0', '0', '5', '0', '0', '2', '0']
2021. 3. 9.
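findall returns strings; if numeric values are needed, the matches can be cast (a small addition, not in the original):

import re

numbers = [int(n) for n in re.findall(r"\d+", "100-200*300-500+20")]
print(numbers)  # [100, 200, 300, 500, 20]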
re.sub
text = re.sub(r'RT @[\w_]+: ', '', text)  # remove retweet prefixes
text = re.sub(r'@[\w_]+', '', text)        # remove @mentions
# remove URLs
text = re.sub(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", ' ', text)  # URLs starting with http
text = re.sub(r"[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{2,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)", ' ', text)  # URLs without the http prefix
# remove hashtags
text = re.sub(r'[#]+[0-9a-zA-Z_..
2021. 2. 28.
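A sketch of how the hashtag pattern likely completes, wrapped as a reusable cleaner; the Korean character range in the hashtag pattern and the simplified URL regex are assumptions:

import re

def clean_tweet(text):
    text = re.sub(r'RT @[\w_]+: ', '', text)            # retweet prefix
    text = re.sub(r'@[\w_]+', '', text)                 # mentions
    text = re.sub(r'http[s]?://\S+', ' ', text)         # URLs (simplified pattern)
    text = re.sub(r'[#]+[0-9a-zA-Z_가-힣]+', '', text)  # hashtags; the 가-힣 range is an assumption
    return text.strip()

print(clean_tweet("RT @user: check https://example.com #파이썬"))  # -> "check"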