시계열 데이터 정규화
min_max 정규화 from datetime import datetime import pandas as pd import matplotlib.pyplot as plt from sklearn.preprocessing import MinMaxScaler attack = pd.read_csv("./data/haicon/attack.csv") cols = [] for i in range(1,87): if(i in [2,8,9,10,17,18,19,20,22,26,29,34,36,38,39,46,48,49,52,55,58,61,63,64,69,79,82,84,85]): continue if i
2021. 9. 7.
숫자만 추출하기
number = re.findall("\d+",expression) 묶음단위로 number = re.findall("\d",expression) 한글자 단위로 import re expression = "100-200*300-500+20" number = re.findall("\d+",expression) print('number :',number) # number : ['100', '200', '300', '500', '20'] number = re.findall("\d", expression) print('number :',number) # number : ['1', '0', '0', '2', '0', '0', '3', '0', '0', '5', '0', '0', '2', '0']
2021. 3. 9.
re.sub
text = re.sub('RT @[\w_]+: ', '', text) # enticons 제거 text = re.sub('@[\w_]+', '', text) # URL 제거 text = re.sub(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", ' ', text) # http로 시작되는 url text = re.sub(r"[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{2,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)", ' ', text) # http로 시작되지 않는 url # Hashtag 제거 text = re.sub('[#]+[0-9a-zA-Z_..
2021. 2. 28.