반응형
import pandas as pd
# Task: print the mean 2L price of bags whose type is '규격봉투' and whose use is
# '음식물쓰레기', ignoring rows whose price is 0. Round, then print as an integer.
df = pd.read_csv("5-1price.csv")
is_standard_bag = df['종량제봉투종류'] == '규격봉투'
is_food_waste = df['종량제봉투용도'] == '음식물쓰레기'
has_price = df['2l가격'] != 0
selected = is_standard_bag & is_food_waste & has_price
ans = df.loc[selected, '2l가격'].mean()
# Two ways of printing the rounded value as an integer:
print(int(round(ans, 0)))  # round to 0 decimals, then cast to int
print(round(ans))  # author's note: omitting ", 0" also gives an integer
# Task: compute BMI and print the absolute difference between the number of
# people in the normal range and the number in the at-risk range (integer).
#   BMI = weight (kg) / height (m) squared
#   normal weight:               18.5 <= BMI < 23
#   overweight / at-risk weight: 23   <= BMI < 25
df = pd.read_csv("5-2bmi.csv")
# Height is divided by 100 before squaring (presumably recorded in cm).
df['bmi'] = df['Weight'] / (df['Height'] / 100) ** 2
# Each comparison needs its own parentheses: `&` binds tighter than `<=` / `<`,
# so an unparenthesised `18.5 <= s & s < 23` evaluates `s & s` first and then
# attempts a chained comparison on a Series, which raises.
normal = (df['bmi'] >= 18.5) & (df['bmi'] < 23)
danger = (df['bmi'] >= 23) & (df['bmi'] < 25)
print(abs(len(df[normal]) - len(df[danger])))
# Task: print, as an integer, the total number of students at the school with
# the largest net transfer-in count (net = transferred in - transferred out).
df = pd.read_csv("5-3student.csv")
df['순전입'] = df['전입학생수(계)'] - df['전출학생수(계)']
# Sort descending so the school with the largest net count is the first row.
df = df.sort_values('순전입', ascending=False)
# A bare expression displays nothing when run as a script, so print the answer.
print(int(df['전체학생수(계)'].iloc[0]))
5회 실기 2유형 3유형 연습
# Car price prediction
#   target (y):  price
#   metric:      RMSE (Root Mean Squared Error)
#   data:        train, test
#   submission:  result.csv with a single 'pred' column
import pandas as pd
test = pd.read_csv('data/5th/test.csv')
train = pd.read_csv('data/5th/train.csv')
# Separate the target so that `train` keeps the feature columns only.
y_train = train['price']
del train['price']
# Exploration notes recorded by the author (the calls themselves are omitted):
#   - shapes after removing the target: train (3759, 8), test (1617, 8)
#   - head(), info() and describe() were inspected for both frames
#   - isnull().sum() reported no missing values in train or test
# Scaling targets: every column whose dtype is not `object`.
from sklearn.preprocessing import RobustScaler
cols = train.select_dtypes(exclude=object).columns
scaler = RobustScaler()
# Learn the scaling statistics from train only, then apply them to both frames.
scaler.fit(train[cols])
train[cols] = scaler.transform(train[cols])
test[cols] = scaler.transform(test[cols])
# Encoding targets: model, transmission, fuelType.
# Distinct-value counts noted by the author (train, test):
#   model 19/19, transmission 3/3, fuelType 3/3
cols = ['model', 'transmission', 'fuelType']
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
for col in cols:
    # Equal distinct counts do not guarantee equal label sets, so fit on the
    # labels of both frames: transform() raises on a label it has not seen.
    # When the two sets match, the resulting codes equal a train-only fit.
    encoder.fit(pd.concat([train[col], test[col]]))
    train[col] = encoder.transform(train[col])
    test[col] = encoder.transform(test[col])
from sklearn.model_selection import train_test_split
# Hold out 20% of the training rows for validation; the seed is fixed so the
# split is repeatable between runs.
X_tr, X_val, y_tr, y_val = train_test_split(
    train,
    y_train,
    test_size=0.2,
    random_state=2023,
)
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import numpy as np
def calc_rmse(y_val, pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_val: True target values.
        pred: Predicted values, aligned with ``y_val``.

    Returns:
        The square root of ``mean_squared_error(y_val, pred)``.
    """
    return np.sqrt(mean_squared_error(y_val, pred))
# Validation RMSE noted by the author while comparing models:
#   LinearRegression()                         2266
#   DecisionTreeRegressor(random_state=2023)   1738
#   RandomForestRegressor(random_state=2023)   1234
#       max_depth=5: 1797 | max_depth=7: 1436 | max_depth=9: 1263
#       n_estimators=200: 1241 | max_depth=7, n_estimators=800: 1424
#   XGBRegressor(random_state=2023)            1202
#       max_depth=5: 1218 | max_depth=7: 1797 | max_depth=9: 1271
#       n_estimators=200: 1227 | max_depth=5, n_estimators=400: 1287
#       max_depth=7, n_estimators=800: 1306
#   LGBMRegressor(random_state=2023)           left disabled, no score noted
#
# Only the two best candidates are still fitted. Order matters: after the loop
# `model`, `pred` and `rmse` refer to the last entry (XGBRegressor).
candidates = [
    ('RandomForestRegressor ', RandomForestRegressor(random_state=2023)),
    ('XGBRegressor ', XGBRegressor(random_state=2023)),
]
for label, model in candidates:
    model.fit(X_tr, y_tr)
    pred = model.predict(X_val)
    rmse = calc_rmse(y_val, pred)
    print(label, rmse)
# Predict the test set with whatever `model` currently holds and write the
# single-column submission file.
submission_path = 'data/5th/20230619_01.csv'
pred = model.predict(test)
# pred = pred.astype(int)
submit = pd.DataFrame({'pred': pred})
submit.to_csv(submission_path, index=False)
# Read the file back to confirm what was written.
mj = pd.read_csv(submission_path)
print(mj.head())
# Score the predictions against the labels stored in y.csv.
y = pd.read_csv('data/5th/y.csv')
rmse = calc_rmse(y, pred)
print('real : ', rmse)
반응형
'IT,SW,Data,Cloud,코딩 > Python' 카테고리의 다른 글
20230622 이런저런함수들 (0) | 2023.06.22 |
---|---|
20230621 캐글 연습문제 풀기 (0) | 2023.06.22 |
20230620 넘파이 ceil, floor, trunc (0) | 2023.06.21 |
20230619 lightgbm (0) | 2023.06.19 |
20230617 빅분기 4회 기출유형 제2유형 (0) | 2023.06.17 |
20230617 빅분기 3유형 가설검정 t-test, one-way ANOVA (1) | 2023.06.17 |
20230616 빅분기공부 (0) | 2023.06.17 |
20230615 머신러닝 공부, 시험환경 꿀팁 등 (0) | 2023.06.15 |
댓글