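# NOTE: web-page banner captured with the source ("code pull complete; the page will refresh automatically") - not part of the program.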
import lightgbm as lgb
import prepare
import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split,cross_val_score,StratifiedKFold,GridSearchCV
from sklearn.metrics import roc_auc_score
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import confusion_matrix, recall_score, roc_auc_score , accuracy_score
import prepare
import pandas as pd
import time
import datetime
def lgb_722():
    """Train a LightGBM binary classifier and write test predictions to ``lightgbm.csv``.

    ``prepare.read()`` is unpacked into six values, used here as: training
    features, validation features, training labels, validation labels, the
    test features to predict on, and the submission frame that receives the
    ``isDefault`` column.  (The exact types are defined by ``prepare`` and
    are not visible from this function.)

    Side effects: prints the prediction array and its shape, and writes
    ``lightgbm.csv`` (without the index) into the current working directory.
    """
    x, val_x, y, val_y, testA, sub = prepare.read()

    train = lgb.Dataset(data=x, label=y)
    # Validation data has to be binned with the training set's bin mappers;
    # state that relationship explicitly instead of leaving it implicit.
    val = lgb.Dataset(data=val_x, label=val_y, reference=train)

    param = {
        'num_leaves': 31,
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
    }

    # The previous version set 'num_trees': 100 inside the params AND passed
    # 500 boosting rounds to lgb.train().  'num_trees' is an alias of
    # num_iterations and the params value takes precedence, so 100 rounds is
    # what actually ran.  Keep that effective value and state it only once.
    num_round = 100
    bst = lgb.train(param, train, num_boost_round=num_round, valid_sets=[val])

    # With the 'binary' objective, predict() yields one score per test row.
    r = bst.predict(testA)
    print(r.shape, r)

    sub['isDefault'] = r
    sub.to_csv('lightgbm.csv', index=False)
def xgb_553():
    """Fit an XGBoost classifier, report hold-out metrics, write ``xgb-02-2.csv``.

    The five prepared CSV paths are handed to ``prepare.prepare_csv`` and the
    result is unpacked as train features, hold-out features, train labels,
    hold-out labels and the test features.  After fitting, the accuracy
    score, confusion matrix, recall and ROC AUC on the hold-out split are
    printed, and the model's class predictions for the test features are
    stored in the ``isDefault`` column of the sample submission.
    """
    csv_paths = (
        "data/prepare/xtrain.csv",
        "data/prepare/xtest.csv",
        "data/prepare/ytrain.csv",
        "data/prepare/ytest.csv",
        "data/prepare/testA.csv",
    )
    X_train, X_test, y_train, y_test, testA = prepare.prepare_csv(*csv_paths)
    submission = pd.read_csv("data/sample_submit.csv")

    # `i` only labels the log lines below; the model depth is set separately.
    i = 5
    model = XGBClassifier(
        max_depth=5,
        n_estimators=490,
        eval_metric=['logloss', 'auc', 'error'],
    )
    model.fit(X_train, y_train)

    holdout_pred = model.predict(X_test)
    holdout_score = model.score(X_test, y_test)
    print(f"max_depth:{i},score", holdout_score)

    matrix = confusion_matrix(y_test, holdout_pred, labels=[1, 0])
    print(f"max_depth:{i},混淆矩阵", matrix)

    recall = recall_score(y_test, holdout_pred)
    print(f"max_depth:{i},召回率", recall)

    positive_proba = model.predict_proba(X_test)[:, 1]
    print(f"max_depth:{i},auc", roc_auc_score(y_test, positive_proba))

    # NOTE(review): predict() gives class labels, whereas xgb_728 submits
    # predict_proba(...)[:, 1].  If the submission is scored with a ranking
    # metric such as AUC, probabilities are presumably what is wanted here
    # too - confirm before reusing this function.
    submission['isDefault'] = model.predict(testA)
    submission.to_csv('xgb-02-2.csv', index=False)
def xgb_728():
    """Fit an XGBoost classifier and submit positive-class probabilities.

    Loads the prepared split through ``prepare.prepare_csv``, fits an
    ``XGBClassifier`` (depth 5, 490 estimators), prints accuracy, confusion
    matrix, recall and ROC AUC for the hold-out split, then writes the
    second ``predict_proba`` column for the test features into the
    ``isDefault`` column of ``xgb-03-1.csv``.
    """
    train_x_path = "data/prepare/xtrain.csv"
    holdout_x_path = "data/prepare/xtest.csv"
    train_y_path = "data/prepare/ytrain.csv"
    holdout_y_path = "data/prepare/ytest.csv"
    test_path = "data/prepare/testA.csv"

    prepared = prepare.prepare_csv(
        train_x_path, holdout_x_path, train_y_path, holdout_y_path, test_path
    )
    X_train, X_test, y_train, y_test, testA = prepared
    sub = pd.read_csv("data/sample_submit.csv")

    # Value shown in the "max_depth:" prefix of every log line.
    i = 5
    clf = XGBClassifier(max_depth=5, n_estimators=490,
                        eval_metric=['logloss', 'auc', 'error'])
    clf.fit(X_train, y_train)

    # Hold-out diagnostics, printed in a fixed order.
    predicted = clf.predict(X_test)
    accuracy = clf.score(X_test, y_test)
    print(f"max_depth:{i},score", accuracy)
    print(f"max_depth:{i},混淆矩阵", confusion_matrix(y_test, predicted, labels=[1, 0]))
    print(f"max_depth:{i},召回率", recall_score(y_test, predicted))
    print(f"max_depth:{i},auc", roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))

    # Column 1 of predict_proba is submitted rather than hard labels.
    test_proba = clf.predict_proba(testA)[:, 1]
    print(test_proba.shape, test_proba)

    sub['isDefault'] = test_proba
    sub.to_csv('xgb-03-1.csv', index=False)
# Script entry point: only the LightGBM pipeline runs when this file is
# executed directly; the XGBoost variants must be called explicitly.
if __name__ == "__main__":
    lgb_722()
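# NOTE: hosting-site moderation notice captured with the source - not part of the program.
# Translation: "This location may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions.
# If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or content violating national laws and regulations, you may click submit to appeal and we will handle it as soon as possible."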