2 Star 6 Fork 3

pingan_match_666/2018_pingan_behavior_predicting_driving_risk

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
feature_integrate02.py 3.49 KB
一键复制 编辑 原始数据 按行查看 历史
倩影繁华 提交于 2018-05-02 20:46 . ljt
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2018/4/5 8:09
# @Author : liujiantao
# @Site : 特征集成
# @File : feature_integrate02.py
# @Software: PyCharm
import time
from data_helper import DataHelper
from ljt_feature_etl02 import LjtFeatureEtl02
from threading_util import ThreadingUtil
import pandas as pd
import numpy as np
from config import *
class FeatureIntegrate02(object):
    """Feature aggregation: gather the outputs of ``LjtFeatureEtl02``.

    Both public methods load a data set through ``DataHelper``, run the six
    ``get_features01`` .. ``get_features06`` extractors through
    ``ThreadingUtil`` and return the collected results transposed.
    """

    def __init__(self):
        # Shared helper used for data loading and for the timing log
        # accumulated in ``print_str``.
        self.d_h = DataHelper()

    @staticmethod
    def _run_feature_tasks(etl, data):
        """Run the six feature extractors of *etl* and transpose the results.

        :param etl: an ``LjtFeatureEtl02`` instance built for *data*.
        :param data: the data set passed as the single positional argument
            to every extractor.
        :return: a list with one entry per column of ``mt.data_list``; entry
            ``c`` holds ``row[c]`` for every row of ``mt.data_list``.
            An empty list is returned when ``mt.data_list`` is empty.
        """
        # User trip-proportion features (translated from the original note).
        extractors = (
            etl.get_features01,
            etl.get_features02,
            etl.get_features03,
            etl.get_features04,
            etl.get_features05,
            etl.get_features06,
        )
        tasks = [{"func": func, "args": (data,)} for func in extractors]

        mt = ThreadingUtil()
        mt.set_thread_func_list(tasks)
        # NOTE(review): presumably start() blocks until every task has
        # finished and stored its result in mt.data_list -- confirm in
        # threading_util.
        mt.start()

        rows = mt.data_list
        # len() rather than truthiness so an array-like data_list is also
        # handled; indexing rows[0] on an empty result raises IndexError.
        if len(rows) == 0:
            return []
        # The width is taken from the first row, as in the original code.
        width = len(rows[0])
        return [[row[col] for row in rows] for col in range(width)]

    def get_all_features(self, path=path_train01):
        """Build the training feature matrix and target information.

        :param path: location of the training data handed to
            ``DataHelper.get_data``.
        :return: a 7-tuple ``(all_features_list, user_Y_list,
            pre_features_list, pre_Y, act_class_Y, min_Y, max_Y)``.
            ``pre_features_list``, ``pre_Y`` and ``act_class_Y`` are always
            empty lists; they are kept so the tuple shape stays stable for
            callers. ``min_Y`` / ``max_Y`` are the smallest / largest ``Y``
            among rows whose ``Y`` is not 0.
        """
        start = time.time()
        data = self.d_h.get_data(path)
        etl = LjtFeatureEtl02(self.d_h, data)
        user_Y_list = self.d_h.get_user_Y_list(data)

        # Filter once instead of once per aggregate.
        nonzero_Y = data[data['Y'] != 0]['Y']
        min_Y = nonzero_Y.min()
        max_Y = nonzero_Y.max()

        all_features_list = self._run_feature_tasks(etl, data)

        # Placeholders that the original implementation never filled.
        pre_features_list = []
        pre_Y = []
        act_class_Y = []

        self.d_h.print_str += " get_train_features cost time: " + str(time.time() - start) + " "
        return all_features_list, user_Y_list, pre_features_list, pre_Y, act_class_Y, min_Y, max_Y

    def get_test_features02(self, path=path_test01):
        """Build the test feature matrix.

        :param path: location of the test data handed to
            ``DataHelper.get_test_data``.
        :return: ``(userid_list, test_features)`` where ``userid_list`` comes
            from ``DataHelper.get_userlist`` and ``test_features`` is the
            transposed extractor output.
        """
        start = time.time()
        data = self.d_h.get_test_data(path)
        userid_list = self.d_h.get_userlist(data)
        etl = LjtFeatureEtl02(self.d_h, data)

        test_features = self._run_feature_tasks(etl, data)

        self.d_h.print_str += " get_test_features cost time: " + str(time.time() - start) + " "
        return userid_list, test_features
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/pingan_match_666/2018_pingan_behavior_predicting_driving_risk.git
git@gitee.com:pingan_match_666/2018_pingan_behavior_predicting_driving_risk.git
pingan_match_666
2018_pingan_behavior_predicting_driving_risk
2018_pingan_behavior_predicting_driving_risk
master

搜索帮助