# [web-page residue, not part of the module] Code pull complete; the page will refresh automatically.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2018/4/5 8:09
# @Author : liujiantao
# @Site : 特征集成
# @File : feature_integrate.py
# @Software: PyCharm
import time
from data_helper import DataHelper
from ljt_feature_etl02 import LjtFeatureEtl02
from threading_util import ThreadingUtil
import pandas as pd
import numpy as np
from config import *
class FeatureIntegrate02(object):
    """Feature aggregation.

    Runs the six ``get_features0N`` extractors of ``LjtFeatureEtl02`` through
    ``ThreadingUtil`` and reshapes the collected results into one list per
    column of the collected data, for both the training and the test data.
    """

    def __init__(self):
        # Shared helper used for data loading and for accumulating the
        # timing log in ``print_str``.
        self.d_h = DataHelper()

    @staticmethod
    def _transpose(rows):
        """Swap the two axes of a list of sequences.

        The width is taken from the first row, exactly as the original inline
        comprehension did, so a shorter later row still raises ``IndexError``.

        :param rows: sequence of indexable rows (``ThreadingUtil.data_list``).
        :return: ``[[row[col] for row in rows] for col in range(width)]``,
            or ``[]`` when ``rows`` is empty (previously an ``IndexError``).
        """
        if not rows:
            return []
        width = len(rows[0])
        return [[row[col] for row in rows] for col in range(width)]

    def _run_extractors(self, extractor, data):
        """Run every feature extractor on ``data`` and return transposed results.

        :param extractor: ``LjtFeatureEtl02`` instance bound to ``data``.
        :param data: data set handed to every extractor as its only argument.
        :return: the runner's ``data_list`` with its two axes swapped.
        """
        # User trip proportion features (and the other feature groups).
        extractor_funcs = (
            extractor.get_features01,
            extractor.get_features02,
            extractor.get_features03,
            extractor.get_features04,
            extractor.get_features05,
            extractor.get_features06,
        )
        func_list = [{"func": func, "args": (data,)} for func in extractor_funcs]

        runner = ThreadingUtil()
        runner.set_thread_func_list(func_list)
        # NOTE(review): presumably start() blocks until every worker has
        # finished and data_list is fully populated -- confirm in ThreadingUtil.
        runner.start()
        # NOTE(review): presumably each data_list entry is one extractor's
        # per-user output, so the transpose yields one feature vector per
        # user -- verify against ThreadingUtil / LjtFeatureEtl02.
        return self._transpose(runner.data_list)

    def get_all_features(self, path=path_train01):
        """Build the training feature matrix and label information.

        :param path: location passed to ``DataHelper.get_data``.
        :return: tuple ``(all_features_list, user_Y_list, pre_features_list,
            pre_Y, act_class_Y, min_Y, max_Y)``. ``pre_features_list``,
            ``pre_Y`` and ``act_class_Y`` are currently always empty lists;
            they are kept only so the shape of the return value stays stable
            for existing callers. ``min_Y`` / ``max_Y`` are the smallest and
            largest values of the ``Y`` column among rows where ``Y != 0``.
        """
        start = time.time()
        data = self.d_h.get_data(path)
        extractor = LjtFeatureEtl02(self.d_h, data)
        user_Y_list = self.d_h.get_user_Y_list(data)

        # Evaluate the non-zero mask once and reuse it for both extremes.
        nonzero_y = data[data['Y'] != 0]['Y']
        min_Y = nonzero_y.min()
        max_Y = nonzero_y.max()

        all_features_list = self._run_extractors(extractor, data)

        # Placeholders: no code path fills these in at the moment.
        pre_features_list = []
        pre_Y = []
        act_class_Y = []

        self.d_h.print_str += " get_train_features cost time: " + str(time.time() - start)+" "
        return all_features_list, user_Y_list, pre_features_list, pre_Y, act_class_Y, min_Y, max_Y

    def get_test_features02(self, path=path_test01):
        """Build the feature matrix for the test data.

        :param path: location passed to ``DataHelper.get_test_data``.
        :return: tuple ``(userid_list, test_features)`` where ``userid_list``
            comes from ``DataHelper.get_userlist`` and ``test_features`` is
            the transposed output of the six feature extractors.
        """
        start = time.time()
        data = self.d_h.get_test_data(path)
        userid_list = self.d_h.get_userlist(data)
        extractor = LjtFeatureEtl02(self.d_h, data)

        test_features = self._run_extractors(extractor, data)

        self.d_h.print_str += " get_test_features cost time: " + str(time.time() - start)+" "
        return userid_list, test_features
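# [web-page residue, not part of the module] This section may contain content unsuitable for display and is hidden by the page. You can review and modify it using the relevant editing functions.
# [web-page residue, not part of the module] If you confirm that the content involves no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, and nothing that violates applicable national laws and regulations, you can click submit to appeal, and we will handle it as soon as possible.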