
jack/Early-Prediction-of-Sepsis-from-Clinical-Data

This repository does not declare an open-source license file (LICENSE); before using it, check the project description and the upstream dependencies of its code.
test.py 5.87 KB
company committed on 2019-09-05 16:37 . Big update
import pandas as pd
import os
import io
import re
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from keras.layers import Dense, Activation, Dropout, Input, BatchNormalization, Flatten
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
import keras
# Configure TensorFlow so GPU memory is allocated on demand
config_gpu = True
if config_gpu:
    config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)
# Set GPU device
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
path_check_file = './check.csv'
path_folder = './training'
# sepsis: 0 is normal, 1 is sepsis
# gender: 0 is male, 1 is female
# age: the default threshold is 50; age=1 selects Age > 50, age=0 selects Age <= 50
def get_file_name(sepsis=1, gender=0, age=0):
    df = pd.read_csv(path_check_file)
    df = df[df['TypeSepsis'] == sepsis]
    df = df[df['Sex'] == gender]
    if age == 0:
        df = df[df['Age'] <= 50]
    else:
        df = df[df['Age'] > 50]
    return df
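# Illustrative usage (not called in this script): the three flags select a
# cohort from check.csv, e.g. the defaults pick septic male patients aged <= 50,
# while get_file_name(sepsis=0, gender=1, age=1) would pick non-septic female
# patients over 50.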
def create_data_frame(df_file_name, interpolation=False):
    # Read every patient file of the cohort and stack the records into one frame
    file_names = df_file_name['FileName']
    num_files = file_names.shape[0]
    for i in range(num_files):
        file = os.path.join(path_folder, file_names.iloc[i])
        df = pd.read_csv(file, delimiter='|')
        if interpolation:
            df = df.interpolate(method='linear').ffill().bfill()
        df = df.ffill().bfill()
        df['FileName'] = file_names.iloc[i]
        if i == 0:
            frames = df
        else:
            frames = pd.concat([frames, df])
    return frames
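# Illustration only: with interpolation=True a gap such as [1.0, NaN, NaN, 4.0]
# in a vital-sign column becomes [1.0, 2.0, 3.0, 4.0]; any NaNs left at the
# start or end of a record are then copied from the nearest observed value by
# ffill()/bfill().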
def process_string_info_frame(frames):
    # Capture the text that DataFrame.info() prints and parse it into a small
    # table of column names, non-null counts and dtypes
    buf = io.StringIO()
    frames.info(buf=buf)
    s = buf.getvalue()
    num = int(s.split('\n')[1].split(' ')[1])  # total number of rows
    lines = s.split('\n')[3:-5]                # one line per column
    label = ['Name', 'None-Null', 'Temp', 'Type']
    temp = []
    for line in lines:
        line = re.sub(' +', ' ', line)
        temp.append(line.split(' '))
    temp = pd.DataFrame(data=temp, columns=label)
    temp = temp.drop('Temp', axis=1)
    temp['None-Null'] = temp['None-Null'].astype(int)
    temp = temp.sort_values(by=['None-Null'], ascending=False)
    return num, temp
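# Note: this parses the text emitted by DataFrame.info(), which depends on the
# pandas version. A sturdier alternative (a sketch, assuming only the per-column
# non-null counts are needed) would be:
#   counts = frames.count().sort_values(ascending=False)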
def neural_network(x_train_, y_train_, x_test, epochs):
    # Hold out the last 30% of the known rows for validation
    num_examples = x_train_.shape[0]
    x_train = x_train_[:int(num_examples * 0.7)]
    y_train = y_train_[:int(num_examples * 0.7)]
    x_val = x_train_[int(num_examples * 0.7):]
    y_val = y_train_[int(num_examples * 0.7):]
    units = 512
    batch_size = 512
    # Two-layer fully connected regression network with batch norm and dropout
    input_model = Input(shape=(x_train.shape[1],))
    # x = Flatten()(input_model)
    x = Dense(units)(input_model)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    x = Dense(units)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)
    output_model = Dense(1, activation='linear')(x)
    model = Model(inputs=input_model, outputs=output_model)
    model.summary()
    # Note: 'accuracy' is not meaningful for this regression objective; the mean
    # squared error loss is what drives training.
    model.compile(loss='mean_squared_error',
                  optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7),
                  metrics=['accuracy'])
    # Reduce the learning rate when the validation loss stops improving
    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=3, verbose=1)
    callbacks_list = [lr_reducer]
    # ------ Fit network ---------
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(x_val, y_val),
              callbacks=callbacks_list, verbose=2, shuffle=True)
    y_test = model.predict(x_test, batch_size=batch_size)
    return y_test
def process_missing_data(frames, neural=True):
    # Iteratively impute missing values, completing one column per pass
    while True:
        # Parse the info() summary to find complete and incomplete columns
        num, temp = process_string_info_frame(frames)
        items_full = temp[temp['None-Null'] == num]['Name'].values.tolist()
        items_missing = temp[temp['None-Null'] < num]['Name']
        num_items_missing = items_missing.shape[0]
        if num_items_missing == 0:
            break
        # Impute the incomplete column with the fewest missing values first
        items_name = items_missing.iloc[0]
        items_full.append(items_name)
        data = frames[items_full]
        # Step 1: split the dataset into rows where the target column is
        # observed (train) and rows where it is missing (test).
        x_train = data[data[items_name].notnull()].drop(columns=items_name)
        y_train = data[data[items_name].notnull()][items_name]
        x_test = data[data[items_name].isnull()].drop(columns=items_name)
        if neural:
            predicted = neural_network(x_train, y_train, x_test, 200).flatten()
        else:
            linreg = LinearRegression()
            # Step 2: train the machine learning algorithm
            linreg.fit(x_train, y_train)
            # Step 3: predict the missing values in the attribute of the test data
            predicted = linreg.predict(x_test)
        # Step 4: obtain the complete dataset by writing the predictions back
        # into the target column
        frames.loc[frames[items_name].isnull(), items_name] = predicted
    # To persist the completed frame, something like the following could be used:
    # path_save = './xxxxxxxxxxxx.csv'
    # with open(path_save, 'w') as f:
    #     frames.to_csv(f, encoding='utf-8', header=True, index=False)
    return frames
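# Sketch of the imputation order (illustrative): columns are completed one at a
# time, starting with the column that has the fewest missing values; each newly
# completed column then joins the set of predictors used for the next one.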
def split_to_object(frames):
    # Write the completed records back out, one CSV per original patient file
    name_files = frames.drop_duplicates(subset=['FileName'], keep='first')['FileName']
    num_files = name_files.shape[0]
    for i in range(num_files):
        name_file = name_files.iloc[i]
        temp = frames[frames['FileName'] == name_file]
        path_save = './test_1_0_0/' + name_file
        with open(path_save, 'w') as f:
            temp.to_csv(f, encoding='utf-8', header=True, index=False)
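# Run the full pipeline for the default cohort (sepsis=1, gender=0, age <= 50),
# which matches the './test_1_0_0/' output folder used above.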
split_to_object(process_missing_data(create_data_frame(get_file_name())))