master

分支 (1)

管理

管理

master

newprogess
/
pca_gotmd.py

import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.decomposition import PCA


class TfDataset(Dataset):
    """Map-style dataset of PCA-reduced features and labels read from a CSV.

    The file is parsed with ``np.genfromtxt`` (comma-delimited, first line
    skipped). Every column except the last one is used as a feature, the
    fifth column from the end is used as the label, and the features are
    projected onto ``n_components`` principal components.
    """

    def __init__(self, filepath, save_filepath=None, n_components=12):
        """Load ``filepath``, reduce the features with PCA, optionally save.

        Args:
            filepath: Path of the comma-separated input file.
            save_filepath: If truthy, the reduced features together with a
                ``label`` column and a constant ``dborgot`` column are
                written to this path as CSV (without the index).
            n_components: Number of principal components to keep.
        """
        xy = np.genfromtxt(filepath, delimiter=',', dtype=np.float32, skip_header=1, encoding='utf-8')

        # NOTE(review): skip_header=1 already drops the first line, so the
        # [1:] slices below discard one more row — confirm this is intended.
        # NOTE(review): column -5 (the label) also lies inside [:, :-1], so it
        # is part of the features fed to PCA — confirm this is intended.
        raw_features = xy[1:, :-1]
        self.y_data = torch.from_numpy(xy[1:, [-5]])

        # Reduce the feature dimensionality with PCA and keep the projection
        # as the dataset's features.
        pca = PCA(n_components=n_components)
        reduced_data = pca.fit_transform(raw_features)
        self.x_data = torch.from_numpy(reduced_data)

        # The sample count must describe the tensors actually stored; using
        # xy.shape[0] here over-counted by the row dropped by the [1:] slice.
        self.len = self.x_data.shape[0]

        # Save the reduced data as a CSV file when a target path was given.
        if save_filepath:
            df_reduced = pd.DataFrame(
                reduced_data,
                columns=self._component_names(reduced_data.shape[1]),
            )
            df_reduced['label'] = self.y_data.numpy().ravel()
            df_reduced['dborgot'] = 1  # constant 1 column: marks rows as the gout dataset
            df_reduced.to_csv(save_filepath, index=False)

    @staticmethod
    def _component_names(count):
        """Return the unique column names ``PC1`` .. ``PC<count>``."""
        return [f'PC{i}' for i in range(1, count + 1)]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples held in ``x_data``."""
        return self.x_data.shape[0]

    # Initialize the dataset and save the PCA-reduced data to a CSV file


# Built at module level: the source CSV is read and the PCA-reduced table is
# written to ./reduced_gotmd.csv as soon as this statement executes.
train_dataset = TfDataset(
    filepath='./gotmdfillzero.csv',
    save_filepath='./reduced_gotmd.csv',
)

#
# train_dataset = tfDataset('../data/resample.csv')