#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@File : pytorch代码片段.py
@Time : 2021/08/06 10:33:38
@Desc : A collection of PyTorch code snippets
"""
import numpy as np
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import BertModel
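# `PaperDataset` is used by create_data_loader below but is not defined in this
# snippet file. The class below is only a minimal sketch of what it is assumed
# to look like (a BERT-style text-classification dataset); the field names and
# tokenizer call are assumptions, not the original implementation.
from torch.utils.data import Dataset


class PaperDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Tokenize one text, padding/truncating it to max_len.
        encoding = self.tokenizer(
            str(self.texts[idx]),
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long),
        }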
# Multi-process data loading
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = PaperDataset(  # dataset
        texts=df["text"].values,
        labels=df["label"].values,
        tokenizer=tokenizer,
        max_len=max_len,
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4,  # multiple worker processes
        shuffle=False,
        pin_memory=True,  # page-locked (pinned) host memory for faster GPU transfer
    )
# Replace the .cpu() call in the baseline with .detach()
# baseline: outputs.data.cpu().numpy()
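# A likely intended form (assumption, not from the original file): detach from
# the autograd graph first, then move to CPU and convert to NumPy:
#   preds = outputs.detach().cpu().numpy()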
# Gradient accumulation
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    print("start training!")
    model = model.train()
    losses = []
    pred_ls = []
    label_ls = []
    accumulation_steps = 4  # effective batch size = batch_size * accumulation_steps
    model.zero_grad()
    for i, d in enumerate(tqdm(data_loader)):
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["labels"].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)
        losses.append(loss.item())  # record the unscaled loss
        loss = loss / accumulation_steps  # scale so the accumulated gradients average out
        loss.backward()
        if (i + 1) % accumulation_steps == 0:  # wait for several backward passes
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip before stepping
            optimizer.step()  # now we can take an optimizer step
            scheduler.step()
            model.zero_grad()  # reset accumulated gradients
        label_ls.extend(targets.tolist())
        pred_ls.extend(preds.tolist())
    accuracy = accuracy_score(label_ls, pred_ls)
    return accuracy, np.mean(losses)
# bert-base + BiLSTM
class PaperClassifier(nn.Module):
    def __init__(self):
        n_classes = 39
        super(PaperClassifier, self).__init__()
        PRE_TRAINED_MODEL_NAME = "bert-base-uncased"
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.bilstm = nn.LSTM(
            input_size=self.bert.config.hidden_size,
            hidden_size=self.bert.config.hidden_size,
            batch_first=True,
            bidirectional=True,
        )
        self.drop = nn.Dropout(p=0.5)
        self.out = nn.Linear(self.bert.config.hidden_size * 2, n_classes)

    def forward(self, input_ids, attention_mask):
        # input_ids / attention_mask: [batch, 300]; 300 is the sequence length.
        # return_dict=False makes BertModel return a tuple, so the unpacking
        # below also works with transformers >= 4.x.
        last_hidden_out, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )  # last_hidden_out: [batch, 300, 768]; pooled_output (sentence-level): [batch, 768]
        last_hidden_out = self.drop(last_hidden_out)
        output_hidden, _ = self.bilstm(last_hidden_out)  # [batch, 300, 768 * 2]
        output = self.drop(output_hidden)  # dropout
        output = output.mean(dim=1)  # mean-pool over the sequence dimension
        return self.out(output)
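# A hypothetical end-to-end wiring of the snippets above. The dummy DataFrame,
# EPOCHS, learning rate, and warmup settings are illustrative assumptions only.
import pandas as pd
from torch.optim import AdamW
from transformers import BertTokenizer, get_linear_schedule_with_warmup

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    # Two dummy rows just to make the example runnable.
    df_train = pd.DataFrame({"text": ["an example abstract", "another abstract"],
                             "label": [0, 1]})
    train_loader = create_data_loader(df_train, tokenizer, max_len=300, batch_size=2)
    model = PaperClassifier().to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)
    optimizer = AdamW(model.parameters(), lr=2e-5)
    EPOCHS = 1
    total_steps = len(train_loader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=total_steps
    )
    for epoch in range(EPOCHS):
        acc, mean_loss = train_epoch(
            model, train_loader, loss_fn, optimizer, device, scheduler, len(df_train)
        )
        print(f"epoch {epoch}: acc = {acc:.4f}, mean loss = {mean_loss:.4f}")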