import torch
from torch import nn
from torch.nn import init
from torch.nn import functional as F
from torch.autograd import Function
from math import sqrt
import random
import numpy as np
def init_linear(linear):
    init.xavier_normal_(linear.weight)
linear.bias.data.zero_()
def init_conv(conv, glu=True):
    init.kaiming_normal_(conv.weight)
if conv.bias is not None:
conv.bias.data.zero_()
## Equalized learning rate: rescale weights at call time
class EqualLR:
def __init__(self, name):
self.name = name
def compute_weight(self, module):
weight = getattr(module, self.name + '_orig')
        ## fan_in = input channels × kernel area; a conv layer holds Nin*Nout*K*K weights in total
fan_in = weight.data.size(1) * weight.data[0][0].numel()
return weight * sqrt(2 / fan_in)
@staticmethod
def apply(module, name):
fn = EqualLR(name)
weight = getattr(module, name)
del module._parameters[name]
module.register_parameter(name + '_orig', nn.Parameter(weight.data))
module.register_forward_pre_hook(fn)
return fn
def __call__(self, module, input):
weight = self.compute_weight(module)
setattr(module, self.name, weight)
def equal_lr(module, name='weight'):
EqualLR.apply(module, name)
return module
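## A minimal usage sketch (not part of the original file): wrapping a layer with
## equal_lr stores the raw weight as `weight_orig` and, via a forward pre-hook,
## recomputes `weight = weight_orig * sqrt(2 / fan_in)` on every call, so all
## layers see a comparable effective learning rate.
##
##     lin = equal_lr(nn.Linear(512, 512))
##     assert hasattr(lin, 'weight_orig')
##     _ = lin(torch.randn(1, 512))  # the pre-hook repopulates lin.weight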
## Fused upsampling via transposed convolution, with manually defined weight parameters
class FusedUpsample(nn.Module):
def __init__(self, in_channel, out_channel, kernel_size, padding=0):
super().__init__()
weight = torch.randn(in_channel, out_channel, kernel_size, kernel_size)
bias = torch.zeros(out_channel)
        fan_in = in_channel * kernel_size * kernel_size  ## number of input units per output neuron
self.multiplier = sqrt(2 / fan_in)
self.weight = nn.Parameter(weight)
self.bias = nn.Parameter(bias)
self.pad = padding
def forward(self, input):
weight = F.pad(self.weight * self.multiplier, [1, 1, 1, 1])
weight = (
weight[:, :, 1:, 1:]
+ weight[:, :, :-1, 1:]
+ weight[:, :, 1:, :-1]
+ weight[:, :, :-1, :-1]
) / 4
out = F.conv_transpose2d(input, weight, self.bias, stride=2, padding=self.pad)
return out
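## Reading aid (not original code): padding the k×k kernel and averaging its four
## one-pixel-shifted copies is equivalent to convolving the kernel with a 2×2 box
## filter, which fuses a smoothing step into the single stride-2
## conv_transpose2d call instead of running a separate upsample + blur.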
## Fused convolution and downsampling, with randomly initialized weight parameters
class FusedDownsample(nn.Module):
def __init__(self, in_channel, out_channel, kernel_size, padding=0):
super().__init__()
weight = torch.randn(out_channel, in_channel, kernel_size, kernel_size)
bias = torch.zeros(out_channel)
fan_in = in_channel * kernel_size * kernel_size
self.multiplier = sqrt(2 / fan_in)
self.weight = nn.Parameter(weight)
self.bias = nn.Parameter(bias)
self.pad = padding
def forward(self, input):
weight = F.pad(self.weight * self.multiplier, [1, 1, 1, 1])
weight = (
weight[:, :, 1:, 1:]
+ weight[:, :, :-1, 1:]
+ weight[:, :, 1:, :-1]
+ weight[:, :, :-1, :-1]
) / 4
out = F.conv2d(input, weight, self.bias, stride=2, padding=self.pad)
return out
## Pixel-wise feature normalization
class PixelNorm(nn.Module):
def __init__(self):
super().__init__()
def forward(self, input):
return input / torch.sqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)
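## Reading aid (not original code): for each pixel, PixelNorm computes
## b = a / sqrt(mean_c(a_c**2) + 1e-8), normalizing the feature vector across
## channels while leaving its direction unchanged.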
class BlurFunctionBackward(Function):
@staticmethod
def forward(ctx, grad_output, kernel, kernel_flip):
ctx.save_for_backward(kernel, kernel_flip)
grad_input = F.conv2d(
grad_output, kernel_flip, padding=1, groups=grad_output.shape[1]
)
return grad_input
@staticmethod
def backward(ctx, gradgrad_output):
kernel, kernel_flip = ctx.saved_tensors
grad_input = F.conv2d(
gradgrad_output, kernel, padding=1, groups=gradgrad_output.shape[1]
)
return grad_input, None, None
class BlurFunction(Function):
@staticmethod
def forward(ctx, input, kernel, kernel_flip):
ctx.save_for_backward(kernel, kernel_flip)
output = F.conv2d(input, kernel, padding=1, groups=input.shape[1])
return output
@staticmethod
def backward(ctx, grad_output):
kernel, kernel_flip = ctx.saved_tensors
grad_input = BlurFunctionBackward.apply(grad_output, kernel, kernel_flip)
return grad_input, None, None
blur = BlurFunction.apply
## Weighted (binomial) blur filter
class Blur(nn.Module):
def __init__(self, channel):
super().__init__()
weight = torch.tensor([[1, 2, 1], [2, 4, 2], [1, 2, 1]], dtype=torch.float32)
weight = weight.view(1, 1, 3, 3)
weight = weight / weight.sum()
weight_flip = torch.flip(weight, [2, 3])
self.register_buffer('weight', weight.repeat(channel, 1, 1, 1))
self.register_buffer('weight_flip', weight_flip.repeat(channel, 1, 1, 1))
def forward(self, input):
return blur(input, self.weight, self.weight_flip)
# return F.conv2d(input, self.weight, padding=1, groups=input.shape[1])
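## Reading aid (not original code): the 3×3 blur weight is the outer product of
## the binomial filter [1, 2, 1] with itself, normalized to sum to 1,
##
##     [[1, 2, 1],
##      [2, 4, 2],    / 16
##      [1, 2, 1]]
##
## and is applied depthwise (groups = number of channels).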
## Convolution layer with equalized-learning-rate weights
class EqualConv2d(nn.Module):
def __init__(self, *args, **kwargs):
super().__init__()
conv = nn.Conv2d(*args, **kwargs)
conv.weight.data.normal_()
conv.bias.data.zero_()
        self.conv = equal_lr(conv)  ## apply equalized learning rate to the conv weights
def forward(self, input):
return self.conv(input)
## Fully connected layer with equalized learning rate
class EqualLinear(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
linear = nn.Linear(in_dim, out_dim)
linear.weight.data.normal_()
linear.bias.data.zero_()
self.linear = equal_lr(linear)
def forward(self, input):
return self.linear(input)
## Adaptive instance normalization (AdaIN) layer
class AdaptiveInstanceNorm(nn.Module):
def __init__(self, in_channel, style_dim):
super().__init__()
        self.norm = nn.InstanceNorm2d(in_channel)  ## instance-norm layer
self.style = EqualLinear(style_dim, in_channel * 2)
self.style.linear.bias.data[:in_channel] = 1
self.style.linear.bias.data[in_channel:] = 0
def forward(self, input, style):
#print("AdaIN style input="+str(style.shape)) #默认值,风格向量长度512
## 输入style为风格向量,长度为512;经过self.style得到输出风格矩阵,通道数等于输入通道数的2倍
style = self.style(style).unsqueeze(2).unsqueeze(3)
gamma, beta = style.chunk(2, 1) ##获得缩放和偏置系数,按1轴分为2块
#print("AdaIN style output="+str(style.shape))
#等于输入通道数的2倍,in_channel*2
out = self.norm(input) ##IN归一化
out = gamma * out + beta
return out
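## Shape walk-through (illustrative, not original code) for a batch of N images
## with C channels and a 512-dim style vector:
##
##     style: (N, 512) --EqualLinear--> (N, 2C) --unsqueeze--> (N, 2C, 1, 1)
##     gamma, beta = style.chunk(2, 1)            # each (N, C, 1, 1)
##     out = gamma * InstanceNorm(input) + beta   # broadcast over H and W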
## Noise injection with a learnable per-channel weight
class NoiseInjection(nn.Module):
def __init__(self, channel):
super().__init__()
self.weight = nn.Parameter(torch.zeros(1, channel, 1, 1))
def forward(self, image, noise):
return image + self.weight * noise
## Learned constant input
class ConstantInput(nn.Module):
def __init__(self, channel, size=4):
super().__init__()
self.input = nn.Parameter(torch.randn(1, channel, size, size))
def forward(self, input):
batch = input.shape[0]
out = self.input.repeat(batch, 1, 1, 1)
return out
## Styled block: two convolutions, each followed by noise injection and an AdaIN layer
class StyledConvBlock(nn.Module):
def __init__(
self,
in_channel,
out_channel,
kernel_size=3,
padding=1,
style_dim=512,
initial=False,
upsample=False,
fused=False,
):
super().__init__()
        ## First styled block: start from a learned constant 4×4×512 feature map
if initial:
self.conv1 = ConstantInput(in_channel)
else:
if upsample:
                ## Resolutions of 128 and above use fused transposed-conv upsampling
if fused:
self.conv1 = nn.Sequential(
FusedUpsample(
in_channel, out_channel, kernel_size, padding=padding
),
                        Blur(out_channel),  ## blur filtering
)
else:
                    ## Below 128, use plain nearest-neighbor upsampling
self.conv1 = nn.Sequential(
nn.Upsample(scale_factor=2, mode='nearest'),
EqualConv2d(
in_channel, out_channel, kernel_size, padding=padding
),
                        Blur(out_channel),  ## blur filtering
)
            else:  ## non-upsampling block
self.conv1 = EqualConv2d(
in_channel, out_channel, kernel_size, padding=padding
)
        self.noise1 = equal_lr(NoiseInjection(out_channel))  ## noise module 1
        self.adain1 = AdaptiveInstanceNorm(out_channel, style_dim)  ## AdaIN module 1
self.lrelu1 = nn.LeakyReLU(0.2)
self.conv2 = EqualConv2d(out_channel, out_channel, kernel_size, padding=padding)
self.noise2 = equal_lr(NoiseInjection(out_channel))
self.adain2 = AdaptiveInstanceNorm(out_channel, style_dim)
self.lrelu2 = nn.LeakyReLU(0.2)
def forward(self, input, style, noise):
out = self.conv1(input)
out = self.noise1(out, noise)
out = self.lrelu1(out)
out = self.adain1(out, style)
out = self.conv2(out)
out = self.noise2(out, noise)
out = self.lrelu2(out)
out = self.adain2(out, style)
return out
## Main generator (synthesis network) architecture
class Generator(nn.Module):
def __init__(self, code_dim, fused=True):
super().__init__()
        ## Counter used when dumping W vectors
        self.global_count = 0
        ## 9 conv blocks from 4×4 up to 1024×1024: nearest-neighbor upsampling up to 64×64, fused transposed-conv upsampling from 128×128 onward
self.progression = nn.ModuleList(
[
StyledConvBlock(512, 512, 3, 1, initial=True), # 4×4
StyledConvBlock(512, 512, 3, 1, upsample=True), # 8×8
StyledConvBlock(512, 512, 3, 1, upsample=True), # 16×16
StyledConvBlock(512, 512, 3, 1, upsample=True), # 32×32
StyledConvBlock(512, 256, 3, 1, upsample=True), # 64×64
StyledConvBlock(256, 128, 3, 1, upsample=True, fused=fused), # 128×128
StyledConvBlock(128, 64, 3, 1, upsample=True, fused=fused), # 256×256
StyledConvBlock(64, 32, 3, 1, upsample=True, fused=fused), # 512×512
StyledConvBlock(32, 16, 3, 1, upsample=True, fused=fused), # 1024×1024
]
)
        ## 9 matching 1×1-conv to_rgb layers, one per resolution
self.to_rgb = nn.ModuleList(
[
EqualConv2d(512, 3, 1),
EqualConv2d(512, 3, 1),
EqualConv2d(512, 3, 1),
EqualConv2d(512, 3, 1),
EqualConv2d(256, 3, 1),
EqualConv2d(128, 3, 1),
EqualConv2d(64, 3, 1),
EqualConv2d(32, 3, 1),
EqualConv2d(16, 3, 1),
]
)
def forward(self, style, noise, step=0, alpha=1, mixing_range=(-1, 1)):
        out = noise[0]  ## the initial constant block only uses this for its batch size
        if len(style) < 2:  ## no style mixing: inject_index = [10], past all blocks
            inject_index = [len(self.progression) + 1]
        else:
            ## Random ascending crossover indices drawn from [0, step), one fewer
            ## than the number of styles; e.g. step=9, len(style)=8 gives 7 sorted
            ## indices such as [0, 2, 3, 5, 6, 7, 8]
            inject_index = sorted(random.sample(list(range(step)), len(style) - 1))
        crossover = 0  ## current position in the mixing schedule
        ## Optionally dump the W vector for inspection
        # np.save('./results/w/'+str(self.global_count)+'.npy',style[0].cpu().detach().numpy())
        self.global_count = self.global_count + 1
for i, (conv, to_rgb) in enumerate(zip(self.progression, self.to_rgb)):
#print("the resolution is="+str(4*np.power(2,i)))
if mixing_range == (-1, 1):
if crossover < len(inject_index) and i > inject_index[crossover]:
crossover = min(crossover + 1, len(style))
#print("in mixing range,crossover="+str(crossover))
style_step = style[crossover] ##获得交叉的style起始点
else:
## 样式混合
#print("not in mixing range")
if mixing_range[0] <= i <= mixing_range[1]:
style_step = style[1] #取第2个样本样式
else:
style_step = style[0] #取第1个样本样式
if i > 0 and step > 0:
out_prev = out
            ## Feed the features, the style vector, and per-layer noise into the block
            out = conv(out, style_step, noise[i])
            if i == step:  ## final resolution reached: convert features to an image
                out = to_rgb(out)  ## 1×1 convolution to RGB
                ## During progressive growing, alpha-blend with the previous resolution
if i > 0 and 0 <= alpha < 1:
skip_rgb = self.to_rgb[i - 1](out_prev)
skip_rgb = F.interpolate(skip_rgb, scale_factor=2, mode='nearest')
out = (1 - alpha) * skip_rgb + alpha * out
break
return out
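## Reading aid (not original code): while a new resolution fades in
## (0 <= alpha < 1), the returned image is
##
##     out = (1 - alpha) * upsample(to_rgb[i-1](out_prev)) + alpha * to_rgb[i](out)
##
## so alpha=0 reproduces the previous resolution and alpha=1 uses only the new block.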
## Complete StyleGAN generator: mapping network + synthesis network
class StyledGenerator(nn.Module):
def __init__(self, code_dim=512, n_mlp=8):
super().__init__()
        self.generator = Generator(code_dim)  # synthesis network
        # Mapping network: pixel norm followed by n_mlp fully connected layers
        layers = [PixelNorm()]  # pixel normalization layer
        for i in range(n_mlp):
            layers.append(EqualLinear(code_dim, code_dim))  # FC layer with equalized learning rate
            layers.append(nn.LeakyReLU(0.2))  # LeakyReLU activation
        # Maps latent code Z to style vector W
        self.style = nn.Sequential(*layers)
def forward(
self,
        input,  # latent code Z (or a list of codes for style mixing)
        noise=None,  # optional per-layer noise tensors
        step=0,  # progressive-growing step; output resolution = 4 * 2**step
        alpha=1,  # blending factor for smooth transitions during progressive growing
        mean_style=None,  # mean style vector W, used for the truncation trick
        style_weight=0,  # truncation weight toward the mean style
        mixing_range=(-1, 1),  # block range for explicit style mixing
):
        styles = []  # style vectors W
        if type(input) not in (list, tuple):
            input = [input]  # normalize the input to a list
        # Run each latent code Z through the mapping network to get its style vector W
        for i in input:
            styles.append(self.style(i))
        batch = input[0].shape[0]  # batch size
        if noise is None:  # if no noise is provided, sample it randomly
            noise = []
            for i in range(step + 1):  # one noise map per active block
                size = 4 * 2 ** i  # feature-map size at this block
                noise.append(torch.randn(batch, 1, size, size, device=input[0].device))
        if mean_style is not None:  # apply the truncation trick toward the mean style
            styles_norm = []  # truncated style vectors
            for style in styles:
                styles_norm.append(mean_style + style_weight * (style - mean_style))
            styles = styles_norm
print("has mean_style, the shape=" + str(len(mean_style)) + ' ' + str(mean_style[0].shape) + ' the weight is' + str(1 - style_weight))
        # Feed the style vectors W and noise into the synthesis network to generate the image
return self.generator(styles, noise, step, alpha, mixing_range=mixing_range)
def mean_style(self, input):
        # Average style vector W of a batch of latent codes Z mapped through the mapping network
style = self.style(input).mean(0, keepdim=True)
return style
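## A hedged usage sketch (not part of the original file): sample a batch of
## 64×64 images on CPU; step=4 gives a resolution of 4 * 2**4 = 64.
if __name__ == '__main__':
    g = StyledGenerator(code_dim=512)
    z = torch.randn(4, 512)      # latent codes Z
    img = g(z, step=4, alpha=1)  # per-layer noise is sampled internally
    print(img.shape)             # expected: torch.Size([4, 3, 64, 64])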