1 Star 0 Fork 0

NeoWoodley/Neural-Style-Transfer

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
MRFNetwork.py 42.95 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963
from scipy.misc import imread, imresize, imsave
from scipy.optimize import fmin_l_bfgs_b
from sklearn.feature_extraction.image import reconstruct_from_patches_2d, extract_patches_2d
import scipy.interpolate
import scipy.ndimage
import numpy as np
import time
import os
import argparse
import h5py
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, AveragePooling2D
from keras import backend as K
"""
Neural Style Transfer with Keras 1.0.6
Uses the VGG-16 model as described in the Keras example below :
https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py
Note:
Before running this script, download the weights for the VGG16 model at:
https://drive.google.com/file/d/0Bz7KyqmuGsilT0J5dmRCM0ROVHc/view?usp=sharing
(source: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3)
and make sure the variable `weights_path` in this script matches the location of the file.
-----------------------------------------------------------------------------------------------------------------------
"""
def _calc_patch_grid_dims(shape, patch_size, patch_stride):
x_w, x_h, x_c = shape
num_rows = 1 + (x_h - patch_size) // patch_stride
num_cols = 1 + (x_w - patch_size) // patch_stride
return num_rows, num_cols
def make_patch_grid(x, patch_size, patch_stride=1):
    '''Cut `x` into a 2-D grid of square patches.

    x shape: (num_channels, rows, cols)
    returns: array reshaped to (grid_rows, grid_cols, channels, patch, patch)

    NOTE: `extract_patches_2d` is called without any stride, so
    `patch_stride` only enters through the grid dimensions used for the
    reshape.
    '''
    # Channels go last, which is the layout handed to extract_patches_2d.
    channels_last = x.transpose(2, 1, 0)
    n_channels = channels_last.shape[2]
    flat_patches = extract_patches_2d(channels_last, (patch_size, patch_size))
    grid_rows, grid_cols = _calc_patch_grid_dims(channels_last.shape, patch_size, patch_stride)
    grid = flat_patches.reshape((grid_rows, grid_cols, patch_size, patch_size, n_channels))
    # Move the channel axis in front of the two in-patch axes.
    return grid.transpose((0, 1, 4, 2, 3))
def combine_patches_grid(in_patches, out_shape):
    '''Rebuild an image from a grid of patches.

    input shape: (rows, cols, channels, patch_row, patch_col)

    The grid is flattened to a list of channels-last patches, handed to
    `reconstruct_from_patches_2d` together with `out_shape`, and the result
    is returned with its first and last axes swapped.
    '''
    grid_rows, grid_cols = in_patches.shape[:2]
    n_channels, side = in_patches.shape[-3], in_patches.shape[-1]
    # (patches, channels, pr, pc) -> (patches, pr, pc, channels)
    flat = np.transpose(
        np.reshape(in_patches, (grid_rows * grid_cols, n_channels, side, side)),
        (0, 2, 3, 1))
    image = reconstruct_from_patches_2d(flat, out_shape)
    return image.transpose(2, 1, 0)
class PatchMatcher(object):
    '''A matcher of image patches inspired by the PatchMatch algorithm.

    image shape: (width, height, channels)

    For every patch position of the input the matcher stores, in `coords`
    (shape ``(2, rows, cols)``, values in [0, 1]), the normalised grid
    position of the target patch currently matched to it, and in
    `similarity` (shape ``(rows, cols)``) the score of that match.  The score
    is the sum of element-wise products of the two normalised patches, so
    larger is better.
    '''

    def __init__(self, input_shape, target_img, patch_size=1, patch_stride=1, jump_size=0.5,
                 num_propagation_steps=5, num_random_steps=5, random_max_radius=1.0, random_scale=0.5):
        self.input_shape = input_shape
        self.patch_size = patch_size
        self.patch_stride = patch_stride
        self.jump_size = jump_size
        self.num_propagation_steps = num_propagation_steps
        self.num_random_steps = num_random_steps
        self.random_max_radius = random_max_radius
        self.random_scale = random_scale
        self.num_input_rows, self.num_input_cols = _calc_patch_grid_dims(input_shape, patch_size, patch_stride)
        self.target_patches = make_patch_grid(target_img, patch_size)
        self.target_patches_normed = self.normalize_patches(self.target_patches)
        # Start from a uniformly random match for every input patch.
        self.coords = np.random.uniform(0.0, 1.0,  # TODO: switch to pixels
                                        (2, self.num_input_rows, self.num_input_cols))
        # One score per input patch.  The former `input_shape[:2:-1]` is an
        # empty slice for a 3-tuple, which produced a 0-d array that `scale`
        # could not resample.
        self.similarity = np.zeros((self.num_input_rows, self.num_input_cols), dtype='float32')
        # Size of one grid step expressed in normalised coordinates.
        self.min_propagration_row = 1.0 / self.num_input_rows
        self.min_propagration_col = 1.0 / self.num_input_cols
        self.delta_row = np.array([[[self.min_propagration_row]], [[0.0]]])
        self.delta_col = np.array([[[0.0]], [[self.min_propagration_col]]])

    def update(self, input_img, reverse_propagation=False):
        '''Run one propagation + random-search round against `input_img`.'''
        input_patches = self.get_patches_for(input_img)
        self.update_with_patches(self.normalize_patches(input_patches), reverse_propagation=reverse_propagation)

    def update_with_patches(self, input_patches, reverse_propagation=False):
        '''Same as `update` but takes already extracted, normalised patches.'''
        self._propagate(input_patches, reverse_propagation=reverse_propagation)
        self._random_update(input_patches)

    def get_patches_for(self, img):
        '''Return the patch grid of `img` using this matcher's patch size.'''
        return make_patch_grid(img, self.patch_size)

    def normalize_patches(self, patches):
        '''Scale every patch (axes 2, 3, 4) to unit Euclidean norm.

        All-zero patches are returned unchanged instead of turning into NaN.
        '''
        norm = np.sqrt(np.sum(np.square(patches), axis=(2, 3, 4), keepdims=True))
        # `norm` is a fresh array, so patching it in place is safe.
        norm[norm == 0] = 1.0
        return patches / norm

    def _propagate(self, input_patches, reverse_propagation=False):
        '''Try the (shifted) matches of the row and column neighbours.'''
        if reverse_propagation:
            roll_direction = 1
        else:
            roll_direction = -1
        sign = float(roll_direction)
        for step_i in range(self.num_propagation_steps):
            # Candidate taken from the neighbouring row ...
            new_coords = self.clip_coords(np.roll(self.coords, roll_direction, 1) + self.delta_row * sign)
            coords_row, similarity_row = self.eval_state(new_coords, input_patches)
            # ... and from the neighbouring column.
            new_coords = self.clip_coords(np.roll(self.coords, roll_direction, 2) + self.delta_col * sign)
            coords_col, similarity_col = self.eval_state(new_coords, input_patches)
            self.coords, self.similarity = self.take_best(coords_row, similarity_row, coords_col, similarity_col)

    def _random_update(self, input_patches):
        '''Random search around the current matches with a shrinking radius.'''
        for alpha in range(1, self.num_random_steps + 1):  # NOTE this should actually stop when the move is < 1
            jitter = np.random.uniform(-self.random_max_radius, self.random_max_radius, self.coords.shape)
            new_coords = self.clip_coords(self.coords + jitter * self.random_scale ** alpha)
            self.coords, self.similarity = self.eval_state(new_coords, input_patches)

    def eval_state(self, new_coords, input_patches):
        '''Keep, per patch, whichever of the current and the new match scores higher.'''
        new_similarity = self.patch_similarity(input_patches, new_coords)
        delta_similarity = new_similarity - self.similarity
        coords = np.where(delta_similarity > 0, new_coords, self.coords)
        best_similarity = np.where(delta_similarity > 0, new_similarity, self.similarity)
        return coords, best_similarity

    def take_best(self, coords_a, similarity_a, coords_b, similarity_b):
        '''Merge two candidate states, preferring `b` on ties.'''
        delta_similarity = similarity_a - similarity_b
        best_coords = np.where(delta_similarity > 0, coords_a, coords_b)
        best_similarity = np.where(delta_similarity > 0, similarity_a, similarity_b)
        return best_coords, best_similarity

    def patch_similarity(self, source, coords):
        '''Check the similarity of the patches specified in coords.'''
        target_vals = self.lookup_coords(self.target_patches_normed, coords)
        err = source * target_vals
        return np.sum(err, axis=(2, 3, 4))

    def clip_coords(self, coords):
        '''Clamp coordinates to the valid normalised range [0, 1].'''
        # TODO: should this all be in pixel space?
        coords = np.clip(coords, 0.0, 1.0)
        return coords

    def lookup_coords(self, x, coords):
        '''Fetch the entries of grid `x` addressed by normalised `coords`.'''
        x_shape = np.expand_dims(np.expand_dims(x.shape, -1), -1)
        # Map [0, 1] onto the integer index range of the first two axes.
        i_coords = np.round(coords * (x_shape[:2] - 1)).astype('int32')
        return x[i_coords[0], i_coords[1]]

    def get_reconstruction(self, patches=None, combined=None):
        '''Assemble an image from the currently matched patches.

        Patches come from `combined` (an image) if given, else from `patches`,
        else from the target image this matcher was built with.
        '''
        if combined is not None:
            patches = make_patch_grid(combined, self.patch_size)
        if patches is None:
            patches = self.target_patches
        patches = self.lookup_coords(patches, self.coords)
        recon = combine_patches_grid(patches, self.input_shape)
        return recon

    def scale(self, new_shape, new_target_img):
        '''Create a new matcher of the given shape and replace its
        state with a scaled up version of the current matcher's state.
        '''
        new_matcher = PatchMatcher(new_shape, new_target_img, patch_size=self.patch_size,
                                   patch_stride=self.patch_stride, jump_size=self.jump_size,
                                   num_propagation_steps=self.num_propagation_steps,
                                   num_random_steps=self.num_random_steps,
                                   random_max_radius=self.random_max_radius,
                                   random_scale=self.random_scale)
        new_matcher.coords = congrid(self.coords, new_matcher.coords.shape, method='neighbour')
        # `similarity` is 2-D, so it has to be resampled to the new 2-D grid;
        # resampling it to the 3-D coords shape made congrid return None.
        new_matcher.similarity = congrid(self.similarity, new_matcher.similarity.shape, method='neighbour')
        return new_matcher
def congrid(a, newdims, method='linear', centre=False, minusone=False):
    '''Arbitrary resampling of source array to new dimension sizes.
    Currently only supports maintaining the same number of dimensions.
    To use 1-D arrays, first promote them to shape (x,1).

    Uses the same parameters and creates the same co-ordinate lookup points
    as IDL's congrid routine, which apparently originally came from a VAX/VMS
    routine of the same name.

    method:
    neighbour - closest value from original data
    nearest and linear - uses n x 1-D interpolations using
                         scipy.interpolate.interp1d
    (see Numerical Recipes for validity of use of n 1-D interpolations)
    spline - uses ndimage.map_coordinates

    centre:
    True - interpolation points are at the centres of the bins
    False - points are at the front edge of the bin

    minusone:
    For example- inarray.shape = (i,j) & new dimensions = (x,y)
    False - inarray is resampled by factors of (i/x) * (j/y)
    True - inarray is resampled by(i-1)/(x-1) * (j-1)/(y-1)
    This prevents extrapolation one element beyond bounds of input array.

    Returns the resampled array, or None (after printing a message) when
    `newdims` has the wrong length or `method` is not recognised.
    '''
    if a.dtype not in (np.float64, np.float32):
        a = a.astype(float)

    m1 = int(minusone)
    ofs = int(centre) * 0.5
    old = np.array(a.shape)
    ndims = len(a.shape)
    if len(newdims) != ndims:
        print ("[congrid] dimensions error. "
               "This routine currently only support "
               "rebinning to the same number of dimensions.")
        return None
    # Integer sizes are needed to build index grids; the float copy keeps the
    # scale factors in floating point.
    int_dims = tuple(int(d) for d in newdims)
    newdims = np.asarray(newdims, dtype=float)
    dimlist = []

    if method == 'neighbour':
        grid = np.indices(int_dims)
        for i in range(ndims):
            dimlist.append((old[i] - m1) / (newdims[i] - m1)
                           * (grid[i] + ofs) - ofs)
        cd = np.array(dimlist).round().astype(int)
        # Rounding a half-way coordinate can land one past the last element
        # when upsampling; keep every index inside the source array.
        upper = (old - 1).reshape((ndims,) + (1,) * ndims)
        cd = np.clip(cd, 0, upper)
        # A tuple is required for per-axis index arrays.
        return a[tuple(cd)]

    elif method in ['nearest', 'linear']:
        # calculate new dims
        for i in range(ndims):
            base = np.arange(newdims[i])
            dimlist.append((old[i] - m1) / (newdims[i] - m1)
                           * (base + ofs) - ofs)
        # specify old dims
        olddims = [np.arange(n, dtype=float) for n in a.shape]

        # first interpolation - for ndims = any
        mint = scipy.interpolate.interp1d(olddims[-1], a, kind=method)
        newa = mint(dimlist[-1])

        # Rotate the axes so the next one to interpolate becomes the last.
        trorder = [ndims - 1] + list(range(ndims - 1))
        for i in range(ndims - 2, -1, -1):
            newa = newa.transpose(trorder)
            mint = scipy.interpolate.interp1d(olddims[i], newa, kind=method)
            newa = mint(dimlist[i])

        if ndims > 1:
            # need one more transpose to return to original dimensions
            newa = newa.transpose(trorder)

        return newa

    elif method in ['spline']:
        # Float copy so the in-place scaling below is allowed.
        newcoords = np.mgrid[tuple(slice(0, n) for n in int_dims)].astype(float)

        # make first index last
        axes = list(range(1, newcoords.ndim)) + [0]
        newcoords_tr = newcoords.transpose(axes)
        # makes a view that affects newcoords
        newcoords_tr += ofs
        deltas = (old - m1) / (newdims - m1)
        newcoords_tr *= deltas
        newcoords_tr -= ofs

        newa = scipy.ndimage.map_coordinates(a, newcoords)
        return newa

    else:
        print("Congrid error: Unrecognized interpolation type.\n",
              "Currently only \'neighbour\', \'nearest\',\'linear\',",
              "and \'spline\' are supported.")
        return None
class BaseModel(object):
    '''Model to be extended.

    Wraps a Keras network and turns a symbolic loss (built by `build_loss`)
    into a function returning the loss value and its gradient with respect
    to the network input.  This base class only contributes the total
    variation term; subclasses add the remaining losses.
    '''

    def __init__(self, net, args):
        self.set_net(net)
        self.args = args

    def set_net(self, net):
        '''Attach `net` and index its layers by name.'''
        self.net = net
        self.net_input = net.layers[0].input
        self.layer_map = {layer.name: layer for layer in self.net.layers}
        # Cache of symbolic layer outputs, keyed by layer name.
        self._f_layer_outputs = {}

    def build(self, a_image, ap_image, b_image, output_shape):
        '''Compile `self.f_outputs`, mapping the input image to [loss, grads...].'''
        self.output_shape = output_shape
        loss = self.build_loss(a_image, ap_image, b_image)
        # get the gradients of the generated image wrt the loss
        grads = K.gradients(loss, self.net_input)
        outputs = [loss]
        if isinstance(grads, (list, tuple)):
            outputs += grads
        else:
            outputs.append(grads)
        self.f_outputs = K.function([self.net_input], outputs)

    def build_loss(self, a_image, ap_image, b_image):
        '''Create an expression for the loss as a function of the image inputs.'''
        loss = K.variable(0.0)
        # get the symbolic outputs of each "key" layer (we gave them unique names).
        loss += self.args.tv_weight * total_variation_loss(self.net_input)
        return loss

    def precompute_static_features(self, a_image, ap_image, b_image):
        '''Evaluate the layer activations of the three fixed images.

        Only the layers required by the enabled (non-zero weight) losses are
        extracted.  Returns ``(a_features, ap_features, b_features)``; each is
        a dict keyed by layer name, or None when no layer was needed.
        '''
        # figure out which layers we need to extract
        a_layers, ap_layers, b_layers = set(), set(), set()
        if self.args.analogy_weight:
            for layerset in (a_layers, ap_layers, b_layers):
                layerset.update(analogy_layers)
        if self.args.mrf_weight:
            ap_layers.update(mrf_layers)
        if self.args.b_bp_content_weight:
            b_layers.update(feature_layers)
        if self.args.style_weight:
            # Use this model's own args rather than the module-level `args`;
            # accept either a single layer name or a collection of names.
            content_layer = self.args.content_layer
            if isinstance(content_layer, str):
                ap_layers.add(content_layer)
            else:
                ap_layers.update(content_layer)
        # let's get those features
        all_a_features = self.get_features(a_image, a_layers)
        all_ap_image_features = self.get_features(ap_image, ap_layers)
        all_b_features = self.get_features(b_image, b_layers)
        return all_a_features, all_ap_image_features, all_b_features

    def get_features(self, x, layers):
        '''Return ``{layer_name: activation}`` of `x` for `layers`, or None if empty.'''
        if not layers:
            return None
        # Freeze the order once so names and outputs are paired reliably.
        layer_names = list(layers)
        f = K.function([self.net_input], [self.get_layer_output(name) for name in layer_names])
        feature_outputs = f([x])
        return dict(zip(layer_names, feature_outputs))

    def get_f_layer(self, layer_name):
        '''Compile a function from the network input to one layer's output.'''
        return K.function([self.net_input], [self.get_layer_output(layer_name)])

    def get_layer_output(self, name):
        '''Return (and cache) the symbolic output of layer `name`.'''
        if name not in self._f_layer_outputs:
            layer = self.layer_map[name]
            self._f_layer_outputs[name] = layer.output
        return self._f_layer_outputs[name]

    def get_layer_output_shape(self, name):
        '''Return the `output_shape` attribute of layer `name`.'''
        layer = self.layer_map[name]
        return layer.output_shape

    def eval_loss_and_grads(self, x):
        '''Evaluate loss and flattened float64 gradient for the flat image `x`.'''
        x = x.reshape(self.output_shape)
        outs = self.f_outputs([x])
        loss_value = outs[0]
        if len(outs[1:]) == 1:
            grad_values = outs[1].flatten().astype('float64')
        else:
            grad_values = np.array(outs[1:]).flatten().astype('float64')
        return loss_value, grad_values
class NNFModel(BaseModel):
    '''Faster model for image analogies.

    The MRF loss compares the generated image's features against a target
    rebuilt from patch matches; those targets are fed through placeholders
    that are refreshed on every loss evaluation.
    '''

    def build(self, a_image, ap_image, b_image, output_shape):
        '''Compile `self.f_outputs`; inputs are the image plus one target per NNF.'''
        self.output_shape = output_shape
        loss = self.build_loss(a_image, ap_image, b_image)
        # get the gradients of the generated image wrt the loss
        grads = K.gradients(loss, self.net_input)
        outputs = [loss]
        if isinstance(grads, (list, tuple)):
            outputs += grads
        else:
            outputs.append(grads)
        f_inputs = [self.net_input] + [nnf.placeholder for nnf in self.feature_nnfs]
        self.f_outputs = K.function(f_inputs, outputs)

    def eval_loss_and_grads(self, x):
        '''Refresh the patch matches, then return loss and flattened float64 gradient.'''
        x = x.reshape(self.output_shape)
        f_inputs = [x]
        # update the patch indexes
        start_t = time.time()
        for nnf in self.feature_nnfs:
            nnf.update(x, num_steps=self.args.mrf_nnf_steps)
            new_target = nnf.matcher.get_reconstruction()
            f_inputs.append(new_target)
        print('PatchMatch update in {:.2f} seconds'.format(time.time() - start_t))
        # run it through
        outs = self.f_outputs(f_inputs)
        loss_value = outs[0]
        if len(outs[1:]) == 1:
            grad_values = outs[1].flatten().astype('float64')
        else:
            grad_values = np.array(outs[1:]).flatten().astype('float64')
        return loss_value, grad_values

    def build_loss(self, a_image, ap_image, b_image):
        '''Create an expression for the loss as a function of the image inputs.'''
        print('Building loss...')
        loss = super(NNFModel, self).build_loss(a_image, ap_image, b_image)
        # Precompute static features for performance
        print('Precomputing static features...')
        all_a_features, all_ap_image_features, all_b_features = self.precompute_static_features(a_image, ap_image, b_image)
        print('Building and combining losses...')
        if self.args.analogy_weight:
            for layer_name in analogy_layers:
                a_features = all_a_features[layer_name][0]
                ap_image_features = all_ap_image_features[layer_name][0]
                b_features = all_b_features[layer_name][0]
                # current combined output
                layer_features = self.get_layer_output(layer_name)
                combination_features = layer_features[0, :, :, :]
                al = nnf_analogy_loss(
                    a_features, ap_image_features, b_features, combination_features,
                    num_steps=self.args.analogy_nnf_steps, patch_size=self.args.patch_size,
                    patch_stride=self.args.patch_stride, jump_size=1.0)
                loss += (self.args.analogy_weight / len(analogy_layers)) * al

        # Matchers left over from a previous build (e.g. a smaller scale) are
        # reused unless randomisation was requested.
        existing_feature_nnfs = getattr(self, 'feature_nnfs', [None] * len(mrf_layers))
        self.feature_nnfs = []
        if self.args.mrf_weight:
            for layer_name, existing_nnf in zip(mrf_layers, existing_feature_nnfs):
                ap_image_features = all_ap_image_features[layer_name][0]
                # current combined output
                layer_features = self.get_layer_output(layer_name)
                combination_features = layer_features[0, :, :, :]
                input_shape = self.get_layer_output_shape(layer_name)
                # The matcher takes its shape with the last three axes reversed.
                matcher_shape = (input_shape[3], input_shape[2], input_shape[1])
                if existing_nnf and not self.args.randomize_mnf_nnf:
                    matcher = existing_nnf.matcher.scale(matcher_shape, ap_image_features)
                else:
                    matcher = PatchMatcher(
                        matcher_shape, ap_image_features,
                        patch_size=self.args.patch_size, jump_size=1.0, patch_stride=self.args.patch_stride)
                nnf = NNFState(matcher, self.get_f_layer(layer_name))
                self.feature_nnfs.append(nnf)
                sl = content_loss(combination_features, nnf.placeholder)
                loss += (self.args.mrf_weight / len(mrf_layers)) * sl

        # NOTE(review): precompute_static_features only extracts B's features
        # when `b_bp_content_weight` (or the analogy weight) is non-zero, while
        # this branch is gated on `content_weight` - confirm which is intended.
        if self.args.content_weight:
            for layer_name in feature_layers:
                b_features = K.variable(all_b_features[layer_name][0])
                # current combined output
                bp_features = self.get_layer_output(layer_name)
                cl = content_loss(bp_features, b_features)
                loss += self.args.content_weight / len(feature_layers) * cl

        if self.args.style_weight != 0.0:
            # `content_layer` is normally a single layer name.  Dividing by
            # len() of that string scaled the weight by the number of
            # characters in the name, so normalise to a list of names first.
            style_layers = self.args.content_layer
            if isinstance(style_layers, str):
                style_layers = [style_layers]
            else:
                style_layers = list(style_layers)
            for layer_name in style_layers:
                ap_image_features = K.variable(all_ap_image_features[layer_name][0])
                layer_features = self.get_layer_output(layer_name)
                # current combined output
                combination_features = layer_features[0, :, :, :]
                nsl = style_loss(ap_image_features, combination_features)
                loss += (self.args.style_weight / len(style_layers)) * nsl
        return loss
class AnalogyModel(BaseModel):
    '''Brute Force Model for image analogies.'''

    def build_loss(self, a_image, ap_image, b_image):
        '''Create an expression for the loss as a function of the image inputs.

        Adds, on top of the base loss, the analogy, MRF, content and style
        terms whose weights are non-zero.
        '''
        print('Building loss...')
        loss = super(AnalogyModel, self).build_loss(a_image, ap_image, b_image)
        # Precompute static features for performance
        print('Precomputing static features...')
        all_a_features, all_ap_image_features, all_b_features = self.precompute_static_features(a_image, ap_image, b_image)
        print('Building and combining losses...')
        if self.args.analogy_weight != 0.0:
            for layer_name in analogy_layers:
                a_features = all_a_features[layer_name][0]
                ap_image_features = all_ap_image_features[layer_name][0]
                b_features = all_b_features[layer_name][0]
                # current combined output
                layer_features = self.get_layer_output(layer_name)
                combination_features = layer_features[0, :, :, :]
                al = analogy_loss(a_features, ap_image_features,
                                  b_features, combination_features,
                                  use_full_analogy=self.args.use_full_analogy,
                                  patch_size=self.args.patch_size,
                                  patch_stride=self.args.patch_stride)
                # Average over the layers actually iterated, not over the
                # (possibly different) list stored on args.
                loss += (self.args.analogy_weight / len(analogy_layers)) * al

        if self.args.mrf_weight != 0.0:
            for layer_name in mrf_layers:
                ap_image_features = K.variable(all_ap_image_features[layer_name][0])
                layer_features = self.get_layer_output(layer_name)
                # current combined output
                combination_features = layer_features[0, :, :, :]
                sl = mrf_loss(ap_image_features, combination_features,
                              patch_size=self.args.patch_size,
                              patch_stride=self.args.patch_stride)
                loss += (self.args.mrf_weight / len(mrf_layers)) * sl

        # NOTE(review): this term is enabled by `b_bp_content_weight` but
        # scaled by `content_weight` - confirm which weight is intended.
        if self.args.b_bp_content_weight != 0.0:
            for layer_name in feature_layers:
                b_features = K.variable(all_b_features[layer_name][0])
                # current combined output
                bp_features = self.get_layer_output(layer_name)
                cl = content_loss(bp_features, b_features)
                loss += self.args.content_weight / len(feature_layers) * cl

        if self.args.style_weight != 0.0:
            # `content_layer` is normally a single layer name; iterating the
            # string directly would walk over its characters.
            style_layers = self.args.content_layer
            if isinstance(style_layers, str):
                style_layers = [style_layers]
            else:
                style_layers = list(style_layers)
            for layer_name in style_layers:
                ap_image_features = K.variable(all_ap_image_features[layer_name][0])
                layer_features = self.get_layer_output(layer_name)
                # current combined output
                combination_features = layer_features[0, :, :, :]
                nsl = style_loss(ap_image_features, combination_features)
                loss += (self.args.style_weight / len(style_layers)) * nsl
        return loss
"""
This is the beginning of the program
"""
# Command-line interface.  The three positional paths are consumed below as
# style image, style mask and content image respectively, so the help texts
# and metavars describe them that way.
parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
parser.add_argument('style_image', metavar='style', type=str,
                    help='Path to the style reference image.')
parser.add_argument('style_image_mask_path', metavar='style_mask', type=str,
                    help='Path to the style image mask.')
parser.add_argument('content_image_path', metavar='content', type=str,
                    help='Path to the content image to transform.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')
parser.add_argument("--image_size", dest="img_size", default=512, type=int, help='Output Image size')
parser.add_argument("--content_weight", dest="content_weight", default=0.025, type=float, help="Weight of content")  # 0.025
parser.add_argument("--style_weight", dest="style_weight", default=1, type=float, help="Weight of style")  # 1.0
parser.add_argument("--style_scale", dest="style_scale", default=1.0, type=float, help="Scale the weightage of the style")  # 1, 0.5, 2
parser.add_argument("--total_variation_weight", dest="tv_weight", default=1e-3, type=float, help="Total Variation in the Weights")  # 1.0
parser.add_argument("--num_iter", dest="num_iter", default=10, type=int, help="Number of iterations")
# Boolean options are passed as strings and converted with strToBool below.
parser.add_argument("--rescale_image", dest="rescale_image", default="True", type=str, help="Rescale image after execution to original dimensions")
parser.add_argument("--rescale_method", dest="rescale_method", default="bilinear", type=str, help="Rescale image algorithm")
parser.add_argument("--maintain_aspect_ratio", dest="maintain_aspect_ratio", default="True", type=str, help="Maintain aspect ratio of image")
parser.add_argument("--content_layer", dest="content_layer", default="conv5_2", type=str, help="Optional 'conv4_2'")
parser.add_argument("--init_image", dest="init_image", default="content", type=str, help="Initial image used to generate the final image. Options are 'content' or 'noise'")
parser.add_argument('--analogy-w', dest='analogy_weight', type=float, default=1.0, help='Weight for analogy loss.')
parser.add_argument('--analogy-layers', dest='analogy_layers', default=['conv3_1', 'conv4_1'], help='Comma-separated list of layer names to be used for the analogy loss')
parser.add_argument('--use-full-analogy', dest='use_full_analogy', action="store_true", help='Use the full set of analogy patches (slower/more memory but maybe more accurate)')
parser.add_argument('--mrf-w', dest='mrf_weight', type=float, default=1, help='Weight for MRF loss between A\' and B\'')
parser.add_argument('--mrf-layers', dest='mrf_layers', default=['conv3_1', 'conv4_1'], help='Comma-separated list of layer names to be used for the MRF loss')
parser.add_argument('--b-content-w', dest='b_bp_content_weight', type=float, default=4.0, help='Weight for content loss between B and B\'')
parser.add_argument('--mrf-nnf-steps', dest='mrf_nnf_steps', type=int, default=5, help='Number of patchmatch updates per iteration for local coherence loss.')
parser.add_argument('--analogy-nnf-steps', dest='analogy_nnf_steps', type=int, default=15, help='Number of patchmatch updates for the analogy loss (done once per scale).')
parser.add_argument('--patch-stride', dest='patch_stride', type=int, default=1, help='Patch stride used for matching. Currently required to be 1.')
parser.add_argument('--randomize-mrf-nnf', dest='randomize_mnf_nnf', action='store_true', help='Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.')
parser.add_argument('--patch-size', dest='patch_size', type=int, default=1, help='Patch size used for matching.')

args = parser.parse_args()
style_image_path = args.style_image
style_image_mask_path = args.style_image_mask_path
content_image_path = args.content_image_path
result_prefix = args.result_prefix
# Location of the pretrained VGG16 weights file.
weights_path = r"vgg16_weights.h5"
def strToBool(v):
    """Return True when `v` spells a true value, ignoring case.

    Accepted spellings are "true", "yes", "t" and "1"; anything else is False.
    """
    truthy = ("true", "yes", "t", "1")
    lowered = v.lower()
    return lowered in truthy
# Boolean switches arrive as strings on the command line.
maintain_aspect_ratio = strToBool(args.maintain_aspect_ratio)
rescale_image = strToBool(args.rescale_image)

# Relative weights of the individual loss components.
content_weight = args.content_weight
style_weight = args.style_scale * args.style_weight
total_variation_weight = args.tv_weight

# Side length of the (square) generated picture.
img_height = args.img_size
img_width = img_height
assert img_height == img_width, 'Due to the use of the Gram matrix, width and height must match.'

# Layers feeding the MRF and analogy losses.
mrf_layers = ['conv4_1', 'conv5_1']  # 'conv3_1', 'conv4_1'
analogy_layers = ['conv4_1']  # 'conv1_1','conv2_1','conv3_1', 'conv4_1'

# Module-level state that preprocess_image() overwrites via `global`.
img_WIDTH = 0
img_HEIGHT = 0
aspect_ratio = 0
b_scale_ratio_width = 0
b_scale_ratio_height = 0
# util function to open, resize and format pictures into appropriate tensors
def preprocess_image(image_path, load_dims=False, style_image=False):
    """Load an image as a float64 array with a leading batch axis.

    The image is read as RGB, resized to ``(img_width, img_height)``, moved to
    channels-first order and given a batch axis of length 1.

    load_dims:   record the original size in the module globals `img_WIDTH`
                 (``shape[0]``), `img_HEIGHT` (``shape[1]``) and `aspect_ratio`.
    style_image: record this image's size relative to the recorded one in
                 `b_scale_ratio_width` / `b_scale_ratio_height`.  An image
                 loaded with `load_dims=True` must have been processed first,
                 otherwise the recorded size is still 0 and the division fails.
    """
    global img_WIDTH, img_HEIGHT, aspect_ratio, b_scale_ratio_height, b_scale_ratio_width

    img = imread(image_path, mode="RGB")  # Prevents crashes due to PNG images (ARGB)
    if load_dims:
        img_WIDTH = img.shape[0]
        img_HEIGHT = img.shape[1]
        # float() keeps the ratio fractional under integer division as well,
        # matching the scale ratios computed below.
        aspect_ratio = float(img_HEIGHT) / img_WIDTH

    if style_image:
        b_scale_ratio_width = float(img.shape[0]) / img_WIDTH
        b_scale_ratio_height = float(img.shape[1]) / img_HEIGHT

    img = imresize(img, (img_width, img_height))
    img = img.transpose((2, 0, 1)).astype('float64')
    img = np.expand_dims(img, axis=0)
    return img
# util function to convert a tensor into a valid image
def deprocess_image(x):
    """Turn a channels-first array into a channels-last uint8 image.

    Values are clamped to the range 0..255 before the cast.
    """
    channels_last = np.transpose(x, (1, 2, 0))
    clamped = np.clip(channels_last, 0, 255)
    return clamped.astype('uint8')
def make_patches(x, patch_size, patch_stride):
    '''Split image tensor `x` into square patches.

    Returns ``(patches, patches_norm)`` where `patches` is arranged as
    (patch, channel, patch_size, patch_size) and `patches_norm` is the
    Euclidean norm of every patch, kept with singleton axes.
    '''
    from theano.tensor.nnet.neighbours import images2neibs
    batched = K.expand_dims(x, 0)
    n_channels = K.shape(batched)[1]
    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    neibs = images2neibs(batched, window, step, mode='valid')
    # neibs are sorted per-channel
    per_channel = K.reshape(
        neibs,
        (n_channels, K.shape(neibs)[0] // n_channels, patch_size, patch_size))
    patches = K.permute_dimensions(per_channel, (1, 0, 2, 3))
    squared_sum = K.sum(K.square(patches), axis=(1,2,3), keepdims=True)
    return patches, K.sqrt(squared_sum)
def combine_patches(patches, out_shape):
    '''Rebuild a channels-first image from channels-first `patches`.

    The patches are converted to channels-last, passed to
    `reconstruct_from_patches_2d` with `out_shape`, and the result is
    converted back to channels-first.
    '''
    channels_last = patches.transpose(0, 2, 3, 1)
    image = reconstruct_from_patches_2d(channels_last, out_shape)
    return image.transpose(2, 0, 1)
def find_patch_matches(a, a_norm, b):
    '''Return, for every patch in `a`, the index of its best match in `b`.

    The match score is the convolution response of `a` with the patches of
    `b` used as kernels, divided by `a_norm`.
    '''
    # we want cross-correlation here so flip the kernels
    flipped_kernels = b[:, :, ::-1, ::-1]
    responses = K.conv2d(a, flipped_kernels, border_mode='valid')
    return K.argmax(responses / a_norm, axis=1)
def find_analogy_patches(a, a_prime, b, patch_size=3, patch_stride=1):
    '''Precompute the patches used by the analogy loss.

    A, A' and B are fixed, so the matching between patches of B and patches
    of A only has to be evaluated once.  For every patch of B the patch of A'
    at the index of its best match in A is returned.
    '''
    # extract patches from feature maps
    patches_a, norm_a = make_patches(K.variable(a), patch_size, patch_stride)
    patches_ap, _norm_ap = make_patches(K.variable(a_prime), patch_size, patch_stride)
    patches_b, norm_b = make_patches(K.variable(b), patch_size, patch_stride)
    # find best patches and calculate loss
    match_idx = find_patch_matches(patches_b, norm_b, patches_a / norm_a)
    selected = K.reshape(patches_ap[match_idx], K.shape(patches_b))
    # Evaluate the symbolic result to concrete values right away.
    evaluate = K.function([], selected)
    return evaluate([])
def make_patches_grid(x, patch_size, patch_stride):
    '''Split image tensor `x` into a 2-D grid of square patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns ``(patches, patches_norm)``; the norm is taken over the channel
    and patch axes and kept with singleton axes.
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    batched = K.expand_dims(x, 0)
    shape = K.shape(batched)
    n_channels = shape[-3]
    grid_rows = 1 + (shape[-2] - patch_size) // patch_stride
    grid_cols = 1 + (shape[-1] - patch_size) // patch_stride
    neibs = images2neibs(batched,
                         (patch_size, patch_size), (patch_stride, patch_stride),
                         mode='valid')
    # neibs are sorted per-channel
    per_channel = K.reshape(
        neibs,
        (n_channels, K.shape(neibs)[0] // n_channels, patch_size, patch_size))
    per_patch = K.permute_dimensions(per_channel, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(per_patch, (grid_rows, grid_cols, n_channels, patch_size, patch_size))
    squared_sum = K.sum(K.square(patches), axis=(2,3,4), keepdims=True)
    return patches, K.sqrt(squared_sum)
# get array representations of our images via preprocess_image
# NOTE(review): the content image is loaded first with load_dims=True --
# presumably this records the dimensions used below; confirm in preprocess_image
content_img = preprocess_image(content_image_path, load_dims=True)
style_img = preprocess_image(style_image_path, style_image=True)
style_img_mask = preprocess_image(style_image_mask_path)
# wrap the three fixed inputs as backend variables
style_image_tensor = K.variable(style_img)
style_image_mask_tensor = K.variable(style_img_mask)
content_image_tensor = K.variable(content_img)
# this will contain our generated image, shape (1, 3, img_width, img_height)
combination_image = K.placeholder((1, 3, img_width, img_height))
# combine the 4 images into a single Keras tensor along axis 0; batch order:
#   0 = style image, 1 = style mask, 2 = content image, 3 = generated image
input_tensor = K.concatenate([style_image_tensor,
style_image_mask_tensor,
content_image_tensor,
combination_image], axis=0)
# build the VGG16 convolutional stack with our 4-image batch as input
# the first padding layer is bound directly to `input_tensor`
first_layer = ZeroPadding2D((1, 1), )
first_layer.set_input(input_tensor, shape=(4, 3, img_width, img_height))

model = Sequential()
model.add(first_layer)

# one entry per VGG block; each block is a sequence of (layer name, filters)
vgg_blocks = (
    (('conv1_1', 64), ('conv1_2', 64)),
    (('conv2_1', 128), ('conv2_2', 128)),
    (('conv3_1', 256), ('conv3_2', 256), ('conv3_3', 256)),
    (('conv4_1', 512), ('conv4_2', 512), ('conv4_3', 512)),
    (('conv5_1', 512), ('conv5_2', 512), ('conv5_3', 512)),
)
# every 3x3 conv is preceded by a 1-pixel zero padding; the padding for the
# very first conv is `first_layer`, which has already been added
vgg_needs_padding = False
for vgg_block in vgg_blocks:
    for vgg_conv_name, vgg_nb_filter in vgg_block:
        if vgg_needs_padding:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Convolution2D(vgg_nb_filter, 3, 3, activation='relu', name=vgg_conv_name))
        vgg_needs_padding = True
    # each block is closed by 2x2 average pooling
    model.add(AveragePooling2D((2, 2), strides=(2, 2)))
# load the weights of the VGG16 networks
# (trained on ImageNet, won the ILSVRC competition in 2014)
# note: when there is a complete match between your model definition
# and your weight savefile, you can simply call model.load_weights(filename)
assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
# open read-only, and through a context manager so the HDF5 handle is
# released even when a layer fails to accept its weights
with h5py.File(weights_path, 'r') as f:
    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        # the savefile stores one group per layer, holding its parameter arrays
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)
print('Model loaded.')
# map every layer name to its symbolic output
# (the "key" conv layers were given unique names when the model was built)
outputs_dict = {layer.name: layer.output for layer in model.layers}
# compute the neural style loss
# first we need to define 4 util functions
# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    '''Gram matrix of a 3D feature tensor (feature-wise outer product).

    For any dim ordering other than "th" the last axis is moved to the
    front first; the tensor is then flattened with K.batch_flatten and
    multiplied with its own transpose.
    '''
    assert K.ndim(x) == 3
    if K.image_dim_ordering() != "th":
        x = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(x)
    return K.dot(flat, K.transpose(flat))
# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image
def style_loss(style, combination):
    '''Squared distance between the gram matrices of two 3D feature tensors.

    The sum of squared differences is divided by
    4 * channels^2 * size^2, with channels fixed to 3 and
    size = img_width * img_height (module-level image dimensions).
    '''
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    normaliser = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normaliser
# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
def content_loss(base, combination):
    '''Sum of squared differences between `combination` and `base`.'''
    difference = combination - base
    return K.sum(K.square(difference))
# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def total_variation_loss(x):
    '''Total variation loss: penalises differences between neighbouring pixels.

    x: 4D tensor; the two trailing axes are treated as the spatial axes
       (the script feeds a (1, 3, img_width, img_height) image).

    Returns the sum over all positions of (a + b) ** 1.25, where `a` and `b`
    are the squared differences to the next element along axis 2 and axis 3.
    '''
    assert K.ndim(x) == 4
    # Slice relative to the tensor itself rather than mixing img_width and
    # img_height bounds across axes: both shifted views then always have the
    # same shape as `anchor`, for square and non-square images alike.
    anchor = x[:, :, :-1, :-1]
    a = K.square(x[:, :, 1:, :-1] - anchor)  # neighbour along axis 2
    b = K.square(x[:, :, :-1, 1:] - anchor)  # neighbour along axis 3
    return K.sum(K.pow(a + b, 1.25))
def analogy_loss(a, a_prime, b, b_prime, patch_size=3, patch_stride=1, use_full_analogy=False):
    '''Image-analogy loss between the A' patches that best explain B and B'.

    http://www.mrl.nyu.edu/projects/image-analogies/index.html

    With `use_full_analogy` the comparison is done patch by patch (scaled by
    1 / patch_size ** 2); otherwise the matched patches are first combined
    into one image which is compared against `b_prime` as a whole.
    '''
    matched_patches = find_analogy_patches(
        a, a_prime, b, patch_size=patch_size, patch_stride=patch_stride)
    if not use_full_analogy:
        # combine all the matched patches into a single image, then add a
        # leading batch axis before comparing with b_prime
        shape = b.shape
        stitched = combine_patches(matched_patches, (shape[1], shape[2], shape[0]))
        return content_loss(np.expand_dims(stitched, 0), b_prime)
    # compare against the patches of the generated image directly
    b_prime_patches, _ = make_patches(b_prime, patch_size, patch_stride)
    return content_loss(matched_patches, b_prime_patches) / patch_size ** 2
def mrf_loss(source, combination, patch_size=3, patch_stride=1):
    '''CNNMRF loss between `combination` and its best-matching `source` patches.

    http://arxiv.org/pdf/1601.04589v1.pdf
    '''
    # extract patches from both feature maps
    comb_patches, comb_norm = make_patches(combination, patch_size, patch_stride)
    src_patches, src_norm = make_patches(source, patch_size, patch_stride)
    # match each combination patch against the normalised source patches
    normalised_src = src_patches / src_norm
    match_ids = find_patch_matches(comb_patches, comb_norm, normalised_src)
    matched_src = K.reshape(src_patches[match_ids], K.shape(comb_patches))
    # squared error against the matched patches, scaled by the patch area
    residual = matched_src - comb_patches
    return K.sum(K.square(residual)) / patch_size ** 2
def nnf_analogy_loss(a, a_prime, b, b_prime, num_steps=5, jump_size=1.0, patch_size=1, patch_stride=1):
    '''Analogy loss using a PatchMatcher built over `a`.

    image shapes: (channels, rows, cols)

    The matcher is refined for `num_steps` rounds against the normalised
    patches of `b`; its reconstruction from `a_prime` is then compared with
    `b_prime` via content_loss.
    '''
    shape = b.shape
    # the matcher takes the shape of `b` in reversed axis order
    matcher = PatchMatcher(
        (shape[2], shape[1], shape[0]), a,
        jump_size=jump_size, patch_size=patch_size, patch_stride=patch_stride)
    normed_b = matcher.normalize_patches(matcher.get_patches_for(b))
    for step in range(num_steps):
        # reverse propagation is requested on every odd step
        reverse = (step % 2 == 1)
        matcher.update_with_patches(normed_b, reverse_propagation=reverse)
    reconstruction = matcher.get_reconstruction(combined=a_prime)
    return content_loss(reconstruction, b_prime)
class NNFState(object):
    '''Couples a patch matcher with the function that produces its features.'''

    def __init__(self, matcher, f_layer):
        '''Store `matcher` and `f_layer` and create a backend placeholder
        whose shape is the matcher's `input_shape` reversed.'''
        self.matcher = matcher
        reversed_shape = matcher.input_shape[::-1]
        self.placeholder = K.placeholder(reversed_shape)
        self.f_layer = f_layer

    def update(self, x, num_steps=5):
        '''Refine the matcher for `num_steps` rounds using the features of `x`.

        `f_layer` is called with `[x]`; the first element of its first
        output is turned into normalised patches that drive the update.
        '''
        features = self.f_layer([x])[0]
        patches = self.matcher.get_patches_for(features[0])
        normed = self.matcher.normalize_patches(patches)
        for step in range(num_steps):
            # reverse propagation is requested on every odd step
            reverse = (step % 2 == 1)
            self.matcher.update_with_patches(normed, reverse_propagation=reverse)
# combine these loss functions into a single scalar
loss = K.variable(0.)
# `input_tensor` stacks the images along axis 0 in this order:
#   0 = style image, 1 = style mask, 2 = content image, 3 = generated image
# so the content reference is slice 2 and the style reference is slice 0.
layer_features = outputs_dict[args.content_layer]  # 'conv5_2' or 'conv4_2'
base_image_features = layer_features[2, :, :, :]
combination_features = layer_features[3, :, :, :]
loss += content_weight * content_loss(base_image_features,
                                      combination_features)

# the style loss is averaged over the first conv layer of every VGG block
feature_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[0, :, :, :]
    combination_features = layer_features[3, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

# a single backend function returns the loss followed by the gradient(s)
outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)
f_outputs = K.function([combination_image], outputs)
def eval_loss_and_grads(x):
    '''Evaluate `f_outputs` on `x` and return `(loss_value, grad_values)`.

    `x` is reshaped to (1, 3, img_width, img_height) first; the gradients
    come back as one flat float64 array.
    '''
    batch = x.reshape((1, 3, img_width, img_height))
    results = f_outputs([batch])
    loss_value, grad_outputs = results[0], results[1:]
    if len(grad_outputs) == 1:
        flat_grads = grad_outputs[0].flatten()
    else:
        # several gradient arrays: stack them before flattening
        flat_grads = np.array(grad_outputs).flatten()
    grad_values = flat_grads.astype('float64')
    return loss_value, grad_values
"""
"""
# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
class Evaluator(object):
    '''Expose one combined loss/gradient evaluation as two callables.

    `loss(x)` runs `model.eval_loss_and_grads(x)` and caches both results;
    the following `grads(x)` call returns the cached gradients and clears
    the cache. The two methods must therefore be called alternately,
    `loss` first.
    '''

    def __init__(self, model):
        '''model: any object providing `eval_loss_and_grads(x)` that returns
        a `(loss_value, grad_values)` pair.'''
        self.loss_value = None
        # same attribute name that loss() and grads() read and write
        self.grad_values = None
        self.model = model

    def loss(self, x):
        '''Evaluate loss and gradients at `x`, cache both, return the loss.'''
        # a pending result means grads() was not called since the last loss()
        assert self.loss_value is None
        loss_value, grad_values = self.model.eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        '''Return a copy of the gradients cached by the preceding `loss`
        call and reset the cache; `x` itself is not used.'''
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values
# Build the NNF model on top of the VGG network from the three input images
# and the (1, 3, img_width, img_height) shape of the generated image, then
# wrap it in an Evaluator, which calls the model's eval_loss_and_grads method.
nnf = NNFModel(model, args)
nnf.build(style_img, style_img_mask, content_img, (1, 3, img_width, img_height))
# alternative model, currently disabled:
#analogy = AnalogyModel(model, args)
#analogy.build(style_img, style_img_mask, content_img, (1, 3, img_width, img_height))
evaluator = Evaluator(nnf)
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
assert args.init_image in ["content", "noise"], "init_image must be one of ['content', 'noise']"
if args.init_image == "content":
    # start from the content image
    x = preprocess_image(content_image_path, True, )
else:
    # start from uniform random noise
    x = np.random.uniform(0, 255, (1, 3, img_width, img_height))

num_iter = args.num_iter
for i in range(num_iter):
    print('Start of iteration', (i+1))
    start_time = time.time()

    # one outer iteration = a short L-BFGS run; the evaluator supplies loss
    # and gradients from a single combined evaluation
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20, maxiter=20, m=4)
    print('Current loss value:', min_val)

    # save current generated image
    img = deprocess_image(x.reshape((3, img_width, img_height)))

    if maintain_aspect_ratio and not rescale_image:
        # derive the second dimension from img_width and aspect_ratio
        img_ht = int(img_width * aspect_ratio)
        print(img_width, img_ht)
        print("Rescaling Image to (%d, %d)" % (img_width, img_ht))
        img = imresize(img, (img_width, img_ht), interp=args.rescale_method)

    if rescale_image:
        # resize to the img_WIDTH x img_HEIGHT dimensions defined earlier in the script
        print("Rescaling Image to (%d, %d)" % (img_WIDTH, img_HEIGHT))
        img = imresize(img, (img_WIDTH, img_HEIGHT), interp=args.rescale_method)

    fname = result_prefix + '_at_iteration_%d.png' % (i+1)
    imsave(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i+1, end_time - start_time))
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/neowoodley/Neural-Style-Transfer.git
git@gitee.com:neowoodley/Neural-Style-Transfer.git
neowoodley
Neural-Style-Transfer
Neural-Style-Transfer
master

搜索帮助