From 074482998d0333a5b47c3e144f52e1fdd731f8ef Mon Sep 17 00:00:00 2001 From: s486797 Date: Sat, 10 Aug 2024 15:25:51 +0200 Subject: [PATCH] add initial code for app --- neural_style_app/.gitignore | 1 + neural_style_app/app.py | 99 +++++++ neural_style_app/mode_style_transfer.py | 258 ++++++++++++++++++ neural_style_app/templates/index.html | 71 +++++ neural_style_app/templates/visualize.html | 25 ++ .../neural_style_tutorial.ipynb | 65 ++++- 6 files changed, 512 insertions(+), 7 deletions(-) create mode 100644 neural_style_app/.gitignore create mode 100644 neural_style_app/app.py create mode 100644 neural_style_app/mode_style_transfer.py create mode 100644 neural_style_app/templates/index.html create mode 100644 neural_style_app/templates/visualize.html diff --git a/neural_style_app/.gitignore b/neural_style_app/.gitignore new file mode 100644 index 0000000..ba0430d --- /dev/null +++ b/neural_style_app/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/neural_style_app/app.py b/neural_style_app/app.py new file mode 100644 index 0000000..5bb27c6 --- /dev/null +++ b/neural_style_app/app.py @@ -0,0 +1,99 @@ +from flask import Flask, render_template, request, redirect, url_for, send_file, jsonify +from mode_style_transfer import StyleTransferModel, save_image, StyleTransferVisualizer +from PIL import Image +import io +import torch +from torchvision.models import vgg19, VGG19_Weights +import torchvision.transforms as transforms +import os +import matplotlib.pyplot as plt +import base64 + +app = Flask(__name__) + +# Image transformation +imsize = 512 if torch.cuda.is_available() else 128 +loader = transforms.Compose([ + transforms.Resize(imsize), + transforms.ToTensor() +]) + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +visualizations = [] + +def image_loader(image_bytes): + image = Image.open(io.BytesIO(image_bytes)) + image = loader(image).unsqueeze(0) + return image.to(device, torch.float) + +def tensor_to_image(tensor): + image = tensor.clone().detach().squeeze(0) + image = transforms.ToPILImage()(image) + return image + +def image_to_base64(image): + img_io = io.BytesIO() + image.save(img_io, 'JPEG') + img_io.seek(0) + return base64.b64encode(img_io.getvalue()).decode('utf-8') + + +@app.route('/', methods=['GET', 'POST']) +def index(): + if request.method == 'POST': + content_image_file = request.files['content_image'] + style_image_file = request.files['style_image'] + + # Load images directly from the uploaded files + content_image = Image.open(content_image_file) + style_image = Image.open(style_image_file) + + # Pass the images to the StyleTransferModel + style_transfer = StyleTransferModel(content_image, style_image) + output = style_transfer.run_style_transfer() + + # Convert the output tensor to an image + output_image = tensor_to_image(output) + + # Convert the image to Base64 for JSON response + image_base64 = image_to_base64(output_image) + + return jsonify({'image': image_base64}) + + return render_template('index.html') + +@app.route('/visualize', methods=['POST']) +def visualize(): + cnn = vgg19(weights=VGG19_Weights.DEFAULT).features.to(device).eval() + + cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device) + cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device) + + content_image_bytes = visualizations[0] # The last saved content image + content_image = image_loader(content_image_bytes) + + style_transfer = StyleTransferModel(content_image, content_image) + + # Running the model for 
visualization purpose + input_img = content_image.clone().requires_grad_(True) + + model, _, _ = style_transfer.get_style_model_and_losses( + cnn, cnn_normalization_mean, cnn_normalization_std, content_image, content_image) + + layer_visualizations = [] + + # Run the image through each layer and store the output + for i, layer in enumerate(model): + input_img = layer(input_img) + with torch.no_grad(): + output_image = tensor_to_image(input_img.clamp(0, 1)) + img_io = io.BytesIO() + output_image.save(img_io, 'JPEG') + img_io.seek(0) + layer_visualizations.append(img_io.getvalue()) # Save the image bytes + + return render_template('visualize.html', visualizations=layer_visualizations) + +if __name__ == '__main__': + app.run(debug=True) diff --git a/neural_style_app/mode_style_transfer.py b/neural_style_app/mode_style_transfer.py new file mode 100644 index 0000000..1b89c75 --- /dev/null +++ b/neural_style_app/mode_style_transfer.py @@ -0,0 +1,258 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim + +from PIL import Image +import matplotlib.pyplot as plt + +import torchvision.transforms as transforms +from torchvision.models import vgg19, VGG19_Weights + +from torchvision import models +import matplotlib.pyplot as plt + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +torch.set_default_device(device) + +def image_loader(image): + #image = Image.open(image_name) + # fake batch dimension required to fit network's input dimensions + + imsize = 512 if torch.cuda.is_available() else 128 # use small size if no GPU + + loader = transforms.Compose([ + transforms.Resize(imsize), # scale imported image + transforms.ToTensor()]) # transform it into a torch tensor + image = loader(image).unsqueeze(0) + return image.to(device, torch.float) + +def save_image(tensor, path): + image = tensor.clone().detach() + image = image.squeeze(0) + image = transforms.ToPILImage()(image) + image.save(path) + +class ContentLoss(nn.Module): + def __init__(self, target,): + super(ContentLoss, self).__init__() + # we 'detach' the target content from the tree used + # to dynamically compute the gradient: this is a stated value, + # not a variable. Otherwise the forward method of the criterion + # will throw an error. + self.target = target.detach() + def forward(self, input): + self.loss = F.mse_loss(input, self.target) + return input + +def gram_matrix(input): + a, b, c, d = input.size() # a=batch size(=1) + # b=number of feature maps + # (c,d)=dimensions of a f. map (N=c*d) + features = input.view(a * b, c * d) # resize F_XL into \hat F_XL + G = torch.mm(features, features.t()) # compute the gram product + # we 'normalize' the values of the gram matrix + # by dividing by the number of element in each feature maps. 
+ return G.div(a * b * c * d) + +class StyleLoss(nn.Module): + def __init__(self, target_feature): + super(StyleLoss, self).__init__() + self.target = gram_matrix(target_feature).detach() + def forward(self, input): + G = gram_matrix(input) + self.loss = F.mse_loss(G, self.target) + return input + +#cnn = vgg19(weights=VGG19_Weights.DEFAULT).features.eval() + +#cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]) +#cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]) + +# create a module to normalize input image so we can easily put it in a +# ``nn.Sequential`` +class Normalization(nn.Module): + def __init__(self, mean, std): + super(Normalization, self).__init__() + # .view the mean and std to make them [C x 1 x 1] so that they can + # directly work with image Tensor of shape [B x C x H x W]. + # B is batch size. C is number of channels. H is height and W is width. + self.mean = torch.tensor(mean).view(-1, 1, 1) + self.std = torch.tensor(std).view(-1, 1, 1) + def forward(self, img): + # normalize ``img`` + return (img - self.mean) / self.std + +# desired depth layers to compute style/content losses : +content_layers_default = ['conv_4'] +style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5'] + +def get_style_model_and_losses(cnn, normalization_mean, normalization_std, + style_img, content_img, + content_layers=content_layers_default, + style_layers=style_layers_default): + # normalization module + normalization = Normalization(normalization_mean, normalization_std) + + # just in order to have an iterable access to or list of content/style + # losses + content_losses = [] + style_losses = [] + + # assuming that ``cnn`` is a ``nn.Sequential``, so we make a new ``nn.Sequential`` + # to put in modules that are supposed to be activated sequentially + model = nn.Sequential(normalization) + + i = 0 # increment every time we see a conv + for layer in cnn.children(): + if isinstance(layer, nn.Conv2d): + i += 1 + name = 'conv_{}'.format(i) + elif isinstance(layer, nn.ReLU): + name = 'relu_{}'.format(i) + # The in-place version doesn't play very nicely with the ``ContentLoss`` + # and ``StyleLoss`` we insert below. So we replace with out-of-place + # ones here. 
+ layer = nn.ReLU(inplace=False) + elif isinstance(layer, nn.MaxPool2d): + name = 'pool_{}'.format(i) + elif isinstance(layer, nn.BatchNorm2d): + name = 'bn_{}'.format(i) + else: + raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__)) + + model.add_module(name, layer) + + if name in content_layers: + # add content loss: + target = model(content_img).detach() + content_loss = ContentLoss(target) + model.add_module("content_loss_{}".format(i), content_loss) + content_losses.append(content_loss) + + if name in style_layers: + # add style loss: + target_feature = model(style_img).detach() + style_loss = StyleLoss(target_feature) + model.add_module("style_loss_{}".format(i), style_loss) + style_losses.append(style_loss) + + # now we trim off the layers after the last content and style losses + for i in range(len(model) - 1, -1, -1): + if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss): + break + + model = model[:(i + 1)] + + return model, style_losses, content_losses + +def get_input_optimizer(input_img): + # this line to show that input is a parameter that requires a gradient + optimizer = optim.LBFGS([input_img]) + return optimizer + +class StyleTransferModel: + def __init__(self, content_img, style_img, num_steps=300, style_weight=1000000, content_weight=1): + self.content_img = content_img + self.style_img = style_img.resize(content_img.size) + #self.style_img = self.style_img.resize(self.content_img.size) + self.style_img = image_loader(self.style_img) + self.content_img = image_loader(self.content_img) + self.input_img = self.content_img.clone() + self.num_steps = num_steps + self.style_weight = style_weight + self.content_weight = content_weight + + self.cnn = vgg19(weights=VGG19_Weights.DEFAULT).features.to(device).eval() + self.cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device) + self.cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device) + + def run_style_transfer(self): + print('Building the style transfer model..') + model, style_losses, content_losses = get_style_model_and_losses( + self.cnn, self.cnn_normalization_mean, self.cnn_normalization_std, + self.style_img, self.content_img) + + self.input_img.requires_grad_(True) + model.eval() + model.requires_grad_(False) + optimizer = get_input_optimizer(self.input_img) + + print('Optimizing..') + run = [0] + while run[0] <= self.num_steps: + def closure(): + with torch.no_grad(): + self.input_img.clamp_(0, 1) + + optimizer.zero_grad() + model(self.input_img) + + style_score = 0 + content_score = 0 + + for sl in style_losses: + style_score += sl.loss + for cl in content_losses: + content_score += cl.loss + + style_score *= self.style_weight + content_score *= self.content_weight + + loss = style_score + content_score + loss.backward() + + run[0] += 1 + if run[0] % 50 == 0: + print(f"run {run[0]}:") + print(f'Style Loss : {style_score.item():4f} Content Loss: {content_score.item():4f}') + print() + + return style_score + content_score + + optimizer.step(closure) + + with torch.no_grad(): + self.input_img.clamp_(0, 1) + + return self.input_img + + +class StyleTransferVisualizer(StyleTransferModel): + def __init__(self, content_img, style_img): + super().__init__(content_img, style_img) + self.model_layers = self.get_model_layers() + + def get_model_layers(self): + cnn = models.vgg19(pretrained=True).features.to(self.device).eval() + model_layers = [] + i = 0 + for layer in cnn.children(): + if isinstance(layer, torch.nn.Conv2d): + i += 1 + 
+                model_layers.append((f'conv_{i}', layer))
+        return model_layers
+
+    def visualize_layers(self):
+        fig, axs = plt.subplots(len(self.model_layers), 3, figsize=(15, 20))
+
+        input_img = self.content_img.clone().detach()
+
+        for idx, (name, layer) in enumerate(self.model_layers):
+            input_img = layer(input_img)
+            axs[idx, 0].imshow(self.content_img.squeeze(0).permute(1, 2, 0).cpu().numpy())
+            axs[idx, 0].set_title("Original Image")
+            axs[idx, 0].axis('off')
+
+            axs[idx, 1].imshow(input_img.squeeze(0).permute(1, 2, 0).cpu().detach().numpy())
+            axs[idx, 1].set_title(f"After {name}")
+            axs[idx, 1].axis('off')
+
+            combined = input_img.clone()
+            combined += self.style_img.squeeze(0)
+            axs[idx, 2].imshow(combined.permute(1, 2, 0).cpu().detach().numpy())
+            axs[idx, 2].set_title(f"Combined (Content + Style) after {name}")
+            axs[idx, 2].axis('off')
+
+        plt.tight_layout()
+        plt.show()
\ No newline at end of file
diff --git a/neural_style_app/templates/index.html b/neural_style_app/templates/index.html
new file mode 100644
index 0000000..eb5723b
--- /dev/null
+++ b/neural_style_app/templates/index.html
@@ -0,0 +1,71 @@

+  [index.html markup was stripped when this patch was rendered as HTML; only the text survives.
+  The page is titled "Style Transfer", carries a heading of the same name, provides an upload
+  form for the content and style images (the content_image and style_image fields that index()
+  in app.py reads), and ends with a "Resulting Image:" area that displays the picture returned
+  by the "/" route.]
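The index() route that this form posts to accepts a multipart request with the two uploaded images and answers with JSON whose "image" field holds the stylized result as a base64-encoded JPEG. The following client sketch is not part of the patch; it only illustrates that contract, assumes the Flask development server started by app.py is listening on its default port 5000, and uses content.jpg, style.jpg and stylized.jpg as hypothetical local file names.

# Hypothetical client for the "/" route in app.py; start the server with "python app.py" first.
import base64
import requests

with open("content.jpg", "rb") as content, open("style.jpg", "rb") as style:
    resp = requests.post(
        "http://127.0.0.1:5000/",
        # Field names must match request.files['content_image'] / ['style_image'] in index().
        files={"content_image": content, "style_image": style},
    )
resp.raise_for_status()

# index() responds with {'image': '<base64-encoded JPEG>'}; decode it and write it to disk.
with open("stylized.jpg", "wb") as out:
    out.write(base64.b64decode(resp.json()["image"]))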
diff --git a/neural_style_app/templates/visualize.html b/neural_style_app/templates/visualize.html
new file mode 100644
index 0000000..4e57618
--- /dev/null
+++ b/neural_style_app/templates/visualize.html
@@ -0,0 +1,25 @@

+  [visualize.html markup was stripped when this patch was rendered as HTML; only the text
+  survives. The page is titled "Visualize Layers", carries the heading "Layer Visualizations"
+  and the prompt "Select a layer to view the image before and after processing through that
+  layer:", loops over {% for i in range(visualizations|length) %} ... {% endfor %} to render a
+  "Layer {{ i + 1 }}" control for each stored layer output, and ends with a "Layer Output:"
+  preview image.]
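As a standalone illustration (also not part of the patch), the StyleTransferModel class added in mode_style_transfer.py can be driven without the Flask front end. The image paths below are placeholders, and the keyword arguments simply restate the constructor defaults.

# Hypothetical driver for mode_style_transfer.StyleTransferModel. Without CUDA the
# module's image_loader resizes inputs to 128 px, so a CPU run is slow but feasible.
from PIL import Image
from mode_style_transfer import StyleTransferModel, save_image

content = Image.open("content.jpg").convert("RGB")  # convert() guards against RGBA/grayscale inputs
style = Image.open("style.jpg").convert("RGB")

model = StyleTransferModel(content, style, num_steps=300, style_weight=1_000_000, content_weight=1)
output = model.run_style_transfer()  # returns the optimized input tensor, shape (1, 3, H, W)
save_image(output, "stylized.jpg")

run_style_transfer() clamps the result to [0, 1] before returning, so the tensor can be passed straight to save_image().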
+ Layer Image Output + + + diff --git a/neural_style_pytorch/neural_style_tutorial.ipynb b/neural_style_pytorch/neural_style_tutorial.ipynb index d16315a..f2a3c2a 100644 --- a/neural_style_pytorch/neural_style_tutorial.ipynb +++ b/neural_style_pytorch/neural_style_tutorial.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 135, + "execution_count": 1, "metadata": { "collapsed": false }, @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 2, "metadata": { "collapsed": false }, @@ -91,11 +91,22 @@ }, { "cell_type": "code", - "execution_count": 137, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#torch.cuda.is_available()" + "torch.cuda.is_available()" ] }, { @@ -113,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -123,6 +134,26 @@ "torch.set_default_device(device)" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "device(type='cuda')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "device" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -150,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -181,6 +212,26 @@ " \"we need to import style and content images of the same size\"\n" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "512" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "imsize" + ] + }, { "cell_type": "markdown", "metadata": {},