Markiewicz_praca/data_tranformations.py

132 lines
3.9 KiB
Python

from collections import defaultdict
import copy
import random
from PIL import Image
from numpy.core.fromnumeric import sort
# Read the CSV as raw string cells: every comma separates two cells and no
# quoting or escaping rules are applied.
with open("data.csv") as f:
    raw_data = [text.replace("\n", "").split(",") for text in f]

# The first line holds the column names; everything after it is data rows.
columns, raw_data = list(raw_data[0]), raw_data[1:]
def transform_csv(columns, raw_data):
    """Compute the relative frequency of every value in every column.

    Args:
        columns: Column names; the position of a name is the index of the
            cell read from each row.
        raw_data: Rows of cells, each indexable by column position.

    Returns:
        A dict mapping each column name to a dict of ``value -> share``,
        where share is the value's occurrence count divided by the number
        of rows. Values appear in order of first occurrence.
    """
    shares_by_column = {}
    for index, name in enumerate(columns):
        tally = {}
        for record in raw_data:
            cell = record[index]
            tally[cell] = tally.get(cell, 0) + 1
        # Every row contributes exactly one cell, so the tally sums to the
        # number of rows.
        row_total = sum(tally.values())
        shares_by_column[name] = {
            cell: hits / row_total for cell, hits in tally.items()
        }
    return shares_by_column
import timeit

# Time one full pass of transform_csv over the loaded data; timeit reports
# seconds, so the printed value is in milliseconds.
time_result = timeit.Timer(lambda: transform_csv(columns, raw_data)).timeit(number=1)
print(time_result * 1000)
def generate_dataset(transformed_csv):
    """Split per-column value shares into a legend and plottable series.

    Args:
        transformed_csv: Mapping of column name to a ``value -> share`` dict.

    Returns:
        A ``(legend, series)`` tuple: ``legend`` lists the column names and
        ``series`` holds, for each column in the same order, its shares
        ordered by ascending value key.
    """
    legend = []
    series = []
    for name, shares in transformed_csv.items():
        legend.append(name)
        # Keys are unique, so sorting them and looking each one up gives the
        # shares in key order.
        series.append([shares[key] for key in sorted(shares)])
    return legend, series
def fig2img(fig):
    """Render a figure to SVG in memory and close it.

    Args:
        fig: Figure to serialize; it is closed via pyplot before returning.

    Returns:
        An ``io.BytesIO`` holding the SVG output, rewound to the start so
        it can be read immediately.
    """
    from io import BytesIO

    svg_buffer = BytesIO()
    fig.savefig(svg_buffer, format="svg")
    # Rewind so callers can read the SVG from the first byte.
    svg_buffer.seek(0)
    plt.close(fig)
    return svg_buffer
# Plotting (matplotlib) and numeric (numpy) packages
import matplotlib.pyplot as plt
import numpy as np
def benchmark_linear_to_csv(results):
    """Format benchmark results as CSV text, one ``key, value`` line per entry.

    Args:
        results: Mapping whose items are written out, e.g. repetition
            count to measured time.

    Returns:
        A string with one newline-terminated ``"{key}, {value}"`` line per
        item, in the mapping's iteration order; empty for an empty mapping.
    """
    # Join once instead of growing the string with += on every iteration.
    return "".join(f"{key}, {value}\n" for key, value in results.items())
def benchmark_linear(start, end, function):
    """Time ``function`` for each repetition count in ``range(start, end)``.

    Args:
        start: First repetition count (inclusive).
        end: Repetition count at which to stop (exclusive).
        function: Zero-argument callable to be timed.

    Returns:
        A dict mapping each repetition count to the total time, in
        milliseconds, that ``timeit`` measured for that many calls.
    """
    return {
        repeats: timeit.timeit(function, number=repeats) * 1000
        for repeats in range(start, end)
    }
# Example figure builders
def createLinePlot():
    """Build a line chart of fixed sample values for the seven weekdays.

    Returns:
        The newly created figure; it is not closed here.
    """
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    samples = [150, 230, 224, 218, 135, 147, 260]
    figure, axes = plt.subplots()
    axes.plot(weekdays, samples)
    return figure
def createBarPlot():
    """Build a horizontal bar chart from fixed example data.

    Side effects: reseeds NumPy's global random generator and resets
    matplotlib's rc parameters to their defaults.

    Returns:
        The newly created figure; it is not closed here.
    """
    np.random.seed(19680801)
    plt.rcdefaults()

    # Example data
    names = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
    scores = [1, 2, 3, 4, 5]
    positions = np.arange(len(names))

    figure, axes = plt.subplots()
    axes.barh(positions, scores, align='center')
    axes.set_yticks(positions, labels=names)
    # Flip the y axis so the first name ends up at the top.
    axes.invert_yaxis()
    axes.set_xlabel('Performance')
    axes.set_title('How fast do you want to go today?')
    return figure
def createPlotFromStratch():
    """Build a stacked horizontal bar for the first column of the CSV data.

    Reads the module-level ``columns`` and ``raw_data``, recomputes the value
    shares with ``transform_csv`` and ``generate_dataset``, and draws the
    first column's shares end to end as segments labelled '1' to '5'.

    Returns:
        The newly created figure; it is not closed here.
    """
    segment_names = ['1', '2', '3', '4', '5']
    legend, series = generate_dataset(transform_csv(columns, raw_data))
    column_name = legend[0]
    shares = series[0]

    fig, ax = plt.subplots()
    # Running left edge: each segment starts where the previous one ended.
    offset = 0
    # zip stops at the shorter input, so at most five segments are drawn.
    for segment_name, share in zip(segment_names, shares):
        ax.barh(column_name, share, 0.1, label=segment_name, left=offset)
        offset = offset + share
    ax.set_ylabel("Wyniki")
    ax.legend()
    return fig
# print("Czas tworzenia i zapisywania 100 wykresów:")
# time_result = timeit.timeit(lambda: fig2img(createPlotFromStratch()), number=100)
# print(time_result*1000)
# print("Czas zapisywania 100 wykresów:")
# figure = createPlotFromStratch()
# time_result = timeit.timeit(lambda: fig2img(figure), number=100)
# print(time_result*1000)
# Render the example bar chart and store it on disk as an SVG file.
with open("barPlotPython.svg", 'wb+') as f:
    svg_buffer = fig2img(createBarPlot())
    f.write(svg_buffer.read())

# Wait for a line on stdin before the benchmark output starts.
input()
print("Liniowy benchmark create from scratch")
def generate_and_close():
    """Create the CSV-based plot and then close every open pyplot figure.

    Returns:
        None.
    """
    createPlotFromStratch()
    # close('all') already disposes of the figure created above. The former
    # trailing plt.close()/plt.cla()/plt.clf() calls were dropped: cla() and
    # clf() act on the current figure, and pyplot creates a new one when none
    # exists, so they left an empty figure open after every call.
    plt.close('all')
# print(benchmark_linear_to_csv(benchmark_linear(1,5,lambda:plt.close(createPlotFromStratch()) )))
# Benchmark creating (and immediately closing) the example bar chart for
# repetition counts 1 through 19, then print the timings as CSV.
bar_plot_timings = benchmark_linear(1, 20, lambda: plt.close(createBarPlot()))
print(benchmark_linear_to_csv(bar_plot_timings))
# print("Liniowy benchmark create from scratch + save")
# print(benchmark_linear_to_csv(benchmark_linear(1,20,lambda: fig2img(createPlotFromStratch()) )))
# print("Liniowy benchmark zapisywania do bufora")
# print(benchmark_linear_to_csv(benchmark_linear(1,25,lambda: fig2img(figure) )))