passed

2018-06-23 01:00:53 +02:00 · 2018-06-22 22:38:06 +02:00 · 2018-06-03 20:24:02 +02:00 · 2018-06-03 12:34:25 +02:00 · 2018-06-03 11:11:15 +02:00 · 2018-06-03 11:07:28 +02:00
16 changed files with 314 additions and 48 deletions
--- a/labs02/intro_task.py
+++ b/labs02/intro_task.py
@ -9,48 +9,55 @@ Zadania wprowadzające do pierwszych ćwiczeń.
 """
 Wypisz na ekran swoje imię i nazwisko.
 """
-
+print("Agnieszka Wagner")

 """
 Oblicz i wypisz na ekran pole koła o promienie 10. Jako PI przyjmij 3.14.
 """
-
+pole = 3.14 * (10.0 ** 2)
+print(pole)
 """
 Stwórz zmienną pole_kwadratu i przypisz do liczbę: pole kwadratu o boku 3.
 """
-
+pole_kwadratu = 3 ** 2
 """
 Stwórz 3 elementową listę, która zawiera nazwy 3 Twoich ulubionych owoców.
 Wynik przypisz do zmiennej `owoce`.
 """
-
+owoce = ['jabłko', 'gruszka', 'malina']
 """
 Dodaj do powyższej listy jako nowy element "pomidor".
 """
-
+owoce.append("pomidor")
+print(owoce)
 """
 Usuń z powyższej listy drugi element.
 """
-
-
+owoce.pop(1)
+print(owoce)
 """
 Rozszerz listę o tablice ['Jabłko', "Gruszka"].
 """
-
+owoce.extend(['Jabłko', "Gruszka"])  # extend adds both names as items; append nested a list, which is unhashable as a dict key later on
+print(owoce)
 """
 Wyświetl listę owoce, ale bez pierwszego i ostatniego elementu.
 """
-
+print(owoce[1:-1])
 """
 Wyświetl co trzeci element z listy owoce.
 """
-
+print(owoce[::3])
 """
 Stwórz pusty słownik i przypisz go do zmiennej magazyn.
 """
-
+magazyn = {}
 """
 Dodaj do słownika magazyn owoce z listy owoce, tak, aby owoce były kluczami,
 zaś wartościami były równe 5.
 """

+for i in owoce:
+    magazyn[i] = 5
+
+print(magazyn)
--- a/labs02/task01.py
+++ b/labs02/task01.py
@ -7,7 +7,8 @@ która zawiera tylko elementy z list o parzystych indeksach.
 """

 def even_elements(lista):
-    pass
+    return(lista[::2])
+


 def tests(f):
@ -23,3 +24,4 @@ def tests(f):

 if __name__ == "__main__":
    print(tests(even_elements))
+
--- a/labs02/task02.py
+++ b/labs02/task02.py
@ -6,7 +6,10 @@
 """

 def days_in_year(days):
-    pass
+    if (days % 4 == 0 and ((days % 100 != 0) or (days % 400 == 0))):
+        return 366
+    else:
+        return 365

 def tests(f):
    inputs = [[2015], [2012], [1900], [2400], [1977]]
--- a/labs02/task03.py
+++ b/labs02/task03.py
@ -13,7 +13,13 @@ jak 'set', która przechowuje elementy bez powtórzeń.)


 def oov(text, vocab):
-    pass
+    """Return the words of `text` that do not occur in `vocab`.
+
+    `text` is split on single spaces. Each missing word is reported once,
+    in order of first appearance, so the result does not depend on set
+    iteration order.
+    """
+    return [w for w in dict.fromkeys(text.split(' ')) if w not in vocab]



@ -30,3 +36,9 @@ def tests(f):

 if __name__ == "__main__":
    print(tests(oov))
+
+
+text = "this is a string , which i will use for string testing"
+textSegm = set(text.split(' '))
+print(textSegm)
+len(textSegm)
--- a/labs02/task04.py
+++ b/labs02/task04.py
@ -7,7 +7,14 @@ Jeśli podany argument jest mniejszy od 1 powinna być zwracana wartość 0.
 """

 def sum_from_one_to_n(n):
-    pass
+    sum = 0
+    if n < 1:
+        return 0
+    else:
+        for i in range(n+1):
+            sum += i
+    return sum
+


 def tests(f):
--- a/labs02/task05.py
+++ b/labs02/task05.py
@ -10,7 +10,13 @@ np. odległość pomiędzy punktami (0, 0, 0) i (3, 4, 0) jest równa 5.
 """

 def euclidean_distance(x, y):
-    pass
+
+    sum = 0
+    for i in range(len(x)):
+        result = (x[i] - y[i])**2
+        sum += result
+    return(sum**0.5)
+

 def tests(f):
    inputs = [[(2.3, 4.3, -7.5), (2.3, 8.5, -7.5)]]
--- a/labs02/task06.py
+++ b/labs02/task06.py
@ -10,7 +10,10 @@ ma być zwracany napis "It's not a Big 'No!'".
 """

 def big_no(n):
-    pass
+    if n >= 5 :
+        return("N"+("O"*n)+"!")
+    else :
+        return("It's not a Big 'No!'")

 def tests(f):
    inputs = [[5], [6], [2]]
--- a/labs02/task07.py
+++ b/labs02/task07.py
@ -6,7 +6,10 @@ Napisz funkcję char_sum, która dla zadanego łańcucha zwraca
 sumę kodów ASCII znaków.
 """
 def char_sum(text):
-    pass
+    x = 0
+    for c in text:
+        x += ord(c)
+    return (x)

 def tests(f):
    inputs = [["this is a string"], ["this is another string"]]
--- a/labs02/task08.py
+++ b/labs02/task08.py
@ -7,7 +7,11 @@ przez 3 lub 5 mniejszych niż n.
 """

 def sum_div35(n):
-    pass
+    x = 0
+    for i in range(n):
+        if ( i % 3 == 0 or i % 5 == 0 ) :
+            x += i
+    return(x)

 def tests(f):
    inputs = [[10], [100], [3845]]
--- a/labs02/task09.py
+++ b/labs02/task09.py
@ -9,8 +9,15 @@ Np. leet('leet') powinno zwrócić '1337'.


 def leet_speak(text):
-    pass
-
+    if 'e' in text :
+        text = text.replace("e", "3")
+    if "l" in text :
+        text = text.replace("l", "1")
+    if "o" in text :
+        text = text.replace("o", "0")
+    if "t" in text :
+        text = text.replace("t", "7")
+    return(text)

 def tests(f):
    inputs = [['leet'], ['do not want']]
--- a/labs02/task10.py
+++ b/labs02/task10.py
@ -9,7 +9,13 @@ na wielką. Np. pokemon_speak('pokemon') powinno zwrócić 'PoKeMoN'.


 def pokemon_speak(text):
-    pass
+    if text[:].isupper() == True :
+        return(text)
+    else :
+        R = [''] * len(text)
+        R[::2], R[1::2] = text[::2].upper(), text[1::2].lower()
+        R = ''.join(R)
+        return(R)


 def tests(f):
--- a/labs02/task11.py
+++ b/labs02/task11.py
@ -9,8 +9,12 @@ Oba napisy będą składać się wyłacznie z małych liter.
 """

 def common_chars(string1, string2):
-    pass
-
+    """Return the sorted list of characters present in both strings, spaces ignored."""
+    # Work on the arguments as passed in; they used to be overwritten with
+    # fixed sample strings, so every call returned the same answer.
+    s = set(string1.replace(" ", ""))
+    t = set(string2.replace(" ", ""))
+    return sorted(s & t)

 def tests(f):
    inputs = [["this is a string", "ala ma kota"]]
--- a/labs04/labs04Task5.py
+++ b/labs04/labs04Task5.py
@ -0,0 +1,21 @@
+import glob
+
+filelist = glob.glob('scores/*.bleu')  # '/' is accepted on Windows as well; the literal '\\' separator only matched there
+bleu_filename = ''
+max_bleu = 0
+
+
+def find_bleu(bleu_list, max_bleu):
+    """Return (path, score) of the highest score >= max_bleu; path is '' if no file qualifies."""
+    bleu_filename = ''  # bound up front: it was unbound (UnboundLocalError) when nothing qualified
+    for bleufile in bleu_list:
+        with open(bleufile, 'r') as f:  # close each score file as soon as it is read
+            score = float(f.read().split(',')[0].split()[2])  # third token before the first comma
+        if max_bleu <= score:
+            max_bleu, bleu_filename = score, bleufile
+    return bleu_filename, max_bleu
+
+
+# filename, max_bleu = find_bleu([filelist[0]], max_bleu)
+filename, max_bleu = find_bleu(filelist, max_bleu)
+print(filename)
--- a/labs06/linearModel.py
+++ b/labs06/linearModel.py
@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sklearn
+import pandas as pd
+import numpy as np
+
+dane = pd.read_csv("mieszkania.csv")
+print(dane.head())
+print(dane.columns)
+
+# check data for outliers
+from matplotlib import pyplot as plt
+plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
+plt.show()
+# keep only data points with expected price <= 500,000 and living area <= 200 square meters
+plt.scatter(dane['Rooms'], dane['Expected'], color='g')
+plt.show()
+# keep only data points that represent flats with fewer than 10 rooms
+
+flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
+print(flats.head(20))
+
+y = flats['Expected']
+X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
+               'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
+print(y.head())
+print(X.head())
+
+
+from sklearn.model_selection import train_test_split
+
+train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)
+
+from sklearn.linear_model import LinearRegression
+model = LinearRegression()
+model.fit(train_X, train_y)  # training split only: fitting on all of X, y leaked the test rows into the model
+
+
+predicted = model.predict(test_X)
+print("Predictions:", predicted[:5])
+
+for p in zip(train_X.columns, model.coef_):
+    print("Coefficient for {}: {:.3}".format(p[0], p[1]))  # coef_ holds per-feature coefficients, not the intercept
+
+from sklearn.metrics import mean_squared_error
+rmse = np.sqrt(mean_squared_error(predicted, test_y))
+print("RMSE:", rmse)
+
+r2 = model.score(test_X, test_y)
+
+print("R squared:", r2) # 0.54 comparing to 0.02 before cleaning the data
+
+
+
+
--- a/labs06/task02.py
+++ b/labs06/task02.py
@ -1,14 +1,22 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

+import pandas as pd
+import matplotlib.pyplot as plt
+
 def wczytaj_dane():
-    pass
+    dane = pd.read_csv("mieszkania.csv")
+    print(dane.head())
+    return(dane)

 def most_common_room_number(dane):
-    pass
+    return(dane['Rooms'].value_counts().idxmax())
+
+

 def cheapest_flats(dane, n):
-    pass
+    # The n cheapest offers: ascending sort on 'Expected', first n rows (was descending, fixed at 7, never returned).
+    return dane.sort_values('Expected').head(n)

 def find_borough(desc):
    dzielnice = ['Stare Miasto',
@ -18,37 +26,139 @@ def find_borough(desc):
                 'Piątkowo',
                 'Winogrady',
                 'Miłostowo',
-                 'Dębiec']
-    pass
+                 'Dębiec',
+                 'Grunwald',
+                 'Nowe Miasto']
+
+
+    check = 0
+    for dzielnica in dzielnice:
+        if dzielnica in desc:
+            check = 1
+            save_dzielnica = dzielnica
+    if check == 1:
+        return(save_dzielnica)
+    else:
+        return("Inne")
+


 def add_borough(dane):
-    pass
+    """Return `dane` (index reset) with a 'Borough' column matched from 'Location'; "Inne" when nothing matches."""
+    dzielnice = ['Stare Miasto',
+                 'Wilda',
+                 'Jeżyce',
+                 'Rataje',
+                 'Piątkowo',
+                 'Winogrady',
+                 'Miłostowo',
+                 'Dębiec',
+                 'Grunwald',
+                 'Nowe Miasto']
+    Borough = []
+    for item in dane['Location']:
+        check = 0
+        for dzielnica in dzielnice:
+            if dzielnica in item:
+                check = 1
+                save_dzielnica = dzielnica
+        if check == 1:
+            Borough.append(save_dzielnica)
+        else:
+            Borough.append("Inne")
+    # Attach the list under the name 'Borough' (the column write_plot groups by)
+    # and return the frame: the old concat produced a column named 0 and was lost.
+    dane = dane.reset_index(drop=True).assign(Borough=Borough)
+    print(dane)
+    return dane

 def write_plot(dane, filename):
-    pass
+    dane.groupby('Borough')['Id'].nunique().plot(kind='bar')
+    plt.savefig(filename)  # write to the requested file, and before show(), after which the figure may be gone
+    plt.show()

 def mean_price(dane, room_number):
-    pass
+    p1 = dane[dane['Rooms'] == room_number]
+    p2 = p1['Expected']
+    return(p2.mean())

 def find_13(dane):
-    pass
+    p1 = dane[dane['Floor'] == 13]
+    return p1.Location.unique()  # hand the distinct locations back; the result used to be discarded

 def find_best_flats(dane):
-    pass
+    p = dane[dane['Location'].str.contains('Winogrady')]
+    best_flats = p[(p['Rooms'] == 3) & (p['Floor'] == 1)]
+    print(best_flats)
+    return best_flats  # also return the rows so callers can use them, not only see them printed

 def main():
    dane = wczytaj_dane()
-    print(dane[:5])
+

    print("Najpopularniejsza liczba pokoi w mieszkaniu to: {}"
          .format(most_common_room_number(dane)))

-    print("{} to najłądniejsza dzielnica w Poznaniu."
-          .format(find_borough("Grunwald i Jeżyce"))))
+    print("{} to najładniejsza dzielnica w Poznaniu."
+          .format(find_borough("Grunwald i Jeżyce")))

    print("Średnia cena mieszkania 3-pokojowego, to: {}"
          .format(mean_price(dane, 3)))

 if __name__ == "__main__":
    main()
+
+
+
+
+# zadanie dodatkowe
+
+import sklearn
+import pandas as pd
+import numpy as np
+
+dane = pd.read_csv("mieszkania.csv")
+print(dane.head())
+print(dane.columns)
+
+# check data for outliers
+from matplotlib import pyplot as plt
+plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
+plt.show()
+# keep only data points with expected price <= 500,000 and living area <= 200 square meters
+plt.scatter(dane['Rooms'], dane['Expected'], color='g')
+plt.show()
+# keep only data points that represent flats with fewer than 10 rooms
+
+flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
+print(flats.head(20))
+
+y = flats['Expected']
+X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
+               'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
+print(y.head())
+print(X.head())
+
+
+from sklearn.model_selection import train_test_split
+
+train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)
+
+from sklearn.linear_model import LinearRegression
+model = LinearRegression()
+model.fit(train_X, train_y)  # training split only: fitting on all of X, y leaked the test rows into the model
+
+
+predicted = model.predict(test_X)
+print("Predictions:", predicted[:5])
+
+for p in zip(train_X.columns, model.coef_):
+    print("Coefficient for {}: {:.3}".format(p[0], p[1]))  # coef_ holds per-feature coefficients, not the intercept
+
+from sklearn.metrics import mean_squared_error
+rmse = np.sqrt(mean_squared_error(predicted, test_y))
+print("RMSE:", rmse)
+
+r2 = model.score(test_X, test_y)
+
+print("R squared:", r2) # 0.54 comparing to 0.02 before cleaning the data
--- a/labs06/tasks.py
+++ b/labs06/tasks.py
@ -4,77 +4,92 @@
 """
 1. Zaimportuj bibliotkę pandas jako pd.
 """
-
+import pandas as pd

 """
-2. Wczytaj zbiór danych `311.csv` do zniennej data.
+2. Wczytaj zbiór danych `311.csv` do zmiennej data.
 """

+data = pd.read_csv("311.csv", low_memory=False)

 """
 3. Wyświetl 5 pierwszych wierszy z data.
 """
-
+print(data.head())

 """
 4. Wyświetl nazwy kolumn.
 """

+print(data.columns)

 """
 5. Wyświetl ile nasz zbiór danych ma kolumn i wierszy.
 """
-
+shape = data.shape
+print(shape)

 """
 6. Wyświetl kolumnę 'City' z powyższego zbioru danych.
 """
-
+print(data['City'])

 """
 7. Wyświetl jakie wartoścu przyjmuje kolumna 'City'.
 """
+print(data.City.unique())  # the task asks to display the values; a bare expression prints nothing in a script

 """
 8. Wyświetl tabelę rozstawną kolumny City.
 """

-
+t = data.City.value_counts()
+print(t)
 """
 9. Wyświetl tylko pierwsze 4 wiersze z wcześniejszego polecenia.
 """
-
+print(t.head(4))  # print explicitly; a bare expression is silent outside an interactive session

 """
 10. Wyświetl, w ilu przypadkach kolumna City zawiera NaN.
 """

-
+p = pd.DataFrame(data['City'].isnull())
+t = p[p['City'] == True]
+shape = t.shape
+rows = shape[0]
+print(rows)

 """
 11. Wyświetl data.info()
 """
-
+print(data.info())
 """
 12. Wyświetl tylko kolumny Borough i Agency i tylko 5 ostatnich linii.
 """
-
+print(data[['Borough', 'Agency']].tail())

 """
 13. Wyświetl tylko te dane, dla których wartość z kolumny Agency jest równa
 NYPD. Zlicz ile jest takich przykładów.
 """
-
+p = data[data['Agency'] == 'NYPD']
+print(p.Agency.value_counts())  # show the NYPD row count; the value used to be computed and dropped
 """
 14. Wyświetl wartość minimalną i maksymalną z kolumny Longitude.
 """
+print(data['Longitude'].max())  # display the maximum, as the task requests
+print(data['Longitude'].min())  # display the minimum

 """
 15. Dodaj kolumne diff, która powstanie przez sumowanie kolumn Longitude i Latitude.
 """

+data['diff'] = data['Longitude'] + data['Latitude']

 """
 16. Wyświetl tablę rozstawną dla kolumny 'Descriptor', dla której Agency jest
 równe NYPD.
 """
+p = data[data['Agency'] == 'NYPD']
+print(p.Descriptor.value_counts())  # display the frequency table instead of discarding it
Author	SHA1	Message	Date
wagner.agnieszka	325549ab4a	passed	2018-06-23 01:00:53 +02:00
wagner.agnieszka	49fe26305c	passed	2018-06-22 22:38:06 +02:00
wagner.agnieszka	7a8dadbe3b	passed	2018-06-03 20:24:02 +02:00
s327689	91a014aa96	tasks	2018-06-03 12:34:25 +02:00
s327689	02c198ef0a	Merge branch 'master' of https://git.wmi.amu.edu.pl/s327689/Python2018	2018-06-03 11:11:15 +02:00
s327689	928056aa2e	tasks	2018-06-03 11:07:28 +02:00
s327689	fbc307f164	Merge branch 'master' of https://git.wmi.amu.edu.pl/tdwojak/Python2018	2018-06-03 10:10:58 +02:00
s327689	68c99447a9	tasks	2018-06-03 10:10:45 +02:00
wagner.agnieszka	f4e009d148	passed	2018-06-03 00:23:55 +02:00
wagner.agnieszka	bac748992f	passed	2018-06-01 19:28:21 +02:00
wagner.agnieszka	96214d2d3f	passed	2018-06-01 19:04:52 +02:00
wagner.agnieszka	6d503b377c	passed	2018-06-01 18:38:11 +02:00
wagner.agnieszka	fd1d22ef4d	passed	2018-06-01 17:55:41 +02:00
wagner.agnieszka	8e45dba2e5	passed	2018-06-01 17:32:25 +02:00
s327689	84ac4edce9	passed	2018-06-01 17:09:47 +02:00
s327689	7714575879	my results	2018-05-13 12:54:51 +02:00