16 changed files with 48 additions and 314 deletions
--- a/labs02/intro_task.py
+++ b/labs02/intro_task.py
@ -9,55 +9,48 @@ Zadania wprowadzające do pierwszych ćwiczeń.
 """
 Wypisz na ekran swoje imię i nazwisko.
 """
-print("Agnieszka Wagner")
+
 """
 Oblicz i wypisz na ekran pole koła o promienie 10. Jako PI przyjmij 3.14.
 """
-pole = 3.14 * (10.0 ** 2)
+
 print(pole)
 """
 Stwórz zmienną pole_kwadratu i przypisz do liczbę: pole kwadratu o boku 3.
 """
-pole_kwadratu = 3 ** 2
+
 """
 Stwórz 3 elementową listę, która zawiera nazwy 3 Twoich ulubionych owoców.
 Wynik przypisz do zmiennej `owoce`.
 """
-owoce = ['jabłko', 'gruszka', 'malina']
+
 """
 Dodaj do powyższej listy jako nowy element "pomidor".
 """
-owoce.append("pomidor")
+
 print(owoce)
 """
 Usuń z powyższej listy drugi element.
 """
-owoce.pop(1)
+
-print(owoce)
+
 """
 Rozszerz listę o tablice ['Jabłko', "Gruszka"].
 """
-owoce.append(['Jabłko', "Gruszka"])
+
 print(owoce)
 """
 Wyświetl listę owoce, ale bez pierwszego i ostatniego elementu.
 """
-print(owoce[1:-1])
+
 """
 Wyświetl co trzeci element z listy owoce.
 """
-print(owoce[::3])
+
 """
 Stwórz pusty słownik i przypisz go do zmiennej magazyn.
 """
-magazyn = {}
+
 """
 Dodaj do słownika magazyn owoce z listy owoce, tak, aby owoce były kluczami,
 zaś wartościami były równe 5.
 """
 for i in owoce:
    magazyn[i] = 5
 print(magazyn)
--- a/labs02/task01.py
+++ b/labs02/task01.py
@ -7,8 +7,7 @@ która zawiera tylko elementy z list o parzystych indeksach.
 """
 def even_elements(lista):
-    return(lista[::2])
+    pass
 def tests(f):
@ -24,4 +23,3 @@ def tests(f):
 if __name__ == "__main__":
    print(tests(even_elements))
--- a/labs02/task02.py
+++ b/labs02/task02.py
@ -6,10 +6,7 @@
 """
 def days_in_year(days):
-    if (days % 4 == 0 and ((days % 100 != 0) or (days % 400 == 0))):
+    pass
        return 366
    else:
        return 365
 def tests(f):
    inputs = [[2015], [2012], [1900], [2400], [1977]]
--- a/labs02/task03.py
+++ b/labs02/task03.py
@ -13,13 +13,7 @@ jak 'set', która przechowuje elementy bez powtórzeń.)
 def oov(text, vocab):
-    flag = []
+    pass
    textSegm = set(text.split(' '))
    for word in textSegm:
        if word not in vocab:
            flag.append(word)
    return flag
@ -36,9 +30,3 @@ def tests(f):
 if __name__ == "__main__":
    print(tests(oov))
 text = "this is a string , which i will use for string testing"
 textSegm = set(text.split(' '))
 print(textSegm)
 len(textSegm)
--- a/labs02/task04.py
+++ b/labs02/task04.py
@ -7,14 +7,7 @@ Jeśli podany argument jest mniejszy od 1 powinna być zwracana wartość 0.
 """
 def sum_from_one_to_n(n):
-    sum = 0
+    pass
    if n < 1:
        return 0
    else:
        for i in range(n+1):
            sum += i
    return sum
 def tests(f):
--- a/labs02/task05.py
+++ b/labs02/task05.py
@ -10,13 +10,7 @@ np. odległość pomiędzy punktami (0, 0, 0) i (3, 4, 0) jest równa 5.
 """
 def euclidean_distance(x, y):
-
+    pass
    sum = 0
    for i in range(len(x)):
        result = (x[i] - y[i])**2
        sum += result
    return(sum**0.5)
 def tests(f):
    inputs = [[(2.3, 4.3, -7.5), (2.3, 8.5, -7.5)]]
--- a/labs02/task06.py
+++ b/labs02/task06.py
@ -10,10 +10,7 @@ ma być zwracany napis "It's not a Big 'No!'".
 """
 def big_no(n):
-    if n >= 5 :
+    pass
        return("N"+("O"*n)+"!")
    else :
        return("It's not a Big 'No!'")
 def tests(f):
    inputs = [[5], [6], [2]]
--- a/labs02/task07.py
+++ b/labs02/task07.py
@ -6,10 +6,7 @@ Napisz funkcję char_sum, która dla zadanego łańcucha zwraca
 sumę kodów ASCII znaków.
 """
 def char_sum(text):
-    x = 0
+    pass
    for c in text:
        x += ord(c)
    return (x)
 def tests(f):
    inputs = [["this is a string"], ["this is another string"]]
--- a/labs02/task08.py
+++ b/labs02/task08.py
@ -7,11 +7,7 @@ przez 3 lub 5 mniejszych niż n.
 """
 def sum_div35(n):
-    x = 0
+    pass
    for i in range(n):
        if ( i % 3 == 0 or i % 5 == 0 ) :
            x += i
    return(x)
 def tests(f):
    inputs = [[10], [100], [3845]]
--- a/labs02/task09.py
+++ b/labs02/task09.py
@ -9,15 +9,8 @@ Np. leet('leet') powinno zwrócić '1337'.
 def leet_speak(text):
-    if 'e' in text :
+    pass
-        text = text.replace("e", "3")
+
    if "l" in text :
        text = text.replace("l", "1")
    if "o" in text :
        text = text.replace("o", "0")
    if "t" in text :
        text = text.replace("t", "7")
    return(text)
 def tests(f):
    inputs = [['leet'], ['do not want']]
--- a/labs02/task10.py
+++ b/labs02/task10.py
@ -9,13 +9,7 @@ na wielką. Np. pokemon_speak('pokemon') powinno zwrócić 'PoKeMoN'.
 def pokemon_speak(text):
-    if text[:].isupper() == True :
+    pass
        return(text)
    else :
        R = [''] * len(text)
        R[::2], R[1::2] = text[::2].upper(), text[1::2].lower()
        R = ''.join(R)
        return(R)
 def tests(f):
--- a/labs02/task11.py
+++ b/labs02/task11.py
@ -9,12 +9,8 @@ Oba napisy będą składać się wyłacznie z małych liter.
 """
 def common_chars(string1, string2):
-    string1 = "this is a string"
+    pass
-    string2 = "ala ma kota"
+
    s = set(string1.replace(" ", ""))
    t = set(string2.replace(" ", ""))
    intersect = s & t
    return(sorted(list(intersect)))
 def tests(f):
    inputs = [["this is a string", "ala ma kota"]]
--- a/labs04/labs04Task5.py
+++ b/labs04/labs04Task5.py
@ -1,21 +0,0 @@
 import glob
 filelist = glob.glob('scores\\*.bleu')
 bleu_filename = ''
 max_bleu = 0
 def find_bleu(bleu_list, max_bleu):
    for bleufile in bleu_list:
        content = open(bleufile, 'r').read()
        bleu = content.split(r',')
        bleu_datum = bleu[0].split()
        if max_bleu <= float(bleu_datum[2]):
            max_bleu = float(bleu_datum[2])
            bleu_filename = bleufile
    return bleu_filename, max_bleu
 # filename, max_bleu = find_bleu([filelist[0]], max_bleu)
 filename, max_bleu = find_bleu(filelist, max_bleu)
 print(filename)
--- a/labs06/linearModel.py
+++ b/labs06/linearModel.py
@ -1,56 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import sklearn
 import pandas as pd
 import numpy as np
 dane = pd.read_csv("mieszkania.csv")
 print(dane.head())
 print(dane.columns)
 # check data for outliers
 from matplotlib import pyplot as plt
 plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
 plt.show()
 # remove all data points that have expected price <= 500.000 and living area <= 200 sqrt meters
 plt.scatter(dane['Rooms'], dane['Expected'], color='g')
 plt.show()
 # remove all data points that represent flats with more than 8 rooms
 flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
 print(flats.head(20))
 y = flats['Expected']
 X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
               'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
 print(y.head())
 print(X.head())
 from sklearn.model_selection import train_test_split
 train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)
 from sklearn.linear_model import LinearRegression
 model = LinearRegression()
 model.fit(X,y)
 predicted = model.predict(test_X)
 print("Predictions:", predicted[:5])
 for p in zip(train_X.columns, model.coef_):
    print("Intercept for {}: {:.3}".format(p[0], p[1]))
 from sklearn.metrics import mean_squared_error
 rmse = np.sqrt(mean_squared_error(predicted, test_y))
 print("RMSE:", rmse)
 r2 = model.score(test_X, test_y)
 print("R squared:", r2) # 0.54 comparing to 0.02 before cleaning the data
--- a/labs06/task02.py
+++ b/labs06/task02.py
@ -1,22 +1,14 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import pandas as pd
 import matplotlib.pyplot as plt
 def wczytaj_dane():
-    dane = pd.read_csv("mieszkania.csv")
+    pass
    print(dane.head())
    return(dane)
 def most_common_room_number(dane):
-    return(dane['Rooms'].value_counts().idxmax())
+    pass
 def cheapest_flats(dane, n):
-    p = dane.sort_values(['Expected'], ascending=[0])
+    pass
    p.head(7)
 def find_borough(desc):
    dzielnice = ['Stare Miasto',
@ -26,139 +18,37 @@ def find_borough(desc):
                 'Piątkowo',
                 'Winogrady',
                 'Miłostowo',
-                 'Dębiec',
+                 'Dębiec']
-                 'Grunwald',
+    pass
                 'Nowe Miasto']
    check = 0
    for dzielnica in dzielnice:
        if dzielnica in desc:
            check = 1
            save_dzielnica = dzielnica
    if check == 1:
        return(save_dzielnica)
    else:
        return("Inne")
 def add_borough(dane):
-    dzielnice = ['Stare Miasto',
+    pass
                     'Wilda',
                     'Jeżyce',
                     'Rataje',
                     'Piątkowo',
                     'Winogrady',
                     'Miłostowo',
                     'Dębiec',
                     'Grunwald',
                     'Nowe Miasto']
    Borough = []
    column = dane['Location']
    for item in column:
        check = 0
        for dzielnica in dzielnice:
            if dzielnica in item:
                check = 1
                save_dzielnica = dzielnica
        if check == 1:
            Borough.append(save_dzielnica)
        else:
            Borough.append("Inne")
    Borough = pd.DataFrame(Borough)
    dane = pd.concat([dane.reset_index(drop=True), Borough], axis=1)
    print(dane)
 def write_plot(dane, filename):
-    dane.groupby('Borough')['Id'].nunique().plot(kind='bar')
+    pass
    plt.show()
    plt.savefig('output.png')
 def mean_price(dane, room_number):
-    p1 = dane[dane['Rooms'] == room_number]
+    pass
    p2 = p1['Expected']
    return(p2.mean())
 def find_13(dane):
-    p1 = dane[dane['Floor'] == 13]
+    pass
    p1.Location.unique()
 def find_best_flats(dane):
-    p_index = dane['Location'].str.contains('Winogrady')
+    pass
    p = dane[p_index]
    best_flats = p[(p['Rooms'] == 3) & (p['Floor'] == 1)]
    print(best_flats)
 def main():
    dane = wczytaj_dane()
-
+    print(dane[:5])
    print("Najpopularniejsza liczba pokoi w mieszkaniu to: {}"
          .format(most_common_room_number(dane)))
-    print("{} to najładniejsza dzielnica w Poznaniu."
+    print("{} to najłądniejsza dzielnica w Poznaniu."
-          .format(find_borough("Grunwald i Jeżyce")))
+          .format(find_borough("Grunwald i Jeżyce"))))
    print("Średnia cena mieszkania 3-pokojowego, to: {}"
          .format(mean_price(dane, 3)))
 if __name__ == "__main__":
    main()
 # zadanie dodatkowe
 import sklearn
 import pandas as pd
 import numpy as np
 dane = pd.read_csv("mieszkania.csv")
 print(dane.head())
 print(dane.columns)
 # check data for outliers
 from matplotlib import pyplot as plt
 plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
 plt.show()
 # remove all data points that have expected price <= 500.000 and living area <= 200 sqrt meters
 plt.scatter(dane['Rooms'], dane['Expected'], color='g')
 plt.show()
 # remove all data points that represent flats with more than 8 rooms
 flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
 print(flats.head(20))
 y = flats['Expected']
 X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
               'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
 print(y.head())
 print(X.head())
 from sklearn.model_selection import train_test_split
 train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)
 from sklearn.linear_model import LinearRegression
 model = LinearRegression()
 model.fit(X,y)
 predicted = model.predict(test_X)
 print("Predictions:", predicted[:5])
 for p in zip(train_X.columns, model.coef_):
    print("Intercept for {}: {:.3}".format(p[0], p[1]))
 from sklearn.metrics import mean_squared_error
 rmse = np.sqrt(mean_squared_error(predicted, test_y))
 print("RMSE:", rmse)
 r2 = model.score(test_X, test_y)
 print("R squared:", r2) # 0.54 comparing to 0.02 before cleaning the data
--- a/labs06/tasks.py
+++ b/labs06/tasks.py
@ -4,92 +4,77 @@
 """
 1. Zaimportuj bibliotkę pandas jako pd.
 """
-import pandas as pd
+
 """
-2. Wczytaj zbiór danych `311.csv` do zmiennej data.
+2. Wczytaj zbiór danych `311.csv` do zniennej data.
 """
 data = pd.read_csv("311.csv", low_memory=False)
 """
 3. Wyświetl 5 pierwszych wierszy z data.
 """
-print(data.head())
+
 """
 4. Wyświetl nazwy kolumn.
 """
 print(data.columns)
 """
 5. Wyświetl ile nasz zbiór danych ma kolumn i wierszy.
 """
-shape = data.shape
+
 print(shape)
 """
 6. Wyświetl kolumnę 'City' z powyższego zbioru danych.
 """
-print(data['City'])
+
 """
 7. Wyświetl jakie wartoścu przyjmuje kolumna 'City'.
 """
 data.City.unique()
 """
 8. Wyświetl tabelę rozstawną kolumny City.
 """
-t = data.City.value_counts()
+
 print(t)
 """
 9. Wyświetl tylko pierwsze 4 wiersze z wcześniejszego polecenia.
 """
-t.head(4)
+
 """
 10. Wyświetl, w ilu przypadkach kolumna City zawiera NaN.
 """
-p = pd.DataFrame(data['City'].isnull())
+
 t = p[p['City'] == True]
 shape = t.shape
 rows = shape[0]
 print(rows)
 """
 11. Wyświetl data.info()
 """
-print(data.info())
+
 """
 12. Wyświetl tylko kolumny Borough i Agency i tylko 5 ostatnich linii.
 """
-print(data[['Borough', 'Agency']].tail())
+
 """
 13. Wyświetl tylko te dane, dla których wartość z kolumny Agency jest równa
 NYPD. Zlicz ile jest takich przykładów.
 """
-p = data[data['Agency'] == 'NYPD']
+
 p.Agency.value_counts()
 """
 14. Wyświetl wartość minimalną i maksymalną z kolumny Longitude.
 """
 data['Longitude'].max()
 data['Longitude'].min()
 """
 15. Dodaj kolumne diff, która powstanie przez sumowanie kolumn Longitude i Latitude.
 """
 data['diff'] = data['Longitude'] + data['Latitude']
 """
 16. Wyświetl tablę rozstawną dla kolumny 'Descriptor', dla której Agency jest
 równe NYPD.
 """
 p = data[data['Agency'] == 'NYPD']
 p.Descriptor.value_counts()