forked from tdwojak/Python2018
Compare commits
No commits in common. "master" and "master" have entirely different histories.
@ -9,55 +9,48 @@ Zadania wprowadzające do pierwszych ćwiczeń.
|
||||
"""
Print your first and last name.
"""
print("Agnieszka Wagner")

"""
Compute and print the area of a circle with radius 10. Use 3.14 for PI.
"""
pole = 3.14 * (10.0 ** 2)
print(pole)

"""
Create the variable pole_kwadratu and assign it the area of a square of side 3.
"""
pole_kwadratu = 3 ** 2

"""
Create a 3-element list with the names of your 3 favourite fruits.
Assign the result to the variable `owoce`.
"""
owoce = ['jabłko', 'gruszka', 'malina']

"""
Append "pomidor" to the list above as a new element.
"""
owoce.append("pomidor")
print(owoce)

"""
Remove the second element from the list above.
"""
owoce.pop(1)
print(owoce)

"""
Extend the list with the array ['Jabłko', "Gruszka"].
"""
# extend() adds both names as separate elements.  append() would nest the
# whole list as one unhashable element and make the dictionary step at the
# bottom of this script fail with "TypeError: unhashable type: 'list'".
owoce.extend(['Jabłko', "Gruszka"])
print(owoce)

"""
Display the list owoce without its first and last element.
"""
print(owoce[1:-1])

"""
Display every third element of the list owoce.
"""
print(owoce[::3])

"""
Create an empty dictionary and assign it to the variable magazyn.
"""
magazyn = {}

"""
Add the fruits from the list owoce to the dictionary magazyn so that the
fruits are the keys and every value equals 5.
"""
for i in owoce:
    magazyn[i] = 5

print(magazyn)
|
@ -7,8 +7,7 @@ która zawiera tylko elementy z list o parzystych indeksach.
|
||||
"""
|
||||
|
||||
def even_elements(lista):
    """Return the elements of ``lista`` that sit at even indexes (0, 2, 4, ...).

    Args:
        lista: Any sliceable sequence.

    Returns:
        A new sequence of the same type holding every second element,
        starting with the first one.
    """
    return lista[::2]
|
||||
|
||||
|
||||
def tests(f):
|
||||
@ -24,4 +23,3 @@ def tests(f):
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(tests(even_elements))
|
||||
|
||||
|
@ -6,10 +6,7 @@
|
||||
"""
|
||||
|
||||
def days_in_year(days):
    """Return how many days the given year has (366 for leap years, else 365).

    Args:
        days: The year number.  The parameter name is kept as-is so existing
            keyword callers keep working.

    Returns:
        366 when the year is divisible by 4 and is either not divisible by
        100 or divisible by 400; otherwise 365.
    """
    is_leap = days % 4 == 0 and (days % 100 != 0 or days % 400 == 0)
    return 366 if is_leap else 365
|
||||
|
||||
def tests(f):
|
||||
inputs = [[2015], [2012], [1900], [2400], [1977]]
|
||||
|
@ -13,13 +13,7 @@ jak 'set', która przechowuje elementy bez powtórzeń.)
|
||||
|
||||
|
||||
def oov(text, vocab):
    """Return the out-of-vocabulary words of ``text``.

    Args:
        text: A string whose words are separated by single spaces.
        vocab: An iterable of known (hashable) words.

    Returns:
        A sorted list of the distinct words of ``text`` that do not occur in
        ``vocab``.  Sorting makes the result deterministic; the set-iteration
        order used before could differ between runs.
    """
    known = set(vocab)  # O(1) membership tests instead of scanning a list
    words = set(text.split(' '))
    return sorted(word for word in words if word not in known)
|
||||
|
||||
|
||||
|
||||
@ -36,9 +30,3 @@ def tests(f):
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(tests(oov))
|
||||
|
||||
|
||||
# Scratch check of the tokenisation used by oov(): split on single spaces and
# keep each distinct token once.
text = "this is a string , which i will use for string testing"
textSegm = set(text.split(' '))
print(textSegm)
# A bare len(...) expression is discarded when run as a script, so print it.
print(len(textSegm))
|
@ -7,14 +7,7 @@ Jeśli podany argument jest mniejszy od 1 powinna być zwracana wartość 0.
|
||||
"""
|
||||
|
||||
def sum_from_one_to_n(n):
    """Return 1 + 2 + ... + n, or 0 when ``n`` is smaller than 1.

    Args:
        n: Upper bound of the summed range (inclusive).

    Returns:
        The sum of all integers from 1 to ``n``; 0 for ``n < 1``.
    """
    if n < 1:
        return 0
    # Use the builtin sum() directly; the old local variable shadowed it.
    return sum(range(1, n + 1))
|
||||
|
||||
|
||||
def tests(f):
|
||||
|
@ -10,13 +10,7 @@ np. odległość pomiędzy punktami (0, 0, 0) i (3, 4, 0) jest równa 5.
|
||||
"""
|
||||
|
||||
def euclidean_distance(x, y):
    """Return the Euclidean distance between the points ``x`` and ``y``.

    Args:
        x: Coordinates of the first point (iterable of numbers).
        y: Coordinates of the second point (indexable sequence of numbers).

    Returns:
        The square root of the summed squared coordinate differences, e.g.
        5.0 for (0, 0, 0) and (3, 4, 0).

    Raises:
        IndexError: If ``y`` has fewer coordinates than ``x``.
    """
    squared = 0
    # Walk x and index into y so that a too-short y still raises IndexError.
    for axis, coordinate in enumerate(x):
        squared += (coordinate - y[axis]) ** 2
    return squared ** 0.5
|
||||
|
||||
def tests(f):
|
||||
inputs = [[(2.3, 4.3, -7.5), (2.3, 8.5, -7.5)]]
|
||||
|
@ -10,10 +10,7 @@ ma być zwracany napis "It's not a Big 'No!'".
|
||||
"""
|
||||
|
||||
def big_no(n):
    """Return a big "NO!" whose letter "O" is repeated ``n`` times.

    Args:
        n: Requested number of "O" letters.

    Returns:
        "N" + n * "O" + "!" when ``n`` is at least 5; otherwise the fixed
        message "It's not a Big 'No!'".
    """
    if n < 5:
        return "It's not a Big 'No!'"
    return "N" + "O" * n + "!"
|
||||
|
||||
def tests(f):
|
||||
inputs = [[5], [6], [2]]
|
||||
|
@ -6,10 +6,7 @@ Napisz funkcję char_sum, która dla zadanego łańcucha zwraca
|
||||
sumę kodów ASCII znaków.
|
||||
"""
|
||||
def char_sum(text):
    """Return the sum of the character codes (``ord``) of ``text``.

    Args:
        text: The string to sum over.

    Returns:
        The integer sum of ``ord(c)`` for every character; 0 for an empty
        string.
    """
    return sum(ord(char) for char in text)
|
||||
|
||||
def tests(f):
|
||||
inputs = [["this is a string"], ["this is another string"]]
|
||||
|
@ -7,11 +7,7 @@ przez 3 lub 5 mniejszych niż n.
|
||||
"""
|
||||
|
||||
def sum_div35(n):
    """Return the sum of the numbers below ``n`` divisible by 3 or by 5.

    Args:
        n: Exclusive upper bound of the scanned range ``0 .. n-1``.

    Returns:
        The sum of every ``i`` in ``range(n)`` with ``i % 3 == 0`` or
        ``i % 5 == 0``.
    """
    return sum(i for i in range(n) if i % 3 == 0 or i % 5 == 0)
|
||||
|
||||
def tests(f):
|
||||
inputs = [[10], [100], [3845]]
|
||||
|
@ -9,15 +9,8 @@ Np. leet('leet') powinno zwrócić '1337'.
|
||||
|
||||
|
||||
def leet_speak(text):
    """Return ``text`` with e, l, o and t replaced by 3, 1, 0 and 7.

    Args:
        text: The string to convert.

    Returns:
        The converted string, e.g. '1337' for 'leet'.  Only lower-case
        letters are replaced.
    """
    # One pass over the string; none of the replacement digits is itself a
    # source letter, so this equals the chained replace() calls.
    return text.translate(str.maketrans("elot", "3107"))
|
||||
|
||||
|
||||
def tests(f):
|
||||
inputs = [['leet'], ['do not want']]
|
||||
|
@ -9,13 +9,7 @@ na wielką. Np. pokemon_speak('pokemon') powinno zwrócić 'PoKeMoN'.
|
||||
|
||||
|
||||
def pokemon_speak(text):
    """Return ``text`` with alternating upper- and lower-case characters.

    Characters at even positions are upper-cased and characters at odd
    positions are lower-cased, e.g. 'pokemon' becomes 'PoKeMoN'.  A string
    that is already fully upper-case is returned unchanged.

    Args:
        text: The string to convert.

    Returns:
        The converted string.
    """
    if text.isupper():
        return text
    # Build the result per character: the earlier slice assignment failed
    # when upper() changed the length of a character (e.g. 'ß' -> 'SS').
    return ''.join(
        char.upper() if position % 2 == 0 else char.lower()
        for position, char in enumerate(text)
    )
|
||||
|
||||
|
||||
def tests(f):
|
||||
|
@ -9,12 +9,8 @@ Oba napisy będą składać się wyłacznie z małych liter.
|
||||
"""
|
||||
|
||||
def common_chars(string1, string2):
    """Return the sorted characters that occur in both strings.

    Spaces are ignored.  The arguments are used as passed in; previously
    they were overwritten with fixed sample strings, so every call returned
    the same answer.

    Args:
        string1: First string.
        string2: Second string.

    Returns:
        A sorted list of the distinct non-space characters shared by both
        strings.
    """
    first = set(string1.replace(" ", ""))
    second = set(string2.replace(" ", ""))
    return sorted(first & second)
|
||||
|
||||
|
||||
def tests(f):
|
||||
inputs = [["this is a string", "ala ma kota"]]
|
||||
|
@ -1,21 +0,0 @@
|
||||
import glob
import os

# Score files to scan.  os.path.join keeps the pattern working on every
# platform; a hard-coded backslash only matches on Windows.
filelist = glob.glob(os.path.join('scores', '*.bleu'))
bleu_filename = ''
max_bleu = 0


def find_bleu(bleu_list, max_bleu):
    """Find the score file with the highest BLEU value.

    The score is read as the third whitespace-separated token of the text
    before the first comma in each file.

    Args:
        bleu_list: Paths of the score files to inspect.
        max_bleu: Starting threshold; only files scoring at least this much
            can be selected.

    Returns:
        A tuple ``(filename, score)`` with the best file and its score.  On
        ties the later file wins.  When no file qualifies (including an
        empty ``bleu_list``) the result is ``('', max_bleu)``.
    """
    # Initialise the result so an empty list does not raise
    # UnboundLocalError at the return statement.
    bleu_filename = ''
    for bleufile in bleu_list:
        # The context manager closes the handle even if parsing fails.
        with open(bleufile, 'r') as handle:
            content = handle.read()
        score = float(content.split(',')[0].split()[2])
        if max_bleu <= score:
            max_bleu = score
            bleu_filename = bleufile
    return bleu_filename, max_bleu


filename, max_bleu = find_bleu(filelist, max_bleu)
print(filename)
|
@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fit a linear regression of the expected flat price on the listing data."""

import sklearn
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

dane = pd.read_csv("mieszkania.csv")
print(dane.head())
print(dane.columns)

# Check the data for outliers: price against living area ...
plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
plt.show()
# ... and price against the number of rooms.
plt.scatter(dane['Rooms'], dane['Expected'], color='g')
plt.show()

# Keep only listings with fewer than 10 rooms, at most 200 square metres and
# an expected price of at most 500,000; everything else counts as an outlier.
flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
print(flats.head(20))

# Target and feature matrix (identifier, free-text and unnamed columns dropped).
y = flats['Expected']
X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
                'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
print(y.head())
print(X.head())

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)

model = LinearRegression()
# Fit on the training split only.  Fitting on all of X, y lets the model see
# the test rows and makes the scores below optimistic.
model.fit(train_X, train_y)

predicted = model.predict(test_X)
print("Predictions:", predicted[:5])

# model.coef_ holds one slope per feature (not the intercept).
for p in zip(train_X.columns, model.coef_):
    print("Coefficient for {}: {:.3}".format(p[0], p[1]))

rmse = np.sqrt(mean_squared_error(test_y, predicted))
print("RMSE:", rmse)

r2 = model.score(test_X, test_y)

# NOTE(review): the earlier figures (0.54 after cleaning vs 0.02 before) were
# measured with the model fit on all rows - re-measure after this change.
print("R squared:", r2)
|
||||
|
||||
|
||||
|
||||
|
136
labs06/task02.py
136
labs06/task02.py
@ -1,22 +1,14 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def wczytaj_dane():
    """Load the flat listings from ``mieszkania.csv`` in the working directory.

    Prints the first rows as a quick sanity check.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    dane = pd.read_csv("mieszkania.csv")
    print(dane.head())
    return dane
|
||||
|
||||
def most_common_room_number(dane):
    """Return the room count that occurs most often in the listings.

    Args:
        dane: DataFrame with a ``Rooms`` column.

    Returns:
        The most frequent value of the ``Rooms`` column.
    """
    room_counts = dane['Rooms'].value_counts()
    return room_counts.idxmax()
|
||||
|
||||
def cheapest_flats(dane, n):
    """Return the ``n`` cheapest listings.

    The previous body sorted from the most expensive listing down, always
    took 7 rows regardless of ``n`` and returned nothing.

    Args:
        dane: DataFrame with an ``Expected`` (price) column.
        n: Number of listings to return.

    Returns:
        A DataFrame with the ``n`` rows of lowest ``Expected`` value, ordered
        from cheapest to most expensive.
    """
    by_price = dane.sort_values('Expected', ascending=True)
    return by_price.head(n)
|
||||
|
||||
def find_borough(desc):
|
||||
dzielnice = ['Stare Miasto',
|
||||
@ -26,139 +18,37 @@ def find_borough(desc):
|
||||
'Piątkowo',
|
||||
'Winogrady',
|
||||
'Miłostowo',
|
||||
'Dębiec',
|
||||
'Grunwald',
|
||||
'Nowe Miasto']
|
||||
|
||||
|
||||
check = 0
|
||||
for dzielnica in dzielnice:
|
||||
if dzielnica in desc:
|
||||
check = 1
|
||||
save_dzielnica = dzielnica
|
||||
if check == 1:
|
||||
return(save_dzielnica)
|
||||
else:
|
||||
return("Inne")
|
||||
|
||||
'Dębiec']
|
||||
pass
|
||||
|
||||
|
||||
def add_borough(dane):
    """Add a ``Borough`` column derived from the ``Location`` column.

    Each location is matched against the known borough names.  When several
    names occur in one location, the one listed last below wins.  Locations
    that match no name, or that are not strings (e.g. NaN), get "Inne".

    The new column is named ``Borough`` (it used to end up unnamed as ``0``)
    so that it can be grouped on, and the extended frame is returned as well
    as printed.

    Args:
        dane: DataFrame with a ``Location`` column.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the added ``Borough``
        column; the input frame is left unchanged.
    """
    dzielnice = ['Stare Miasto',
                 'Wilda',
                 'Jeżyce',
                 'Rataje',
                 'Piątkowo',
                 'Winogrady',
                 'Miłostowo',
                 'Dębiec',
                 'Grunwald',
                 'Nowe Miasto']
    boroughs = []
    for location in dane['Location']:
        borough = "Inne"
        if isinstance(location, str):
            for dzielnica in dzielnice:
                if dzielnica in location:
                    # No break: a later name in the list overrides an earlier one.
                    borough = dzielnica
        boroughs.append(borough)

    # reset_index returns a new frame, so the caller's frame is not modified.
    dane = dane.reset_index(drop=True)
    dane['Borough'] = boroughs
    print(dane)
    return dane
|
||||
|
||||
def write_plot(dane, filename):
    """Save a bar chart of the number of listings per borough to ``filename``.

    Args:
        dane: DataFrame with ``Borough`` and ``Id`` columns.
        filename: Path of the image file to write.
    """
    dane.groupby('Borough')['Id'].nunique().plot(kind='bar')
    # Save before showing: saving after show() can write an empty figure.
    # The path now comes from the argument instead of a fixed 'output.png'.
    plt.savefig(filename)
    plt.show()
|
||||
|
||||
def mean_price(dane, room_number):
    """Return the mean expected price of flats with ``room_number`` rooms.

    Args:
        dane: DataFrame with ``Rooms`` and ``Expected`` columns.
        room_number: Room count to filter on.

    Returns:
        The mean of ``Expected`` over the matching rows (NaN when no row
        matches).
    """
    matching = dane['Rooms'] == room_number
    return dane.loc[matching, 'Expected'].mean()
|
||||
|
||||
def find_13(dane):
    """Return the distinct locations that have a flat on the 13th floor.

    The result used to be computed and then thrown away, so callers always
    received ``None``.

    Args:
        dane: DataFrame with ``Floor`` and ``Location`` columns.

    Returns:
        An array of the unique ``Location`` values among rows whose
        ``Floor`` equals 13, in order of first appearance.
    """
    on_13th = dane[dane['Floor'] == 13]
    return on_13th['Location'].unique()
|
||||
|
||||
def find_best_flats(dane):
    """Return the 3-room, first-floor flats located in Winogrady.

    The matching rows are printed, as before, and are now also returned.

    Args:
        dane: DataFrame with ``Location``, ``Rooms`` and ``Floor`` columns.

    Returns:
        A DataFrame with the rows whose ``Location`` contains 'Winogrady',
        whose ``Rooms`` equals 3 and whose ``Floor`` equals 1.
    """
    # na=False treats a missing location as "no match"; otherwise a NaN in
    # the mask makes the boolean indexing below fail.
    in_winogrady = dane['Location'].str.contains('Winogrady', na=False)
    candidates = dane[in_winogrady]
    best_flats = candidates[(candidates['Rooms'] == 3) & (candidates['Floor'] == 1)]
    print(best_flats)
    return best_flats
|
||||
|
||||
def main():
    """Load the listings and print a few summary facts about them."""
    dane = wczytaj_dane()

    print(dane[:5])

    print("Najpopularniejsza liczba pokoi w mieszkaniu to: {}"
          .format(most_common_room_number(dane)))

    # Printed once: the second copy of this statement (misspelled and with an
    # unbalanced closing parenthesis) is dropped.
    print("{} to najładniejsza dzielnica w Poznaniu."
          .format(find_borough("Grunwald i Jeżyce")))

    print("Średnia cena mieszkania 3-pokojowego, to: {}"
          .format(mean_price(dane, 3)))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
|
||||
|
||||
# Additional task: fit a linear regression of the expected flat price.

import sklearn
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

dane = pd.read_csv("mieszkania.csv")
print(dane.head())
print(dane.columns)

# Check the data for outliers: price against living area ...
plt.scatter(dane['SqrMeters'], dane['Expected'], color='g')
plt.show()
# ... and price against the number of rooms.
plt.scatter(dane['Rooms'], dane['Expected'], color='g')
plt.show()

# Keep only listings with fewer than 10 rooms, at most 200 square metres and
# an expected price of at most 500,000; everything else counts as an outlier.
flats = dane[(dane['Rooms'] < 10) & (dane['SqrMeters'] <= 200) & (dane['Expected'] <= 500000)]
print(flats.head(20))

# Target and feature matrix (identifier, free-text and unnamed columns dropped).
y = flats['Expected']
X = flats.drop(['Id', 'Expected', 'Floor', 'Location',
                'Description', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis=1)
print(y.head())
print(X.head())

train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, random_state=38, shuffle=True)

model = LinearRegression()
# Fit on the training split only.  Fitting on all of X, y lets the model see
# the test rows and makes the scores below optimistic.
model.fit(train_X, train_y)

predicted = model.predict(test_X)
print("Predictions:", predicted[:5])

# model.coef_ holds one slope per feature (not the intercept).
for p in zip(train_X.columns, model.coef_):
    print("Coefficient for {}: {:.3}".format(p[0], p[1]))

rmse = np.sqrt(mean_squared_error(test_y, predicted))
print("RMSE:", rmse)

r2 = model.score(test_X, test_y)

# NOTE(review): the earlier figures (0.54 after cleaning vs 0.02 before) were
# measured with the model fit on all rows - re-measure after this change.
print("R squared:", r2)
|
@ -4,92 +4,77 @@
|
||||
"""
1. Import the pandas library as pd.
"""
import pandas as pd

"""
2. Load the data set `311.csv` into the variable data.
"""
data = pd.read_csv("311.csv", low_memory=False)

"""
3. Display the first 5 rows of data.
"""
print(data.head())

"""
4. Display the column names.
"""
print(data.columns)

"""
5. Display how many columns and rows the data set has.
"""
shape = data.shape
print(shape)

"""
6. Display the 'City' column of the data set.
"""
print(data['City'])

"""
7. Display the values taken by the 'City' column.
"""
# The result has to be printed; as a bare expression it is discarded when
# the file runs as a script.
print(data.City.unique())

"""
8. Display the frequency table of the City column.
"""
t = data.City.value_counts()
print(t)

"""
9. Display only the first 4 rows of the previous result.
"""
print(t.head(4))

"""
10. Display in how many cases the City column contains NaN.
"""
# isnull() marks the missing entries; summing the booleans counts them.
rows = data['City'].isnull().sum()
print(rows)

"""
11. Display data.info()
"""
# info() prints its report itself and returns None, so it is not wrapped in
# print() (that added a stray "None" line).
data.info()

"""
12. Display only the columns Borough and Agency and only the last 5 lines.
"""
print(data[['Borough', 'Agency']].tail())

"""
13. Display only the rows whose Agency value equals NYPD.
Count how many such rows there are.
"""
p = data[data['Agency'] == 'NYPD']
print(p)
print(len(p))

"""
14. Display the minimum and maximum value of the Longitude column.
"""
print(data['Longitude'].min())
print(data['Longitude'].max())

"""
15. Add a column diff created by summing the columns Longitude and Latitude.
"""
data['diff'] = data['Longitude'] + data['Latitude']

"""
16. Display the frequency table of the 'Descriptor' column for the rows whose
Agency equals NYPD.
"""
p = data[data['Agency'] == 'NYPD']
print(p.Descriptor.value_counts())
|
Loading…
Reference in New Issue
Block a user