"""Build a cleaned dataset of Krakow apartment listings from monthly CSV files.

Run as a script, this reads the monthly snapshot files listed in
``SOURCE_FILES``, keeps only the rows whose ``city`` is ``'krakow'``, removes
listings repeated across snapshots, keeps the columns in ``OUTPUT_COLUMNS``,
drops rows with missing values and writes the result to ``OUTPUT_FILE``.
"""
import pandas as pd

# Monthly snapshot files, concatenated in this order.
SOURCE_FILES = (
    'archive/apartments_pl_2023_11.csv',
    'archive/apartments_pl_2023_12.csv',
    'archive/apartments_pl_2024_01.csv',
    'archive/apartments_pl_2024_02.csv',
)

# Two rows that agree on all of these columns are treated as the same listing.
DEDUP_COLUMNS = ["squareMeters", "rooms", "floor", "centreDistance", "price"]

# Columns kept in the output file, in output order.
OUTPUT_COLUMNS = [
    "squareMeters",
    "rooms",
    "floor",
    "buildYear",
    "centreDistance",
    "poiCount",
    "price",
]

OUTPUT_FILE = 'data.csv'


def pull_krakow(df):
    """Return the rows of *df* whose ``city`` column equals ``'krakow'``.

    Args:
        df: DataFrame that has a ``city`` column.

    Returns:
        A DataFrame with only the matching rows; the original index labels
        are preserved.
    """
    return df[df["city"] == 'krakow']


def main():
    """Run the pipeline: load, filter, deduplicate, select, drop NaN, save.

    Prints three row counts along the way (after filtering, after
    deduplication, after dropping rows with missing values) followed by a
    confirmation message.
    """
    frames = [pull_krakow(pd.read_csv(path)) for path in SOURCE_FILES]
    df_concatenated = pd.concat(frames, ignore_index=True)

    # Total number of Krakow rows across all snapshots, before deduplication.
    total_rows = sum(len(frame) for frame in frames)
    print(total_rows)

    # Deduplicate before selecting columns; drop_duplicates keeps the first
    # occurrence, i.e. the row from the earliest snapshot.
    df_no_duplicates = df_concatenated.drop_duplicates(subset=DEDUP_COLUMNS)
    print(len(df_no_duplicates))

    # Only the output columns are checked for missing values.
    df_na_dropped = df_no_duplicates[OUTPUT_COLUMNS].dropna()
    print(len(df_na_dropped))

    df_na_dropped.to_csv(OUTPUT_FILE, index=False)
    print("Dane zapisane do data.csv.")


if __name__ == "__main__":
    main()