# ADS/preprocessed_data/preprocessing.py

import pandas as pd
# The season labels below assume the input holds this many consecutive
# seasons, each occupying the same number of rows, in order.
_SEASON_COUNT = 3
_ROWS_PER_SEASON = 30


def _strip_leading_zeros(goals):
    """Return *goals* without leading zeros, keeping a single "0" for zero."""
    # lstrip("0") turns "0" or "00" into "", so fall back to "0" in that case.
    return goals.lstrip("0") or "0"


def _split_result(result):
    """Split a "scored:lost" result string into two normalized goal strings.

    Only the first two colon-separated parts are used; an IndexError is
    raised when the string contains no colon.
    """
    parts = result.split(':')
    return _strip_leading_zeros(parts[0]), _strip_leading_zeros(parts[1])


def main():
    """Preprocess the club CSV and write the result next to it.

    Reads ``resources/club-Pogon.csv`` (semicolon-separated, windows-1250),
    then:

    * adds a ``Sezon`` column labelling each consecutive run of 30 rows
      with "1", "2" and "3";
    * rewrites ``Wynik`` without leading zeros on either side of the colon
      and stores the two sides in ``Zdobyte bramki`` (scored) and
      ``Stracone bramki`` (lost);
    * truncates ``Godzina`` to its first five characters (hh:mm:ss -> hh:mm).

    The frame is written to ``resources/club-Pogon-preprocessed.csv``
    without the index column.

    Raises:
        ValueError: if the input does not contain exactly 90 rows.
    """
    df = pd.read_csv('resources/club-Pogon.csv', sep=';', encoding='windows-1250')

    # Add new column with season. Fail early with a readable message instead
    # of relying on the length-mismatch error raised by the column assignment.
    expected_rows = _SEASON_COUNT * _ROWS_PER_SEASON
    if len(df) != expected_rows:
        raise ValueError(
            f"expected {expected_rows} rows "
            f"({_SEASON_COUNT} seasons of {_ROWS_PER_SEASON}), got {len(df)}"
        )
    df['Sezon'] = [
        str(position // _ROWS_PER_SEASON + 1) for position in range(expected_rows)
    ]

    # Add columns with scored and lost goals (kept as strings, as before).
    goals = [_split_result(result) for result in df['Wynik']]
    df['Wynik'] = [scored + ":" + lost for scored, lost in goals]
    df['Zdobyte bramki'] = [scored for scored, _ in goals]
    df['Stracone bramki'] = [lost for _, lost in goals]

    # Change time format in column Godzina from hh:mm:ss to hh:mm.
    df['Godzina'] = [kickoff[0:5] for kickoff in df['Godzina']]

    # Save to csv
    df.to_csv('resources/club-Pogon-preprocessed.csv', index=False)
# Run the preprocessing only when executed as a script, not on import.
if __name__ == "__main__":
    main()