ProjektPython/utils/calculate_statistics.py

64 lines
2.3 KiB
Python
Raw Normal View History

2024-01-21 17:51:17 +01:00
import pandas as pd
def countPercentOfNumbers(row):
    """Return the percentage of characters in ``row['value']`` that are digits.

    Args:
        row: Any mapping-like object indexable with ``'value'`` (for example
            a DataFrame row passed by ``df.apply(..., axis=1)``) whose value
            is a string.

    Returns:
        A float between 0.0 and 100.0. An empty string yields 0.0.

    Raises:
        TypeError: If ``row['value']`` has no length (for example a float
            NaN), because ``len()`` is called on it.
    """
    text = row['value']
    total = len(text)
    # Explicit guard: an empty string has nothing to classify and would
    # otherwise divide by zero.
    if total == 0:
        return 0.0
    digit_count = sum(c.isdigit() for c in text)
    # True division keeps the result a float even when the count is zero.
    return (digit_count / total) * 100
def countPercentOfLetters(row):
    """Return the percentage of characters in ``row['value']`` that are letters.

    A character counts as a letter when ``str.isalpha()`` is true for it.

    Args:
        row: Any mapping-like object indexable with ``'value'`` (for example
            a DataFrame row passed by ``df.apply(..., axis=1)``) whose value
            is a string.

    Returns:
        A float between 0.0 and 100.0. An empty string yields 0.0.

    Raises:
        TypeError: If ``row['value']`` has no length (for example a float
            NaN), because ``len()`` is called on it.
    """
    text = row['value']
    total = len(text)
    # Explicit guard: an empty string has nothing to classify and would
    # otherwise divide by zero.
    if total == 0:
        return 0.0
    letter_count = sum(c.isalpha() for c in text)
    # True division keeps the result a float even when the count is zero.
    return (letter_count / total) * 100
def countPercentOfSpaces(row):
    """Return the percentage of characters in ``row['value']`` that are whitespace.

    A character counts as whitespace when ``str.isspace()`` is true for it,
    so tabs and newlines are included, not only the space character.

    Args:
        row: Any mapping-like object indexable with ``'value'`` (for example
            a DataFrame row passed by ``df.apply(..., axis=1)``) whose value
            is a string.

    Returns:
        A float between 0.0 and 100.0. An empty string yields 0.0.

    Raises:
        TypeError: If ``row['value']`` has no length (for example a float
            NaN), because ``len()`` is called on it.
    """
    text = row['value']
    total = len(text)
    # Explicit guard: an empty string has nothing to classify and would
    # otherwise divide by zero.
    if total == 0:
        return 0.0
    space_count = sum(c.isspace() for c in text)
    # True division keeps the result a float even when the count is zero.
    return (space_count / total) * 100
def countPercentOfSpecials(row):
    """Return the percentage of "special" characters in ``row['value']``.

    The special count is the string length minus the number of characters
    for which ``str.isdigit()``, ``str.isalpha()`` and ``str.isspace()``
    are true (each class counted separately and subtracted).

    Args:
        row: Any mapping-like object indexable with ``'value'`` (for example
            a DataFrame row passed by ``df.apply(..., axis=1)``) whose value
            is a string.

    Returns:
        A float percentage. An empty string yields 0.0.

    Raises:
        TypeError: If ``row['value']`` has no length (for example a float
            NaN), because ``len()`` is called on it.
    """
    text = row['value']
    total = len(text)
    # Explicit guard: an empty string has nothing to classify and would
    # otherwise divide by zero.
    if total == 0:
        return 0.0
    # One pass over the string: adding the three boolean tests per character
    # gives the same total as counting each class in its own scan.
    classified = sum(c.isdigit() + c.isalpha() + c.isspace() for c in text)
    special_count = total - classified
    # True division keeps the result a float even when the count is zero.
    return (special_count / total) * 100
# Load the first two columns of the header-less dataset as (type, value).
#
# The value column is read as text and NA-marker parsing is switched off so
# that empty cells and literals such as "NA" or "null" arrive as ordinary
# strings. With the defaults they become float NaN (and an all-numeric column
# becomes int), and every len()/character scan below fails with TypeError.
# NOTE: keep_default_na applies to the whole file, so the same literals in
# the type column are kept as plain group labels instead of being dropped.
df = pd.read_csv(
    '../datasets/out.csv',
    usecols=range(2),
    lineterminator='\n',
    header=None,
    dtype={1: str},
    keep_default_na=False,
)
df = df.rename(columns={0: "type", 1: "value"})

# Vectorised string length instead of a row-wise apply.
df['numberOfChars'] = df['value'].str.len()
print('Mean amount of chars by type')
print(df.groupby(['type'])["numberOfChars"].mean())

# Per-row character-class percentages (each in the range 0-100).
df['percentOfLetters'] = df.apply(countPercentOfLetters, axis=1)
df['percentOfNumbers'] = df.apply(countPercentOfNumbers, axis=1)
df['percentOfSpecialChars'] = df.apply(countPercentOfSpecials, axis=1)
df['percentOfSpaces'] = df.apply(countPercentOfSpaces, axis=1)

# Report the per-type mean of every percentage column, in the original order.
# The first label used to say "alphanumeric", but the column it introduces
# counts letters only (str.isalpha), so the label now says "letters".
for label, column in (
    ('Mean percent of letters by type', "percentOfLetters"),
    ('Mean percent of digits by type', "percentOfNumbers"),
    ('Mean percent of special characters by type', "percentOfSpecialChars"),
    ('Mean percent of spaces by type', "percentOfSpaces"),
):
    print(label)
    print(df.groupby(['type'])[column].mean())