"""Per-type character statistics for the two-column ``out.csv`` dataset.

The CSV is read without a header row; column 0 is renamed to ``type`` and
column 1 to ``value``.  For every row the script computes the length of
``value`` and the percentage of its characters that are letters, digits,
whitespace, or none of those ("special"), then prints the mean of each
metric grouped by ``type``.
"""

import pandas as pd


def _value_text(row):
    """Return ``row['value']`` as a string, mapping missing values to ''."""
    value = row['value']
    if isinstance(value, str):
        return value
    # read_csv yields NaN for empty fields; treat that as "no characters"
    # instead of letting len() raise TypeError on a float.
    if value is None or pd.isna(value):
        return ''
    return str(value)


def _is_special(char):
    """Return True when *char* is neither a digit, a letter, nor whitespace."""
    return not (char.isdigit() or char.isalpha() or char.isspace())


def _percent_of(text, predicate):
    """Return the percentage of characters in *text* that satisfy *predicate*.

    Returns the integer 0 when no character matches, which also covers the
    empty string and so avoids dividing by zero.
    """
    matching = sum(1 for char in text if predicate(char))
    if matching == 0:
        return 0
    return (matching / len(text)) * 100


def countPercentOfNumbers(row):
    """Return the percentage of digit characters in ``row['value']``."""
    return _percent_of(_value_text(row), str.isdigit)


def countPercentOfLetters(row):
    """Return the percentage of alphabetic characters in ``row['value']``."""
    return _percent_of(_value_text(row), str.isalpha)


def countPercentOfSpaces(row):
    """Return the percentage of whitespace characters in ``row['value']``."""
    return _percent_of(_value_text(row), str.isspace)


def countPercentOfSpecials(row):
    """Return the percentage of characters in ``row['value']`` that are
    neither digits, letters, nor whitespace."""
    return _percent_of(_value_text(row), _is_special)


# (result column, character predicate, heading printed above the means),
# listed in the order the means are printed.
_PERCENT_METRICS = (
    ('percentOfLetters', str.isalpha, 'Mean percent of letters by type'),
    ('percentOfNumbers', str.isdigit, 'Mean percent of digits by type'),
    ('percentOfSpecialChars', _is_special,
     'Mean percent of special characters by type'),
    ('percentOfSpaces', str.isspace, 'Mean percent of spaces by type'),
)


def main(csv_path='../datasets/out.csv'):
    """Load the dataset at *csv_path* and print per-type mean statistics."""
    df = pd.read_csv(
        csv_path,
        usecols=range(2),
        lineterminator='\n',
        header=None,
        # Keep the text verbatim: without this an all-numeric column would be
        # parsed as int/float and could not be measured character by character.
        dtype={1: str},
    )
    df = df.rename(columns={0: "type", 1: "value"})
    # Empty fields come back as NaN; count them as zero-length strings.
    df['value'] = df['value'].fillna('')

    df['numberOfChars'] = df['value'].map(len)
    print('Mean amount of chars by type')
    print(df.groupby(['type'])["numberOfChars"].mean())

    for column, predicate, _heading in _PERCENT_METRICS:
        df[column] = [_percent_of(text, predicate) for text in df['value']]

    for column, _predicate, heading in _PERCENT_METRICS:
        print(heading)
        print(df.groupby(['type'])[column].mean())


if __name__ == "__main__":
    main()