corpus figures creator
This commit is contained in:
parent
96c74c47ac
commit
f585ff9e01
@ -4,6 +4,7 @@
|
|||||||
from os import listdir
|
from os import listdir
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
input_dir = 'stats'
|
input_dir = 'stats'
|
||||||
@ -37,9 +38,9 @@ with open(output_dir+'/stats_table.tex', 'w') as stats_table:
|
|||||||
stats_table.write(r'Corpus name & Total sentences & Non-empty & Unique\\'+'\n')
|
stats_table.write(r'Corpus name & Total sentences & Non-empty & Unique\\'+'\n')
|
||||||
stats_table.write(r'\hline\hline'+'\n')
|
stats_table.write(r'\hline\hline'+'\n')
|
||||||
for corpus in corpora:
|
for corpus in corpora:
|
||||||
non_empty_percentage = float(100*corpus["total"] - corpus["empty"])/corpus["total"]
|
non_empty_percentage = float(100*(corpus["total"] - corpus["empty"]))/corpus["total"]
|
||||||
unique_percentage = float(100*corpus["unique"])/corpus["total"]
|
unique_percentage = float(100*corpus["unique"])/corpus["total"]
|
||||||
stats_table.write("%s & %d & %d (%.2f%%) & %d (%.2f%%) \\\\\n" % (corpus["name"], corpus["total"], corpus["total"] - corpus["empty"], non_empty_percentage, corpus["unique"], unique_percentage))
|
stats_table.write("%s & %d & %d (%.2f\%%) & %d (%.2f\%%) \\\\\n" % (corpus["name"], corpus["total"], corpus["total"] - corpus["empty"], non_empty_percentage, corpus["unique"], unique_percentage))
|
||||||
|
|
||||||
stats_table.write(r'\hline'+'\n')
|
stats_table.write(r'\hline'+'\n')
|
||||||
stats_table.write(r'\end{tabular}'+'\n')
|
stats_table.write(r'\end{tabular}'+'\n')
|
||||||
@ -59,7 +60,7 @@ for corpus in corpora:
|
|||||||
freq_table.write(r'Occurences & Sentence\\'+'\n')
|
freq_table.write(r'Occurences & Sentence\\'+'\n')
|
||||||
freq_table.write(r'\hline\hline'+'\n')
|
freq_table.write(r'\hline\hline'+'\n')
|
||||||
for data in corpus["most_frequent"]:
|
for data in corpus["most_frequent"]:
|
||||||
freq_table.write("%d & %s\n" % data)
|
freq_table.write("%d & %s\\\\\n" % data)
|
||||||
freq_table.write(r'\hline'+'\n')
|
freq_table.write(r'\hline'+'\n')
|
||||||
freq_table.write(r'\end{tabular}'+'\n')
|
freq_table.write(r'\end{tabular}'+'\n')
|
||||||
freq_table.write(r'\caption{Most frequent sentences in the corpus '+corpus["name"]+'}\n')
|
freq_table.write(r'\caption{Most frequent sentences in the corpus '+corpus["name"]+'}\n')
|
||||||
@ -69,23 +70,21 @@ for corpus in corpora:
|
|||||||
|
|
||||||
# plot
|
# plot
|
||||||
|
|
||||||
N = 5
|
N = len(corpora)
|
||||||
menMeans = (20, 35, 30, 35, 27)
|
uniques = [float(100*corpus["unique"]) / corpus["total"] for corpus in corpora]
|
||||||
womenMeans = (25, 32, 34, 20, 25)
|
repeated = [float(100*(corpus["total"] - corpus["unique"] - corpus["empty"])) / corpus["total"] for corpus in corpora]
|
||||||
menStd = (2, 3, 4, 1, 2)
|
empty = [float(100*corpus["empty"]) / corpus["total"] for corpus in corpora]
|
||||||
womenStd = (3, 5, 2, 3, 3)
|
|
||||||
ind = np.arange(N) # the x locations for the groups
|
ind = np.arange(N) # the x locations for the groups
|
||||||
width = 0.35 # the width of the bars: can also be len(x) sequence
|
width = 0.35 # the width of the bars: can also be len(x) sequence
|
||||||
|
|
||||||
p1 = plt.bar(ind, menMeans, width, color='r', yerr=womenStd)
|
p1 = plt.bar(ind, uniques, width, color='#009900')
|
||||||
p2 = plt.bar(ind, womenMeans, width, color='y',
|
p2 = plt.bar(ind, repeated, width, color='#99FF66', bottom=uniques)
|
||||||
bottom=menMeans, yerr=menStd)
|
p3 = plt.bar(ind, empty, width, color='#999966', bottom=[sum(x) for x in zip(repeated,uniques)])
|
||||||
|
|
||||||
plt.ylabel('Scores')
|
plt.xticks(ind+width/2., [corpus["name"] for corpus in corpora] )
|
||||||
plt.title('Scores by group and gender')
|
plt.yticks(np.arange(0,101,10))
|
||||||
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
|
plt.legend( (p1[0], p2[0], p3[0]), ('unique', 'repeated', 'empty') )
|
||||||
plt.yticks(np.arange(0,81,10))
|
|
||||||
plt.legend( (p1[0], p2[0]), ('Men', 'Women') )
|
|
||||||
|
|
||||||
|
|
||||||
plt.savefig('bar_graph.eps', format='eps')
|
plt.savefig(output_dir+'/bar_graph.eps', format='eps')
|
||||||
|
Loading…
Reference in New Issue
Block a user