Europarl/clean.rb
2020-01-28 18:51:11 +01:00

12 lines
223 B
Ruby

# Cleans a line-oriented text stream read via Kernel#gets (stdin or files
# named on the command line) and writes the result to stdout:
#   1. markup tags (<...>) are removed together with surrounding blanks,
#   2. the line is capitalized (first character upcased, the rest downcased),
#   3. a line whose last non-blank character is not '.', '!' or '?' gets
#      " ." appended in place of its trailing blanks.

# A tag plus any spaces/tabs directly around it.
TAG_PATTERN = /[ \t]*<[^<>]*>[ \t]*/

# A line whose last significant character is not sentence-final punctuation.
# Blanks are excluded from the character class on purpose: otherwise the
# trailing space of "text. \n" would itself count as the "non-punctuation"
# character and a second period would be appended. The newline is optional so
# that a final line without a line terminator is handled as well.
UNTERMINATED_PATTERN = /[^.!? \t\n][ \t]*\n?\z/

# Trailing blanks and the optional newline that get replaced by " .\n".
LINE_TAIL_PATTERN = /[ \t]*\n?\z/

# Returns the cleaned version of a single input line.
#
# @param line [String] one raw line, normally still ending in "\n"
# @return [String] a new string; +line+ itself is not modified
def clean_line(line)
  cleaned = line.gsub(TAG_PATTERN, '').capitalize
  return cleaned unless cleaned.match?(UNTERMINATED_PATTERN)

  # `sub`, not `gsub`: after consuming the tail, a global replace would match
  # the empty string at the very end once more and append a second " .\n".
  cleaned.sub(LINE_TAIL_PATTERN, " .\n")
end

while (line = gets)
  # Disabled experiment: collapsing immediately repeated substrings with
  #   line.gsub!(/(.+)(\1)+/, '\1')
  # did not improve the result.
  puts clean_line(line)
end