Europarl/test/clean.rb
2020-01-28 18:51:11 +01:00

12 lines
202 B
Ruby

# Line-oriented text cleaner: every line obtained from Kernel#gets is cleaned
# with clean_line and written out with puts.

# One markup tag such as "<P>", together with the spaces/tabs on either side.
TAG_PATTERN = /[ \t]*<[^<>]*>[ \t]*/

# End of a line whose last non-blank character is not ".", "!" or "?".
# Blanks are excluded from the character class so that whitespace trailing a
# terminator ("Done. \n") is not mistaken for an unterminated line; the newline
# is optional so that a final line lacking one is still recognised.
UNTERMINATED_PATTERN = /[^.!? \t\n][ \t]*\n?\z/

# Trailing spaces/tabs plus the optional newline at the very end of the line.
LINE_END_PATTERN = /[ \t]*\n?\z/

# Any run of characters immediately followed by one or more copies of itself.
REPEAT_PATTERN = /(.+)(\1)+/

# Returns a cleaned copy of one line of text; the argument is not modified.
#
# Steps, in order:
# 1. remove every <...> tag along with the blanks surrounding it;
# 2. capitalize the line (first character upper-cased, the rest lower-cased);
# 3. if the last non-blank character is not ".", "!" or "?", replace the
#    trailing blanks/newline with " .\n";
# 4. collapse each immediately repeated run of characters to one occurrence.
#
# @param line [String] a single line, normally ending in "\n"
# @return [String] the cleaned line
def clean_line(line)
  cleaned = line.gsub(TAG_PATTERN, '')
  cleaned.capitalize!
  cleaned.sub!(LINE_END_PATTERN, " .\n") if cleaned.match?(UNTERMINATED_PATTERN)
  # NOTE(review): this also shortens doubled letters inside words
  # ("hello" -> "helo"); behavior kept from the original -- confirm intended.
  cleaned.gsub!(REPEAT_PATTERN, '\1')
  cleaned
end

while (line = gets)
  puts clean_line(line)
end