Europarl/clean.rb

12 lines
223 B
Ruby
Raw Permalink Normal View History

2020-01-28 18:51:11 +01:00
# Line-oriented text cleaner: reads lines via Kernel#gets (files named in ARGV,
# or standard input) and writes one cleaned line per input line to stdout.
#
# For every line it
#   1. removes markup tags (<...>) together with the spaces/tabs around them,
#   2. capitalizes the line (first character upper-case, the rest lower-case),
#   3. makes sure a non-blank line ends with sentence punctuation by appending
#      " ." when the last visible character is not '.', '!' or '?'.

# A markup tag plus any spaces/tabs directly before and after it.
# NOTE(review): because the surrounding blanks are removed as well, a tag that
# sits between two words glues them together; presumably tags only occur on
# their own lines in the input -- confirm against the corpus.
TAG_PATTERN = /[ \t]*<[^<>]*>[ \t]*/

# Trailing spaces/tabs and the optional line terminator at the end of a line.
LINE_TAIL_PATTERN = /[ \t]*\n?\z/

# Characters that count as a proper end of sentence.
SENTENCE_ENDINGS = %w[. ! ?].freeze

# Cleans a single line of text.
#
# The decision whether to append " ." is based on the last non-blank
# character. Consequences:
#   * a line that is already terminated but followed by trailing blanks
#     ("Hello. \n") is left alone instead of becoming "Hello. .\n";
#   * a line that is empty or consists only of blanks gets no period;
#   * a final line without a trailing newline is terminated like any other.
#
# @param line [String] one raw input line, with or without trailing "\n"
# @return [String] the cleaned line; the argument itself is not modified
def clean_line(line)
  text = line.gsub(TAG_PATTERN, '').capitalize
  sentence = text.sub(LINE_TAIL_PATTERN, '')

  # Nothing to terminate, or already terminated: keep the line as it is.
  return text if sentence.empty? || sentence.end_with?(*SENTENCE_ENDINGS)

  "#{sentence} .\n"
end

# Collapsing repeated substrings with gsub(/(.+)(\1)+/, '\1') was tried here
# and did not improve the result, so it is intentionally left out.
while (line = gets)
  puts clean_line(line)
end