21 lines
689 B
Python
21 lines
689 B
Python
|
#!/usr/bin/env python3
|
||
|
# -*- coding: utf-8 -*-
|
||
|
# Author: Barry Haddow
|
||
|
# Distributed under MIT license
|
||
|
|
||
|
#
|
||
|
# Remove Romanian diacritics. Assumes s-comma and t-comma are normalised
|
||
|
|
||
|
import io
|
||
|
import sys
|
||
|
istream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
|
||
|
ostream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||
|
|
||
|
for line in istream:
|
||
|
line = line.replace("\u0218", "S").replace("\u0219", "s") #s-comma
|
||
|
line = line.replace("\u021a", "T").replace("\u021b", "t") #t-comma
|
||
|
line = line.replace("\u0102", "A").replace("\u0103", "a")
|
||
|
line = line.replace("\u00C2", "A").replace("\u00E2", "a")
|
||
|
line = line.replace("\u00CE", "I").replace("\u00EE", "i")
|
||
|
ostream.write(line)
|