"""Extract standalone integer tokens from each line of a text file.

Run as a script with the input path as the only argument; the path must
contain ``polish_wiki_excerpt``. Results go to ``./polish_wiki_excerpt.out``.
"""
import sys
import re

# A run of digits bounded by word boundaries on both sides, so digits glued
# to letters or underscores ("a1", "1a", "2_000") are not matched.
_NUMBER_RE = re.compile(r"\b\d+\b")


def solve(lines):
    """Return one output line per input line that contains number tokens.

    Args:
        lines: Iterable of text lines.

    Returns:
        A list of strings. Each entry holds the number tokens found in one
        input line, in order of appearance, joined by single spaces and
        terminated by a newline. Input lines without any match produce no
        entry.
    """
    res = []
    for line in lines:
        numbers = _NUMBER_RE.findall(line)
        if numbers:
            res.append(' '.join(numbers) + '\n')
    return res


if __name__ == "__main__":
    # Validate arguments explicitly: an ``assert`` would be stripped when
    # Python runs with -O, and a missing argument would otherwise surface as
    # a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <path to polish_wiki_excerpt file>".format(sys.argv[0]))
    fp = sys.argv[1]
    if "polish_wiki_excerpt" not in fp:
        sys.exit("expected an input path containing 'polish_wiki_excerpt', got: {}".format(fp))
    print(fp)
    with open(fp, encoding="utf-8") as f:
        lines = f.readlines()
    sol = solve(lines)
    with open("./polish_wiki_excerpt.out", 'w', encoding="utf-8") as f:
        f.writelines(sol)