25 lines
861 B
Python
Executable File
25 lines
861 B
Python
Executable File
#!/usr/bin/python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import sys, re
|
|
|
|
# Matches a path of the form 'pl/<TOKEN>....xml.gz' and captures <TOKEN>,
# the leading run of uppercase letters and digits right after 'pl/'.
p = re.compile(r'^pl/([A-Z0-9]+).*\.xml\.gz$')

# Row template for the id -> source listing: numeric id, URL built from the
# captured token, and a fixed source label (kept byte-for-byte).
SOURCE_ROW = ('%d\thttps://eur-lex.europa.eu/legal-content/PL/TXT/?uri=CELEX:%s'
              '\tBaza aktów prawnych Unii Europejskiej\n')


def assign_ids(in_ids, out_ids, out_ids_sources):
    """Number the documents referenced by the lines of *in_ids*.

    Each input line is split on tabs and its second field is matched
    against ``p``.  The captured token identifies a document; documents
    are numbered from 0 in order of first appearance.

    Args:
        in_ids: iterable of text lines (e.g. an open text file).
        out_ids: writable text stream; receives one line holding the
            numeric id for every input line that matched.
        out_ids_sources: writable text stream; receives one
            ``id<TAB>url<TAB>label`` row the first time a document is seen.

    Returns:
        The dict mapping each captured token to its numeric id.

    Lines that have no second field or whose second field does not match
    are reported on stdout as ``Wrong line: ...`` and produce no output
    row in either stream.
    """
    ids_dictionary = {}
    for line in in_ids:
        fields = line.split('\t')
        # A line without any tab has no second field; report it through the
        # existing "wrong line" path instead of failing with IndexError.
        m = p.match(fields[1]) if len(fields) > 1 else None
        if not m:
            print('Wrong line: ' + line.rstrip())
            continue

        doc_number = m.group(1)
        if doc_number not in ids_dictionary:
            # The next free id always equals the number of documents seen.
            next_free = len(ids_dictionary)
            ids_dictionary[doc_number] = next_free
            out_ids_sources.write(SOURCE_ROW % (next_free, doc_number))
        out_ids.write(str(ids_dictionary[doc_number]) + '\n')
    return ids_dictionary


def main():
    """Read 'DGT.ids' and write 'ids.txt' and 'ids_sources.txt'.

    All files are opened explicitly as UTF-8: the source label written to
    'ids_sources.txt' contains non-ASCII characters, so relying on the
    locale's default encoding could fail or produce wrongly encoded output.
    """
    with open('DGT.ids', encoding='utf-8') as in_ids, \
            open('ids.txt', 'w', encoding='utf-8') as out_ids, \
            open('ids_sources.txt', 'w', encoding='utf-8') as out_ids_sources:
        assign_ids(in_ids, out_ids, out_ids_sources)


if __name__ == '__main__':
    main()