# aitech-sd-lab/data/create_intent_dataset.py
#
# 22 lines
# 633 B
# Python

from pathlib import Path
import sys
def _iter_user_examples(lines):
    """Yield ``(text, intent)`` pairs for every user row in *lines*.

    Each row is split on tabs.  A row is used only when its first field is
    exactly ``'user'`` and it has at least three fields; the second field is
    the text and the intent is the part of the third field before the first
    ``'('``.  Rows that do not match are skipped.
    """
    for raw in lines:
        # Drop the line terminator first so it cannot leak into the last
        # field (which would otherwise emit a blank line in the output).
        fields = raw.rstrip('\n').split('\t')
        if len(fields) < 3 or fields[0] != 'user':
            continue
        yield fields[1], fields[2].split('(')[0]


def main():
    """Build ``intent_data/train.tsv`` from the files in a directory.

    The directory is taken from the first command-line argument.  Every
    regular file directly inside it is read as UTF-8, and for each user row
    a ``text<TAB>intent`` line is appended to the output file.  The name of
    each processed file is printed once it has been read.

    Raises:
        SystemExit: if no directory argument was given.
    """
    # Validate the arguments before touching the output file, so a missing
    # argument does not truncate an existing dataset.
    if len(sys.argv) < 2:
        raise SystemExit('usage: create_intent_dataset.py <dialog_dir>')
    source_dir = Path(sys.argv[1])

    out_path = Path('intent_data/train.tsv')
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Sorted for a reproducible output order; sub-directories are skipped
    # because they cannot be opened as text files.
    files = sorted(p for p in source_dir.glob('*') if p.is_file())

    with open(out_path, 'w', encoding='utf-8') as outf:
        for file in files:
            with open(file, encoding='utf-8') as inf:
                for text, intent in _iter_user_examples(inf):
                    outf.write(text + '\t' + intent + '\n')
            print('Processed: ', file.name)
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()