167 KiB
167 KiB
Setup
# Mount Google Drive so model checkpoints and outputs persist across Colab sessions.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Fetch the CNLPS CAICCAIC challenge repository (train / dev-A / test-A TSV splits).
!git clone https://github.com/kubapok/cnlps-caiccaic.git
Cloning into 'cnlps-caiccaic'... remote: Enumerating objects: 73, done.[K remote: Counting objects: 100% (73/73), done.[K remote: Compressing objects: 100% (56/56), done.[K remote: Total 73 (delta 32), reused 41 (delta 11), pack-reused 0[K Unpacking objects: 100% (73/73), 1.89 MiB | 4.48 MiB/s, done.
# -U upgrade, -q quiet.
# NOTE(review): peft, bitsandbytes and loralib are installed but never imported
# in this notebook — confirm they are needed or drop them.
!pip install -Uq datasets transformers peft bitsandbytes loralib accelerate
[2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/92.2 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.5/227.5 kB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m75.1 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m126.6 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.6/149.6 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m [?25h
Create dataset
# Concatenate train + dev-A into single files so the dev split is used as
# additional training data (no held-out validation during training).
!cat cnlps-caiccaic/train/in.tsv cnlps-caiccaic/dev-A/in.tsv > in.tsv
!cat cnlps-caiccaic/train/expected.tsv cnlps-caiccaic/dev-A/expected.tsv > expected.tsv
# Build HuggingFace datasets: inputs are "<utterance> language: <locale>";
# targets are the expected annotations with {}→[] and tabs→| so the whole
# annotation fits in a single TSV-safe string.
import csv
from datasets import Dataset, DatasetDict

with open('in.tsv', encoding='utf-8') as f_in, open('expected.tsv', encoding='utf-8') as f_exp:
    rows_in = list(csv.reader(f_in, delimiter='\t'))
    rows_exp = f_exp.readlines()
with open('cnlps-caiccaic/test-A/in.tsv', encoding='utf-8') as f_test:
    rows_test = list(csv.reader(f_test, delimiter='\t'))

train_examples = []
for row, exp in zip(rows_in, rows_exp):
    # Column 3 holds the utterance, column 1 the locale tag.
    target = exp.strip().replace('}', ']').replace('{', '[').replace('\t', '|')
    train_examples.append({'text': f'{row[3]} language: {row[1]}', 'intent': target})

# Test split has no gold annotations; keep an empty target so both splits
# share the same schema.
test_examples = [{'text': f'{row[3]} language: {row[1]}', 'intent': ''} for row in rows_test]

dataset = DatasetDict({
    'train': Dataset.from_list(train_examples),
    'test': Dataset.from_list(test_examples),
})
dataset['train'][600]
{'text': "don't alert me when an event in my calendar in location kenner begins language: en-US", 'intent': "Calendar|NotNotifyOnEventInLocation|['location': 'kenner']"}
Define training components
# Load the base FLAN-T5 Large checkpoint; device_map='auto' lets accelerate
# place the weights on the available GPU automatically.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
model_name = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map='auto')
Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]
Downloading spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]
Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]
Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]
Downloading (…)lve/main/config.json: 0%| | 0.00/662 [00:00<?, ?B/s]
Downloading pytorch_model.bin: 0%| | 0.00/3.13G [00:00<?, ?B/s]
Downloading (…)neration_config.json: 0%| | 0.00/147 [00:00<?, ?B/s]
def preprocess_function(sample):
    """Tokenize a batch of examples for seq2seq fine-tuning.

    Prepends the instruction prompt to each input text and tokenizes both
    inputs and target annotations. Padding is deliberately left to
    DataCollatorForSeq2Seq, which pads each training batch dynamically and
    fills label padding with -100 (ignored by the loss).
    """
    inputs = ['Create an annotation based on the following data. ' + item for item in sample['text']]
    # FIX: the original passed padding=True here, which padded every map()
    # batch to its longest member and baked that static padding into the
    # cached dataset — redundant with (and wasteful next to) the collator's
    # dynamic per-batch padding.
    model_inputs = tokenizer(inputs, truncation=True)
    labels = tokenizer(text_target=sample['intent'], truncation=True)
    # Defensive: mask any pad ids so they can never contribute to the loss.
    model_inputs['labels'] = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in label]
        for label in labels['input_ids']
    ]
    return model_inputs
# Tokenize both splits; the raw text columns are dropped so only model
# inputs remain in the cached dataset.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=['text', 'intent'],
)

from transformers import DataCollatorForSeq2Seq

# Pads inputs and labels dynamically per batch (label padding uses -100).
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
Training
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

output_dir = 'flan-t5_large'

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    auto_find_batch_size=True,  # back off batch size automatically on CUDA OOM
    save_steps=3000,
    logging_dir=f'{output_dir}/logs',
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    data_collator=data_collator,
)

# Disable the decoder KV-cache during training (not needed for teacher forcing).
model.config.use_cache = False
trainer.train()
[ 2/6437 : < :, Epoch 0.00/1]
Step | Training Loss |
---|
[12874/12874 3:22:15, Epoch 1/1]
Step | Training Loss |
---|---|
500 | 0.348400 |
1000 | 0.147000 |
1500 | 0.096300 |
2000 | 0.077400 |
2500 | 0.070700 |
3000 | 0.053100 |
3500 | 0.051700 |
4000 | 0.046900 |
4500 | 0.040200 |
5000 | 0.039800 |
5500 | 0.038500 |
6000 | 0.037600 |
6500 | 0.040100 |
7000 | 0.040800 |
7500 | 0.038600 |
8000 | 0.036000 |
8500 | 0.035600 |
9000 | 0.040000 |
9500 | 0.038300 |
10000 | 0.034400 |
10500 | 0.038000 |
11000 | 0.037300 |
11500 | 0.035400 |
12000 | 0.035500 |
12500 | 0.035800 |
TrainOutput(global_step=12874, training_loss=0.06059208474804456, metrics={'train_runtime': 12136.5406, 'train_samples_per_second': 4.243, 'train_steps_per_second': 1.061, 'total_flos': 2.330193322605773e+16, 'train_loss': 0.06059208474804456, 'epoch': 1.0})
Save the model
# Persist the fine-tuned weights and tokenizer locally, then copy to Drive.
trainer.model.save_pretrained('results_v2')
tokenizer.save_pretrained('results_v2')
# Recreate the Drive target dir from scratch; the rm fails harmlessly when
# the directory does not exist yet.
!rm -r /content/drive/MyDrive/caiccaic; mkdir /content/drive/MyDrive/caiccaic
!cp -r results_v2 /content/drive/MyDrive/caiccaic
# Optionally release the Colab runtime once training finishes:
# from google.colab import runtime
# runtime.unassign()
Inference
!cp -r /content/drive/MyDrive/results_v2 .
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
model = AutoModelForSeq2SeqLM.from_pretrained('results_v2', device_map={'':0})
tokenizer = AutoTokenizer.from_pretrained('results_v2')
model.eval()
T5ForConditionalGeneration( (shared): Embedding(32128, 1024) (encoder): T5Stack( (embed_tokens): Embedding(32128, 1024) (block): ModuleList( (0): T5Block( (layer): ModuleList( (0): T5LayerSelfAttention( (SelfAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) (relative_attention_bias): Embedding(32, 16) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (1): T5LayerFF( (DenseReluDense): T5DenseGatedActDense( (wi_0): Linear(in_features=1024, out_features=2816, bias=False) (wi_1): Linear(in_features=1024, out_features=2816, bias=False) (wo): Linear(in_features=2816, out_features=1024, bias=False) (dropout): Dropout(p=0.1, inplace=False) (act): NewGELUActivation() ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) (1-23): 23 x T5Block( (layer): ModuleList( (0): T5LayerSelfAttention( (SelfAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (1): T5LayerFF( (DenseReluDense): T5DenseGatedActDense( (wi_0): Linear(in_features=1024, out_features=2816, bias=False) (wi_1): Linear(in_features=1024, out_features=2816, bias=False) (wo): Linear(in_features=2816, out_features=1024, bias=False) (dropout): Dropout(p=0.1, inplace=False) (act): NewGELUActivation() ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (final_layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (decoder): T5Stack( (embed_tokens): Embedding(32128, 1024) (block): ModuleList( (0): T5Block( (layer): ModuleList( (0): 
T5LayerSelfAttention( (SelfAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) (relative_attention_bias): Embedding(32, 16) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (1): T5LayerCrossAttention( (EncDecAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (2): T5LayerFF( (DenseReluDense): T5DenseGatedActDense( (wi_0): Linear(in_features=1024, out_features=2816, bias=False) (wi_1): Linear(in_features=1024, out_features=2816, bias=False) (wo): Linear(in_features=2816, out_features=1024, bias=False) (dropout): Dropout(p=0.1, inplace=False) (act): NewGELUActivation() ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) (1-23): 23 x T5Block( (layer): ModuleList( (0): T5LayerSelfAttention( (SelfAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (1): T5LayerCrossAttention( (EncDecAttention): T5Attention( (q): Linear(in_features=1024, out_features=1024, bias=False) (k): Linear(in_features=1024, out_features=1024, bias=False) (v): Linear(in_features=1024, out_features=1024, bias=False) (o): Linear(in_features=1024, out_features=1024, bias=False) ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (2): T5LayerFF( 
(DenseReluDense): T5DenseGatedActDense( (wi_0): Linear(in_features=1024, out_features=2816, bias=False) (wi_1): Linear(in_features=1024, out_features=2816, bias=False) (wo): Linear(in_features=2816, out_features=1024, bias=False) (dropout): Dropout(p=0.1, inplace=False) (act): NewGELUActivation() ) (layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) ) ) ) (final_layer_norm): T5LayerNorm() (dropout): Dropout(p=0.1, inplace=False) ) (lm_head): Linear(in_features=1024, out_features=32128, bias=False) )
import numpy as np  # NOTE(review): unused in this notebook — consider removing
from datasets import load_from_disk  # NOTE(review): unused — consider removing
from tqdm import tqdm
def evaluate_peft_model(sample):
    """Generate the annotation string for one tokenized test sample.

    Expects `sample['input_ids']` to be a list of token ids for a single
    example; returns the decoded prediction with special tokens stripped.
    """
    input_ids = torch.tensor(sample['input_ids']).unsqueeze(0).to(model.device)
    # FIX: run generation under no_grad — the original tracked gradients
    # during inference, wasting GPU memory. Also use model.device instead of
    # a hard-coded .cuda() so the code follows wherever the model was placed.
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, max_new_tokens=512)
    # tokenizer.decode accepts the tensor directly; no detach/cpu/numpy dance.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Generate a prediction for every test sample.
# FIX: manual counter replaced with enumerate; unused `references` list removed.
test_dataset = tokenized_dataset['test']
predictions = []
for i, sample in enumerate(tqdm(test_dataset)):
    pred = evaluate_peft_model(sample)
    if i % 100 == 0:  # periodic spot-check of the generated annotations
        print('\n', pred)
    predictions.append(pred)
0%| | 0/10358 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1255: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation) warnings.warn( 0%| | 1/10358 [00:03<9:53:16, 3.44s/it]
Airconditioner|SetTemperatureOnDevice|['device_name': 'firmes stet']
1%| | 101/10358 [02:47<4:55:07, 1.73s/it]
Calendar|AddEventWithName|['event_name':'meeting with conremarch fisht']
2%|▏ | 201/10358 [04:57<3:07:32, 1.11s/it]
Calendar|NotifyOnEventStart|[]
3%|▎ | 301/10358 [07:53<6:24:20, 2.29s/it]
Console|ConsoleCP|['from': 'petrofactor symonichs', 'to': 'andriocuga belly']
4%|▍ | 401/10358 [10:26<2:49:24, 1.02s/it]
Console|ConsoleLS|['pathname': 'cities']
5%|▍ | 501/10358 [13:31<4:59:07, 1.82s/it]
Contacts|EditContactWithNumber|['phone_number': '203 5943']
6%|▌ | 601/10358 [16:41<6:53:27, 2.54s/it]
Contacts|ShowContactWithNumberAndWithEmail|['email': 'teresa.aguilera-peon@enron.com', 'phone_number': '011 48 193 424']
7%|▋ | 701/10358 [20:16<8:48:44, 3.29s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'for their championship this week','subject': 'fw: new erisa case against enron', 'to': 'bettine@kpmg.com']
8%|▊ | 801/10358 [24:53<4:26:13, 1.67s/it]
Email|SendEmailToAddress|['to': 'herpequipomythson']
9%|▊ | 901/10358 [27:23<6:45:00, 2.57s/it]
Facebook|PostPictureWithUrlAndWithCaption|['caption': 'with agatha', 'picture_url': 'bit.ly/jtCvu']
10%|▉ | 1001/10358 [29:30<3:19:15, 1.28s/it]
Gdrive|OpenFileWithName|['file_name': 'adoby's cony h it']
11%|█ | 1101/10358 [31:36<3:28:44, 1.35s/it]
Instagram|ShowPicturesWithFilter|['filter': 'ludwig']
12%|█▏ | 1201/10358 [33:46<1:55:40, 1.32it/s]
Email|SendEmailToAddress|[]
13%|█▎ | 1301/10358 [36:09<3:14:43, 1.29s/it]
Email|SendEmailToAddress|['to':'revered']
14%|█▎ | 1401/10358 [38:47<2:57:12, 1.19s/it]
Speaker|DecreaseVolumeByPercent|['percent': '40']
14%|█▍ | 1501/10358 [41:04<4:14:40, 1.73s/it]
Spotify|AddSongWithNameToPlaylistWithName|['playlist':'solonce patifar','song': 'o my plailest work']
15%|█▌ | 1601/10358 [43:24<3:35:06, 1.47s/it]
Spotify|AddSongWithNameToPlaylistWithName|['playlist': 'i still see your shadows in my room','song': 'endemoniada']
16%|█▋ | 1701/10358 [46:02<5:06:01, 2.12s/it]
Spotify|PlaySongByArtist|['artist': 'terry hoax','song':'super fly']
17%|█▋ | 1801/10358 [49:46<10:40:08, 4.49s/it]
Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_es':'spanish', 'text_es':'mircles', 'translator': 'deeple', 'trg_lang_de': 'german']
18%|█▊ | 1901/10358 [57:13<10:31:03, 4.48s/it]
Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_de': 'german', 'text_de': 'western', 'translator':'microsoft', 'trg_lang_es':'spanish']
19%|█▉ | 2001/10358 [1:04:34<10:21:17, 4.46s/it]
Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_pl': 'polish', 'text_pl':'stro', 'translator': 'gogel', 'trg_lang_es':'spanish']
20%|██ | 2101/10358 [1:11:50<8:21:13, 3.64s/it]
Translate|TranslateTextFromLanguageToLanguage|['src_lang_it': 'italian', 'text_it':'so lotted', 'trg_lang_en': 'english']
21%|██ | 2201/10358 [1:17:55<8:36:33, 3.80s/it]
Translate|TranslateTextFromLanguageToLanguage|['src_lang_de': 'german', 'text_de': 'bitine coal and mighty rump', 'trg_lang_pl': 'polish']
22%|██▏ | 2301/10358 [1:23:58<7:57:41, 3.56s/it]
Translate|TranslateTextFromLanguageToLanguage|['src_lang_fr': 'french', 'text_fr': 'des légumes', 'trg_lang_de': 'german']
23%|██▎ | 2401/10358 [1:28:28<3:59:16, 1.80s/it]
Translate|TranslateText|['text_en': 'i do i translate with pleasure']
24%|██▍ | 2501/10358 [1:31:04<2:30:25, 1.15s/it]
Weather|WeatherInLocation|['location': 'kinzers']
25%|██▌ | 2601/10358 [1:33:32<3:44:53, 1.74s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|[]
26%|██▌ | 2701/10358 [1:36:27<3:28:04, 1.63s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|[]
27%|██▋ | 2801/10358 [1:39:27<3:21:15, 1.60s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'heisenberg']
28%|██▊ | 2901/10358 [1:42:24<2:38:30, 1.28s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|[]
29%|██▉ | 3001/10358 [1:44:57<3:40:46, 1.80s/it]
Websearch|SearchTextOnEngine|['txt_query': 'nhl scores']
30%|██▉ | 3101/10358 [1:46:37<1:22:15, 1.47it/s]
Wikipedia|GoToElementNumber|[]
31%|███ | 3201/10358 [1:48:59<4:13:47, 2.13s/it]
Yelp|SearchByQueryInLocation|['location':'swan river', 'query': 'chicken parmesan']
32%|███▏ | 3301/10358 [1:51:53<2:39:36, 1.36s/it]
Youtube|FindQuery|['query': 'troy lanez']
33%|███▎ | 3401/10358 [1:54:09<3:18:56, 1.72s/it]
Calendar|AddEventWithName|['event_name': 'zubrzyca']
34%|███▍ | 3501/10358 [1:56:12<2:33:54, 1.35s/it]
Calendar|NotifyOnEventInLocation|['location': 'klubie lustra']
35%|███▍ | 3601/10358 [1:59:30<2:13:16, 1.18s/it]
Console|ConsoleEdit|['filename': 'p1_babych_pdf']
36%|███▌ | 3701/10358 [2:02:50<2:33:04, 1.38s/it]
Contacts|EditContactWithNumber|['phone_number': '55 469 53 16']
37%|███▋ | 3801/10358 [2:06:21<4:58:36, 2.73s/it]
Contacts|ShowContactWithNumberAndWithEmail|['email':'mojesz@onet.pl', 'phone_number': '+48 783 492 786']
38%|███▊ | 3901/10358 [2:10:22<4:51:48, 2.71s/it]
Email|SendEmailToAddressWithSubject|['subject': 'przewraliwiany szcznie dorosa munda']
39%|███▊ | 4001/10358 [2:16:28<3:25:05, 1.94s/it]
Email|ShowEmailFromSender|['sender_address': 'ludmia13277@onet.pl']
40%|███▉ | 4101/10358 [2:18:40<2:59:42, 1.72s/it]
Facebook|PostPictureWithUrl|['picture_url': 'bit.ly/gruffly']
41%|████ | 4201/10358 [2:22:16<1:57:30, 1.15s/it]
Fitbit|NotifyOnWeight|['weight': '88 kg']
42%|████▏ | 4301/10358 [2:24:33<2:44:31, 1.63s/it]
Fitbit|ShowStepsOnDate|['date': '18 czerwca']
42%|████▏ | 4401/10358 [2:26:48<2:16:49, 1.38s/it]
Gdrive|ShowFilesWithSize|['file_size': '50 mb']
43%|████▎ | 4501/10358 [2:28:59<1:16:34, 1.27it/s]
Instagram|ShowPictures|[]
44%|████▍ | 4601/10358 [2:31:15<2:17:08, 1.43s/it]
Instagram|ShowPicturesWithLocation|['location': 'grenola']
45%|████▌ | 4701/10358 [2:34:08<3:36:14, 2.29s/it]
News|NotifyWhenPortalUpdatesInSection|['portal': 'wp.pl','section': 'gwiazdy']
46%|████▋ | 4801/10358 [2:36:34<1:20:07, 1.16it/s]
Phone|CallEmergency|[]
47%|████▋ | 4901/10358 [2:39:09<1:47:22, 1.18s/it]
Slack|CheckMessagesFromUser|['sender': 'j.winiewski']
48%|████▊ | 5001/10358 [2:41:17<1:59:04, 1.33s/it]
Spotify|AddSongToPlaylistWithName|['playlist':'signed xoxo']
49%|████▉ | 5101/10358 [2:43:39<46:10, 1.90it/s]
Spotify|OpenSpotify|[]
50%|█████ | 5201/10358 [2:46:06<2:41:06, 1.87s/it]
Spotify|PlaySongByArtist|['artist': 'organek','song':'sarajevo grade moj']
51%|█████ | 5301/10358 [2:48:52<3:25:30, 2.44s/it]
Translate|TranslateTextFromLanguage|['src_lang_en': 'angielskiego', 'text_en':'south']
52%|█████▏ | 5401/10358 [2:53:21<3:33:55, 2.59s/it]
Translate|TranslateTextFromLanguage|['src_lang_en': 'angielskiego', 'text_en': 'does this bus stop in busan']
53%|█████▎ | 5501/10358 [2:59:19<5:19:55, 3.95s/it]
Translate|TranslateTextFromLanguageToLanguage|['src_lang_en': 'angielskiego', 'text_en': 'does the room come with bedsheets', 'trg_lang_pl': 'polski']
54%|█████▍ | 5601/10358 [3:06:49<6:08:33, 4.65s/it]
Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_es': 'hiszpaskiego', 'text_es': 'la próxima semana', 'translator':'microsoft', 'trg_lang_it': 'woski']
55%|█████▌ | 5701/10358 [3:14:42<5:59:08, 4.63s/it]
Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_en': 'angielskiego', 'text_en': 'november', 'translator': 'google', 'trg_lang_it': 'woski']
56%|█████▌ | 5801/10358 [3:18:24<2:46:55, 2.20s/it]
Weather|SunriseInLocation|['location': 'róanka']
57%|█████▋ | 5901/10358 [3:21:02<2:56:27, 2.38s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query': 'koptyjski zrozumiao']
58%|█████▊ | 6001/10358 [3:25:01<3:04:41, 2.54s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'kruchuteki ucisk']
59%|█████▉ | 6101/10358 [3:29:02<2:50:21, 2.40s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'kolorowanki wielkanocne']
60%|█████▉ | 6201/10358 [3:32:31<1:43:03, 1.49s/it]
Websearch|SearchText|['txt_query': 'trackim tuck tucko']
61%|██████ | 6301/10358 [3:34:27<1:16:13, 1.13s/it]
Yelp|OpenRestaurantsInLocation|['location': 'wrzeszcz dolny']
62%|██████▏ | 6401/10358 [3:37:40<2:21:13, 2.14s/it]
Yelp|SearchByReviewCountInLocation|['location': 'poplar branch','review_count': '40']
63%|██████▎ | 6501/10358 [3:40:28<2:10:15, 2.03s/it]
Youtube|FindQueryOnChannel|['channel_id': 'laughing stock', 'query': 'barier cardi']
64%|██████▎ | 6601/10358 [3:44:00<2:36:07, 2.49s/it]
Youtube|FindQueryOnChannel|['channel_id': 'hearthstone legend', 'query': 'kabarety 2019 najnowsze']
65%|██████▍ | 6701/10358 [3:47:35<2:04:56, 2.05s/it]
Youtube|FindQueryOnChannel|['channel_id':'resourceniche', 'query': 'polska tahiti']
66%|██████▌ | 6801/10358 [3:50:27<50:32, 1.17it/s]
Youtube|NotifyOnNewFromFollowing|[]
67%|██████▋ | 6901/10358 [3:53:37<4:42:36, 4.91s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'durante los próximos tres das o tendrás el 50 de descuento que','subject': 'durante los próximos tres das o tendrás el 50 de descuento que', 'to': 'chaya y soh mayer ir']
68%|██████▊ | 7001/10358 [3:57:03<1:56:39, 2.09s/it]
Calendar|CheckCalendarOnDate|['date': 'l'ultima matricola']
69%|██████▊ | 7101/10358 [3:59:17<1:27:44, 1.62s/it]
Airconditioner|SetTemperatureToValue|['value': '24 grados']
70%|██████▉ | 7201/10358 [4:02:06<1:11:36, 1.36s/it]
Calendar|CheckCalendarOnDate|['date': 'el 1 de agosto']
70%|███████ | 7301/10358 [4:04:14<1:18:49, 1.55s/it]
Contacts|ShowContactWithEmail|['email': 'luis@gmail.com']
71%|███████▏ | 7401/10358 [4:07:49<2:17:35, 2.79s/it]
Contacts|ShowContactWithNumberAndWithEmail|['email': 'rupert.yanni@apx.com', 'phone_number': '447 349 366']
72%|███████▏ | 7501/10358 [4:11:31<39:56, 1.19it/s]
Email|SendEmail|[]
73%|███████▎ | 7601/10358 [4:14:15<1:51:32, 2.43s/it]
Email|SendEmailToAddressWithSubject|['subject': 'fwd more than their fair share', 'to': 'eugenio']
74%|███████▍ | 7701/10358 [4:19:24<3:01:40, 4.10s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'ahora viene mi chico','subject': 'Te gustara vivir la mejor experiencia de tu vida?', 'to': 'kena']
75%|███████▌ | 7801/10358 [4:26:21<2:56:35, 4.14s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'que poquito que falta para cataratas','subject':'southwest into california', 'to': 'chuy']
76%|███████▋ | 7901/10358 [4:33:16<2:56:17, 4.31s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'rt si te gusta trollear','subject': 'here is the complete email', 'to':'montse mendizabal']
77%|███████▋ | 8001/10358 [4:40:05<2:35:14, 3.95s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'lo acabo y me voy a dormir','subject': 'ceo letter', 'to':'salva@hotmail.es']
78%|███████▊ | 8101/10358 [4:47:06<2:45:26, 4.40s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'hoy a tomar tere con milaa','subject': 'have played here a few times', 'to': 'jenelle@hotmail.com']
79%|███████▉ | 8201/10358 [4:53:58<2:35:31, 4.33s/it]
Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'cada vez mas viejas lisitas','subject': 'the new dublin campus next year', 'to': 'roselyn@proton.com']
80%|████████ | 8301/10358 [4:58:21<51:18, 1.50s/it]
Email|ShowEmailWithSubject|['subject':'summarizing all media coverage']
81%|████████ | 8401/10358 [5:01:03<1:53:03, 3.47s/it]
Facebook|PostPictureWithUrlAndWithCaption|['caption': 'le pregunté a mi perro cómo me veo por eso lo amo tanto', 'picture_url': 'bit.ly/NIAGyW3']
82%|████████▏ | 8501/10358 [5:03:38<37:21, 1.21s/it]
Facebook|PostAlbumOfTypeByArtist|['album': 'vacaciones en argentina 2019']
83%|████████▎ | 8601/10358 [5:05:33<42:13, 1.44s/it]
Instagram|ShowPicturesWithFilter|['filter': 'gingham']
84%|████████▍ | 8701/10358 [5:07:50<55:57, 2.03s/it]
News|NotifyWhenPortalUpdatesInSection|['portal': 'la voz de galicia','section': 'deporte']
85%|████████▍ | 8801/10358 [5:11:03<49:30, 1.91s/it]
News|ShowNewsFromSection|['portal': 'el pas','section': 'deporte']
86%|████████▌ | 8901/10358 [5:14:16<51:23, 2.12s/it]
Slack|SendMessageToChannel|['channel': 'citibank','message': 'excelente fin de semana']
87%|████████▋ | 9001/10358 [5:16:36<52:36, 2.33s/it]
Spotify|AddSongWithNameToPlaylistWithName|['playlist':'soulfull disco','song': 'how i see it']
88%|████████▊ | 9101/10358 [5:18:57<20:56, 1.00it/s]
Spotify|PlaySong|['song':'silent edge']
89%|████████▉ | 9201/10358 [5:21:15<40:07, 2.08s/it]
Spotify|PlaySongByArtist|['artist': 'jonathan larson','song':'song x']
90%|████████▉ | 9301/10358 [5:24:38<34:06, 1.94s/it]
Spotify|PlaySongByArtist|['artist': 'hard stance','song': 'on my toes']
91%|█████████ | 9401/10358 [5:28:07<49:29, 3.10s/it]
Translate|TranslateTextFromLanguageToLanguage|['src_lang': 'fránces', 'text':'marrón', 'trg_lang': 'espaol']
92%|█████████▏| 9501/10358 [5:33:07<21:48, 1.53s/it]
Calendar|CheckCalendarOnDate|['date': 'fase de la luna para croacia']
93%|█████████▎| 9601/10358 [5:35:48<24:26, 1.94s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query':'mechas balayage']
94%|█████████▎| 9701/10358 [5:39:09<21:16, 1.94s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query': 'la roca village']
95%|█████████▍| 9801/10358 [5:42:42<20:17, 2.19s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'calendario febrero 2020 para imprimir']
96%|█████████▌| 9901/10358 [5:46:23<15:14, 2.00s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'formacion']
97%|█████████▋| 10001/10358 [5:50:01<12:44, 2.14s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query':'media melena']
98%|█████████▊| 10101/10358 [5:53:41<10:01, 2.34s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query':'memes graciosos']
98%|█████████▊| 10201/10358 [5:57:18<05:42, 2.18s/it]
Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'rita maestre']
99%|█████████▉| 10301/10358 [6:00:14<01:35, 1.67s/it]
Yelp|SearchByQueryInLocation|['location': 'aulesti', 'query': 'cerveceras artesanales y pubs']
100%|██████████| 10358/10358 [6:01:25<00:00, 2.09s/it]
# Undo the training-time escaping so predictions match the expected TSV
# format: the first three '|' become tabs, [] becomes {}.
formatted = []
for prediction in predictions:
    restored = prediction.replace('|', '\t', 3).replace('[', '{').replace(']', '}').strip()
    formatted.append(restored)
predictions = formatted

with open('/content/cnlps-caiccaic/test-A/out_v2.tsv', 'w', encoding='utf-8') as out:
    out.writelines(pred + '\n' for pred in predictions)
import locale
# Colab workaround: some runtimes report a non-UTF-8 preferred encoding,
# which breaks the ! shell command below — force UTF-8.
locale.getpreferredencoding = lambda: "UTF-8"
!cp /content/cnlps-caiccaic/test-A/out_v2.tsv /content/drive/MyDrive/
# Release the Colab runtime now that inference output is saved to Drive.
from google.colab import runtime
runtime.unassign()