cnlps-caiccaic/solution.ipynb

Setup

from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
!git clone https://github.com/kubapok/cnlps-caiccaic.git
Cloning into 'cnlps-caiccaic'...
remote: Enumerating objects: 73, done.
remote: Counting objects: 100% (73/73), done.
remote: Compressing objects: 100% (56/56), done.
remote: Total 73 (delta 32), reused 41 (delta 11), pack-reused 0
Unpacking objects: 100% (73/73), 1.89 MiB | 4.48 MiB/s, done.
!pip install -Uq datasets transformers peft bitsandbytes loralib accelerate

Create dataset

!cat cnlps-caiccaic/train/in.tsv  cnlps-caiccaic/dev-A/in.tsv > in.tsv
!cat cnlps-caiccaic/train/expected.tsv  cnlps-caiccaic/dev-A/expected.tsv > expected.tsv
import csv
from datasets import Dataset, DatasetDict

with open('in.tsv', encoding='utf-8') as train_f_in, open('expected.tsv', encoding='utf-8') as train_f_exp:
  train_list_in = list(csv.reader(train_f_in, delimiter='\t'))
  train_list_exp = train_f_exp.readlines()

with open('cnlps-caiccaic/test-A/in.tsv', encoding='utf-8') as test_f_in:
  test_list_in = list(csv.reader(test_f_in, delimiter='\t'))

train_data = Dataset.from_list([
    {'text': f'{in_[3]} language: {in_[1]}',
     'intent': exp.strip().replace('}', ']').replace('{', '[').replace('\t', '|')}
    for in_, exp in zip(train_list_in, train_list_exp)
])
test_data = Dataset.from_list([{'text': f'{in_[3]} language: {in_[1]}', 'intent': ''} for in_ in test_list_in])
dataset = DatasetDict({'train': train_data, 'test': test_data})
dataset['train'][600]
{'text': "don't alert me when an event in my calendar in location kenner begins language: en-US",
 'intent': "Calendar|NotNotifyOnEventInLocation|['location': 'kenner']"}
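For reference, the intent string above comes from the tab/brace substitution applied when building train_data. A minimal illustration (the expected.tsv line here is reconstructed from the example, not read from the file):

# Illustration of the target-format conversion used for train_data.
# The input line is reconstructed from the example above.
exp = "Calendar\tNotNotifyOnEventInLocation\t{'location': 'kenner'}\n"
print(exp.strip().replace('}', ']').replace('{', '[').replace('\t', '|'))
# Calendar|NotNotifyOnEventInLocation|['location': 'kenner']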

Define training components

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map='auto')
def preprocess_function(sample):
    inputs = ['Create an annotation based on the following data. ' + item for item in sample['text']]
    model_inputs = tokenizer(inputs, padding=True, truncation=True)
    labels = tokenizer(text_target=sample['intent'], padding=True, truncation=True)
    # replace label padding with -100 so padded positions are ignored by the loss
    labels['input_ids'] = [[(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels['input_ids']]
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=['text', 'intent'])
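A quick way to verify the preprocessing is to decode one tokenized example back to text (an inspection sketch, not part of the original run; index 600 matches the earlier dataset example):

# Inspection sketch: decode one preprocessed training example back to text.
sample = tokenized_dataset['train'][600]
print(tokenizer.decode(sample['input_ids'], skip_special_tokens=True))
# Label padding was replaced with -100 above, so mask it out before decoding.
print(tokenizer.decode([t for t in sample['labels'] if t != -100], skip_special_tokens=True))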
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model
)

Training

from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

output_dir='flan-t5_large'

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    num_train_epochs=1,
    logging_dir=f'{output_dir}/logs',
    save_steps=3000
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset['train']
)
model.config.use_cache = False  # disable the cache during training; it is only useful at generation time
trainer.train()
[12874/12874 3:22:15, Epoch 1/1]
Step Training Loss
500 0.348400
1000 0.147000
1500 0.096300
2000 0.077400
2500 0.070700
3000 0.053100
3500 0.051700
4000 0.046900
4500 0.040200
5000 0.039800
5500 0.038500
6000 0.037600
6500 0.040100
7000 0.040800
7500 0.038600
8000 0.036000
8500 0.035600
9000 0.040000
9500 0.038300
10000 0.034400
10500 0.038000
11000 0.037300
11500 0.035400
12000 0.035500
12500 0.035800

TrainOutput(global_step=12874, training_loss=0.06059208474804456, metrics={'train_runtime': 12136.5406, 'train_samples_per_second': 4.243, 'train_steps_per_second': 1.061, 'total_flos': 2.330193322605773e+16, 'train_loss': 0.06059208474804456, 'epoch': 1.0})
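Because save_steps=3000 writes intermediate checkpoints to output_dir, an interrupted Colab session could be resumed instead of restarting (an optional sketch, not used for this run):

# Optional (not used here): pick up training from the latest checkpoint in output_dir.
# trainer.train(resume_from_checkpoint=True)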

Save the model

trainer.model.save_pretrained('results_v2')
tokenizer.save_pretrained('results_v2')
!rm -r /content/drive/MyDrive/caiccaic; mkdir /content/drive/MyDrive/caiccaic
!cp -r results_v2 /content/drive/MyDrive/caiccaic
# from google.colab import runtime
# runtime.unassign()

Inference

!cp -r /content/drive/MyDrive/caiccaic/results_v2 .
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

model = AutoModelForSeq2SeqLM.from_pretrained('results_v2', device_map={'':0})
tokenizer = AutoTokenizer.from_pretrained('results_v2')
model.eval()
T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
              (wo): Linear(in_features=2816, out_features=1024, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): NewGELUActivation()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1-23): 23 x T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
              (wo): Linear(in_features=2816, out_features=1024, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): NewGELUActivation()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
              (wo): Linear(in_features=2816, out_features=1024, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): NewGELUActivation()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1-23): 23 x T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
              (wo): Linear(in_features=2816, out_features=1024, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): NewGELUActivation()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=1024, out_features=32128, bias=False)
)
from tqdm import tqdm

def evaluate_peft_model(sample):
    # generate the intent annotation for a single tokenized example
    outputs = model.generate(input_ids=torch.tensor(sample['input_ids']).unsqueeze(0).cuda(), max_new_tokens=512)
    prediction = tokenizer.decode(outputs[0].detach().cpu().numpy(), skip_special_tokens=True)
    return prediction

test_dataset = tokenized_dataset['test']

predictions = []
for i, sample in enumerate(tqdm(test_dataset)):
  p = evaluate_peft_model(sample)
  if i % 100 == 0:
    print('\n', p)
  predictions.append(p)
  0%|          | 0/10358 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1255: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)
  warnings.warn(
  0%|          | 1/10358 [00:03<9:53:16,  3.44s/it]
 Airconditioner|SetTemperatureOnDevice|['device_name': 'firmes stet']
  1%|          | 101/10358 [02:47<4:55:07,  1.73s/it]
 Calendar|AddEventWithName|['event_name':'meeting with conremarch fisht']
  2%|▏         | 201/10358 [04:57<3:07:32,  1.11s/it]
 Calendar|NotifyOnEventStart|[]
  3%|▎         | 301/10358 [07:53<6:24:20,  2.29s/it]
 Console|ConsoleCP|['from': 'petrofactor symonichs', 'to': 'andriocuga belly']
  4%|▍         | 401/10358 [10:26<2:49:24,  1.02s/it]
 Console|ConsoleLS|['pathname': 'cities']
  5%|▍         | 501/10358 [13:31<4:59:07,  1.82s/it]
 Contacts|EditContactWithNumber|['phone_number': '203 5943']
  6%|▌         | 601/10358 [16:41<6:53:27,  2.54s/it]
 Contacts|ShowContactWithNumberAndWithEmail|['email': 'teresa.aguilera-peon@enron.com', 'phone_number': '011 48 193 424']
  7%|▋         | 701/10358 [20:16<8:48:44,  3.29s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'for their championship this week','subject': 'fw: new erisa case against enron', 'to': 'bettine@kpmg.com']
  8%|▊         | 801/10358 [24:53<4:26:13,  1.67s/it]
 Email|SendEmailToAddress|['to': 'herpequipomythson']
  9%|▊         | 901/10358 [27:23<6:45:00,  2.57s/it]
 Facebook|PostPictureWithUrlAndWithCaption|['caption': 'with agatha', 'picture_url': 'bit.ly/jtCvu']
 10%|▉         | 1001/10358 [29:30<3:19:15,  1.28s/it]
 Gdrive|OpenFileWithName|['file_name': 'adoby's cony h it']
 11%|█         | 1101/10358 [31:36<3:28:44,  1.35s/it]
 Instagram|ShowPicturesWithFilter|['filter': 'ludwig']
 12%|█▏        | 1201/10358 [33:46<1:55:40,  1.32it/s]
 Email|SendEmailToAddress|[]
 13%|█▎        | 1301/10358 [36:09<3:14:43,  1.29s/it]
 Email|SendEmailToAddress|['to':'revered']
 14%|█▎        | 1401/10358 [38:47<2:57:12,  1.19s/it]
 Speaker|DecreaseVolumeByPercent|['percent': '40']
 14%|█▍        | 1501/10358 [41:04<4:14:40,  1.73s/it]
 Spotify|AddSongWithNameToPlaylistWithName|['playlist':'solonce patifar','song': 'o my plailest work']
 15%|█▌        | 1601/10358 [43:24<3:35:06,  1.47s/it]
 Spotify|AddSongWithNameToPlaylistWithName|['playlist': 'i still see your shadows in my room','song': 'endemoniada']
 16%|█▋        | 1701/10358 [46:02<5:06:01,  2.12s/it]
 Spotify|PlaySongByArtist|['artist': 'terry hoax','song':'super fly']
 17%|█▋        | 1801/10358 [49:46<10:40:08,  4.49s/it]
 Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_es':'spanish', 'text_es':'mircles', 'translator': 'deeple', 'trg_lang_de': 'german']
 18%|█▊        | 1901/10358 [57:13<10:31:03,  4.48s/it]
 Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_de': 'german', 'text_de': 'western', 'translator':'microsoft', 'trg_lang_es':'spanish']
 19%|█▉        | 2001/10358 [1:04:34<10:21:17,  4.46s/it]
 Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_pl': 'polish', 'text_pl':'stro', 'translator': 'gogel', 'trg_lang_es':'spanish']
 20%|██        | 2101/10358 [1:11:50<8:21:13,  3.64s/it]
 Translate|TranslateTextFromLanguageToLanguage|['src_lang_it': 'italian', 'text_it':'so lotted', 'trg_lang_en': 'english']
 21%|██        | 2201/10358 [1:17:55<8:36:33,  3.80s/it]
 Translate|TranslateTextFromLanguageToLanguage|['src_lang_de': 'german', 'text_de': 'bitine coal and mighty rump', 'trg_lang_pl': 'polish']
 22%|██▏       | 2301/10358 [1:23:58<7:57:41,  3.56s/it]
 Translate|TranslateTextFromLanguageToLanguage|['src_lang_fr': 'french', 'text_fr': 'des légumes', 'trg_lang_de': 'german']
 23%|██▎       | 2401/10358 [1:28:28<3:59:16,  1.80s/it]
 Translate|TranslateText|['text_en': 'i do i translate with pleasure']
 24%|██▍       | 2501/10358 [1:31:04<2:30:25,  1.15s/it]
 Weather|WeatherInLocation|['location': 'kinzers']
 25%|██▌       | 2601/10358 [1:33:32<3:44:53,  1.74s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|[]
 26%|██▌       | 2701/10358 [1:36:27<3:28:04,  1.63s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|[]
 27%|██▋       | 2801/10358 [1:39:27<3:21:15,  1.60s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'heisenberg']
 28%|██▊       | 2901/10358 [1:42:24<2:38:30,  1.28s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|[]
 29%|██▉       | 3001/10358 [1:44:57<3:40:46,  1.80s/it]
 Websearch|SearchTextOnEngine|['txt_query': 'nhl scores']
 30%|██▉       | 3101/10358 [1:46:37<1:22:15,  1.47it/s]
 Wikipedia|GoToElementNumber|[]
 31%|███       | 3201/10358 [1:48:59<4:13:47,  2.13s/it]
 Yelp|SearchByQueryInLocation|['location':'swan river', 'query': 'chicken parmesan']
 32%|███▏      | 3301/10358 [1:51:53<2:39:36,  1.36s/it]
 Youtube|FindQuery|['query': 'troy lanez']
 33%|███▎      | 3401/10358 [1:54:09<3:18:56,  1.72s/it]
 Calendar|AddEventWithName|['event_name': 'zubrzyca']
 34%|███▍      | 3501/10358 [1:56:12<2:33:54,  1.35s/it]
 Calendar|NotifyOnEventInLocation|['location': 'klubie lustra']
 35%|███▍      | 3601/10358 [1:59:30<2:13:16,  1.18s/it]
 Console|ConsoleEdit|['filename': 'p1_babych_pdf']
 36%|███▌      | 3701/10358 [2:02:50<2:33:04,  1.38s/it]
 Contacts|EditContactWithNumber|['phone_number': '55 469 53 16']
 37%|███▋      | 3801/10358 [2:06:21<4:58:36,  2.73s/it]
 Contacts|ShowContactWithNumberAndWithEmail|['email':'mojesz@onet.pl', 'phone_number': '+48 783 492 786']
 38%|███▊      | 3901/10358 [2:10:22<4:51:48,  2.71s/it]
 Email|SendEmailToAddressWithSubject|['subject': 'przewraliwiany szcznie dorosa munda']
 39%|███▊      | 4001/10358 [2:16:28<3:25:05,  1.94s/it]
 Email|ShowEmailFromSender|['sender_address': 'ludmia13277@onet.pl']
 40%|███▉      | 4101/10358 [2:18:40<2:59:42,  1.72s/it]
 Facebook|PostPictureWithUrl|['picture_url': 'bit.ly/gruffly']
 41%|████      | 4201/10358 [2:22:16<1:57:30,  1.15s/it]
 Fitbit|NotifyOnWeight|['weight': '88 kg']
 42%|████▏     | 4301/10358 [2:24:33<2:44:31,  1.63s/it]
 Fitbit|ShowStepsOnDate|['date': '18 czerwca']
 42%|████▏     | 4401/10358 [2:26:48<2:16:49,  1.38s/it]
 Gdrive|ShowFilesWithSize|['file_size': '50 mb']
 43%|████▎     | 4501/10358 [2:28:59<1:16:34,  1.27it/s]
 Instagram|ShowPictures|[]
 44%|████▍     | 4601/10358 [2:31:15<2:17:08,  1.43s/it]
 Instagram|ShowPicturesWithLocation|['location': 'grenola']
 45%|████▌     | 4701/10358 [2:34:08<3:36:14,  2.29s/it]
 News|NotifyWhenPortalUpdatesInSection|['portal': 'wp.pl','section': 'gwiazdy']
 46%|████▋     | 4801/10358 [2:36:34<1:20:07,  1.16it/s]
 Phone|CallEmergency|[]
 47%|████▋     | 4901/10358 [2:39:09<1:47:22,  1.18s/it]
 Slack|CheckMessagesFromUser|['sender': 'j.winiewski']
 48%|████▊     | 5001/10358 [2:41:17<1:59:04,  1.33s/it]
 Spotify|AddSongToPlaylistWithName|['playlist':'signed xoxo']
 49%|████▉     | 5101/10358 [2:43:39<46:10,  1.90it/s]
 Spotify|OpenSpotify|[]
 50%|█████     | 5201/10358 [2:46:06<2:41:06,  1.87s/it]
 Spotify|PlaySongByArtist|['artist': 'organek','song':'sarajevo grade moj']
 51%|█████     | 5301/10358 [2:48:52<3:25:30,  2.44s/it]
 Translate|TranslateTextFromLanguage|['src_lang_en': 'angielskiego', 'text_en':'south']
 52%|█████▏    | 5401/10358 [2:53:21<3:33:55,  2.59s/it]
 Translate|TranslateTextFromLanguage|['src_lang_en': 'angielskiego', 'text_en': 'does this bus stop in busan']
 53%|█████▎    | 5501/10358 [2:59:19<5:19:55,  3.95s/it]
 Translate|TranslateTextFromLanguageToLanguage|['src_lang_en': 'angielskiego', 'text_en': 'does the room come with bedsheets', 'trg_lang_pl': 'polski']
 54%|█████▍    | 5601/10358 [3:06:49<6:08:33,  4.65s/it]
 Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_es': 'hiszpaskiego', 'text_es': 'la próxima semana', 'translator':'microsoft', 'trg_lang_it': 'woski']
 55%|█████▌    | 5701/10358 [3:14:42<5:59:08,  4.63s/it]
 Translate|TranslateTextFromLanguageToLanguageWithEngine|['src_lang_en': 'angielskiego', 'text_en': 'november', 'translator': 'google', 'trg_lang_it': 'woski']
 56%|█████▌    | 5801/10358 [3:18:24<2:46:55,  2.20s/it]
 Weather|SunriseInLocation|['location': 'róanka']
 57%|█████▋    | 5901/10358 [3:21:02<2:56:27,  2.38s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query': 'koptyjski zrozumiao']
 58%|█████▊    | 6001/10358 [3:25:01<3:04:41,  2.54s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'kruchuteki ucisk']
 59%|█████▉    | 6101/10358 [3:29:02<2:50:21,  2.40s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'kolorowanki wielkanocne']
 60%|█████▉    | 6201/10358 [3:32:31<1:43:03,  1.49s/it]
 Websearch|SearchText|['txt_query': 'trackim tuck tucko']
 61%|██████    | 6301/10358 [3:34:27<1:16:13,  1.13s/it]
 Yelp|OpenRestaurantsInLocation|['location': 'wrzeszcz dolny']
 62%|██████▏   | 6401/10358 [3:37:40<2:21:13,  2.14s/it]
 Yelp|SearchByReviewCountInLocation|['location': 'poplar branch','review_count': '40']
 63%|██████▎   | 6501/10358 [3:40:28<2:10:15,  2.03s/it]
 Youtube|FindQueryOnChannel|['channel_id': 'laughing stock', 'query': 'barier cardi']
 64%|██████▎   | 6601/10358 [3:44:00<2:36:07,  2.49s/it]
 Youtube|FindQueryOnChannel|['channel_id': 'hearthstone legend', 'query': 'kabarety 2019 najnowsze']
 65%|██████▍   | 6701/10358 [3:47:35<2:04:56,  2.05s/it]
 Youtube|FindQueryOnChannel|['channel_id':'resourceniche', 'query': 'polska tahiti']
 66%|██████▌   | 6801/10358 [3:50:27<50:32,  1.17it/s]
 Youtube|NotifyOnNewFromFollowing|[]
 67%|██████▋   | 6901/10358 [3:53:37<4:42:36,  4.91s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'durante los próximos tres das o tendrás el 50 de descuento que','subject': 'durante los próximos tres das o tendrás el 50 de descuento que', 'to': 'chaya y soh mayer ir']
 68%|██████▊   | 7001/10358 [3:57:03<1:56:39,  2.09s/it]
 Calendar|CheckCalendarOnDate|['date': 'l'ultima matricola']
 69%|██████▊   | 7101/10358 [3:59:17<1:27:44,  1.62s/it]
 Airconditioner|SetTemperatureToValue|['value': '24 grados']
 70%|██████▉   | 7201/10358 [4:02:06<1:11:36,  1.36s/it]
 Calendar|CheckCalendarOnDate|['date': 'el 1 de agosto']
 70%|███████   | 7301/10358 [4:04:14<1:18:49,  1.55s/it]
 Contacts|ShowContactWithEmail|['email': 'luis@gmail.com']
 71%|███████▏  | 7401/10358 [4:07:49<2:17:35,  2.79s/it]
 Contacts|ShowContactWithNumberAndWithEmail|['email': 'rupert.yanni@apx.com', 'phone_number': '447 349 366']
 72%|███████▏  | 7501/10358 [4:11:31<39:56,  1.19it/s]
 Email|SendEmail|[]
 73%|███████▎  | 7601/10358 [4:14:15<1:51:32,  2.43s/it]
 Email|SendEmailToAddressWithSubject|['subject': 'fwd more than their fair share', 'to': 'eugenio']
 74%|███████▍  | 7701/10358 [4:19:24<3:01:40,  4.10s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'ahora viene mi chico','subject': 'Te gustara vivir la mejor experiencia de tu vida?', 'to': 'kena']
 75%|███████▌  | 7801/10358 [4:26:21<2:56:35,  4.14s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'que poquito que falta para cataratas','subject':'southwest into california', 'to': 'chuy']
 76%|███████▋  | 7901/10358 [4:33:16<2:56:17,  4.31s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'rt si te gusta trollear','subject': 'here is the complete email', 'to':'montse mendizabal']
 77%|███████▋  | 8001/10358 [4:40:05<2:35:14,  3.95s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'lo acabo y me voy a dormir','subject': 'ceo letter', 'to':'salva@hotmail.es']
 78%|███████▊  | 8101/10358 [4:47:06<2:45:26,  4.40s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'hoy a tomar tere con milaa','subject': 'have played here a few times', 'to': 'jenelle@hotmail.com']
 79%|███████▉  | 8201/10358 [4:53:58<2:35:31,  4.33s/it]
 Email|SendEmailToAddressWithSubjectAndWithMessage|['message': 'cada vez mas viejas lisitas','subject': 'the new dublin campus next year', 'to': 'roselyn@proton.com']
 80%|████████  | 8301/10358 [4:58:21<51:18,  1.50s/it]
 Email|ShowEmailWithSubject|['subject':'summarizing all media coverage']
 81%|████████  | 8401/10358 [5:01:03<1:53:03,  3.47s/it]
 Facebook|PostPictureWithUrlAndWithCaption|['caption': 'le pregunté a mi perro cómo me veo por eso lo amo tanto', 'picture_url': 'bit.ly/NIAGyW3']
 82%|████████▏ | 8501/10358 [5:03:38<37:21,  1.21s/it]
 Facebook|PostAlbumOfTypeByArtist|['album': 'vacaciones en argentina 2019']
 83%|████████▎ | 8601/10358 [5:05:33<42:13,  1.44s/it]
 Instagram|ShowPicturesWithFilter|['filter': 'gingham']
 84%|████████▍ | 8701/10358 [5:07:50<55:57,  2.03s/it]
 News|NotifyWhenPortalUpdatesInSection|['portal': 'la voz de galicia','section': 'deporte']
 85%|████████▍ | 8801/10358 [5:11:03<49:30,  1.91s/it]
 News|ShowNewsFromSection|['portal': 'el pas','section': 'deporte']
 86%|████████▌ | 8901/10358 [5:14:16<51:23,  2.12s/it]
 Slack|SendMessageToChannel|['channel': 'citibank','message': 'excelente fin de semana']
 87%|████████▋ | 9001/10358 [5:16:36<52:36,  2.33s/it]
 Spotify|AddSongWithNameToPlaylistWithName|['playlist':'soulfull disco','song': 'how i see it']
 88%|████████▊ | 9101/10358 [5:18:57<20:56,  1.00it/s]
 Spotify|PlaySong|['song':'silent edge']
 89%|████████▉ | 9201/10358 [5:21:15<40:07,  2.08s/it]
 Spotify|PlaySongByArtist|['artist': 'jonathan larson','song':'song x']
 90%|████████▉ | 9301/10358 [5:24:38<34:06,  1.94s/it]
 Spotify|PlaySongByArtist|['artist': 'hard stance','song': 'on my toes']
 91%|█████████ | 9401/10358 [5:28:07<49:29,  3.10s/it]
 Translate|TranslateTextFromLanguageToLanguage|['src_lang': 'fránces', 'text':'marrón', 'trg_lang': 'espaol']
 92%|█████████▏| 9501/10358 [5:33:07<21:48,  1.53s/it]
 Calendar|CheckCalendarOnDate|['date': 'fase de la luna para croacia']
 93%|█████████▎| 9601/10358 [5:35:48<24:26,  1.94s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query':'mechas balayage']
 94%|█████████▎| 9701/10358 [5:39:09<21:16,  1.94s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeight|['img_query': 'la roca village']
 95%|█████████▍| 9801/10358 [5:42:42<20:17,  2.19s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'calendario febrero 2020 para imprimir']
 96%|█████████▌| 9901/10358 [5:46:23<15:14,  2.00s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'formacion']
 97%|█████████▋| 10001/10358 [5:50:01<12:44,  2.14s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query':'media melena']
 98%|█████████▊| 10101/10358 [5:53:41<10:01,  2.34s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query':'memes graciosos']
 98%|█████████▊| 10201/10358 [5:57:18<05:42,  2.18s/it]
 Websearch|SearchImagesWithTextOnEngineWithWidthAndWithHeightAndWithCondition|['img_query': 'rita maestre']
 99%|█████████▉| 10301/10358 [6:00:14<01:35,  1.67s/it]
 Yelp|SearchByQueryInLocation|['location': 'aulesti', 'query': 'cerveceras artesanales y pubs']
100%|██████████| 10358/10358 [6:01:25<00:00,  2.09s/it]
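The per-example loop above takes about six hours; a batched variant along these lines would usually be faster (a sketch using the same model and tokenizer, not part of the submitted run):

# Hypothetical batched alternative to the per-example loop above (not used for the submission).
def generate_batched(dataset, batch_size=16):
    preds = []
    for start in tqdm(range(0, len(dataset), batch_size)):
        batch = dataset[start:start + batch_size]
        # pad the raw input_ids of this slice and move them to the model's device
        enc = tokenizer.pad({'input_ids': batch['input_ids']}, return_tensors='pt').to(model.device)
        with torch.no_grad():
            out = model.generate(input_ids=enc['input_ids'], attention_mask=enc['attention_mask'], max_new_tokens=512)
        preds.extend(tokenizer.batch_decode(out, skip_special_tokens=True))
    return preds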
# format predictions to target shape
predictions = [prediction.replace('|', '\t', 3).replace('[', '{').replace(']', '}').strip() for prediction in predictions]
with open('/content/cnlps-caiccaic/test-A/out_v2.tsv', 'w', encoding='utf-8') as out:
  for pred in predictions:
    out.write(pred + '\n')
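A small consistency check before copying the file to Drive (an added sketch, assuming the paths used above):

# Added sanity check (not in the original notebook): one output line per test example.
with open('/content/cnlps-caiccaic/test-A/out_v2.tsv', encoding='utf-8') as f:
  assert sum(1 for _ in f) == len(test_dataset)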
import locale
locale.getpreferredencoding = lambda: "UTF-8"  # Colab workaround so the shell commands below run with UTF-8
!cp /content/cnlps-caiccaic/test-A/out_v2.tsv /content/drive/MyDrive/
from google.colab import runtime
runtime.unassign()