diff --git a/cw/15_ekstrakcja_informacji_seq_2_seq.ipynb b/cw/15_ekstrakcja_informacji_seq_2_seq.ipynb new file mode 100644 index 0000000..e76e84d --- /dev/null +++ b/cw/15_ekstrakcja_informacji_seq_2_seq.ipynb @@ -0,0 +1,2318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://arxiv.org/pdf/1910.10683.pdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://github.com/applicaai/kleister-nda" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import T5Tokenizer, T5ForConditionalGeneration" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "text = \"translate English to French: My name is Azeem and I live in India\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "text = \"summarize: Machine learning involves computers discovering how they can perform tasks without being explicitly programmed to do so. It involves computers learning from data provided so that they carry out certain tasks. For simple tasks assigned to computers, it is possible to program algorithms telling the machine how to execute all steps required to solve the problem at hand; on the computer's part, no learning is needed. For more advanced tasks, it can be challenging for a human to manually create the needed algorithms. In practice, it can turn out to be more effective to help the machine develop its own algorithm, rather than having human programmers specify every needed step.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "machine learning involves computers learning from data provided so that they carry out certain tasks without being explicitly programme\n" + ] + } + ], + "source": [ + "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", + "\n", + "tokenizer = T5Tokenizer.from_pretrained('t5-small')\n", + "\n", + "model = T5ForConditionalGeneration.from_pretrained('t5-small', return_dict=True,).to('cuda')\n", + "\n", + "\n", + "# You can also use \"translate English to French\" and \"translate English to Romanian\"\n", + "input_ids = tokenizer(text, return_tensors=\"pt\").input_ids.to('cuda') # Batch size 1\n", + "\n", + "outputs = model.generate(input_ids)\n", + "\n", + "decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "T5ForConditionalGeneration(\n", + " (shared): Embedding(32128, 512)\n", + " (encoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (decoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (lm_head): Linear(in_features=512, out_features=32128, bias=False)\n", + ")" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "KLEISTER_PATH = '/media/kuba/ssdsam/Syncthing/Syncthing/przedmioty/2020-02/IE/applica/kleister-nda/'" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "train_exp_f = open(KLEISTER_PATH + 'train/expected.tsv')\n", + "train_exp = []\n", + "for line in train_exp_f:\n", + " line_splitted = line.strip('\\n').split(' ')\n", + " found = False\n", + " for elem in line_splitted:\n", + " if 'jurisdiction=' in elem:\n", + " train_exp.append('jurisdiction: ' + elem.split('=')[1])\n", + " found = True\n", + " break\n", + " if not found:\n", + " train_exp.append('jurisdiction: NONE')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "dev_exp_f = open(KLEISTER_PATH + 'dev-0/expected.tsv')\n", + "dev_exp = []\n", + "for line in dev_exp_f:\n", + " line_splitted = line.strip('\\n').split(' ')\n", + " found = False\n", + " for elem in line_splitted:\n", + " if 'jurisdiction=' in elem:\n", + " dev_exp.append('jurisdiction: ' + elem.split('=')[1])\n", + " found = True\n", + " break\n", + " if not found:\n", + " dev_exp.append('jurisdiction: NONE')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['jurisdiction: Oregon',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Iowa',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Indiana',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Michigan',\n", + " 'jurisdiction: Indiana',\n", + " 'jurisdiction: Colorado',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Connecticut',\n", + " 'jurisdiction: Nevada',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Idaho',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Minnesota',\n", + " 'jurisdiction: Virginia',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Nevada',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Washington',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Nevada',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Virginia',\n", + " 'jurisdiction: Wisconsin',\n", + " 'jurisdiction: Colorado',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: South_Dakota',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Indiana',\n", + " 'jurisdiction: Minnesota',\n", + " 'jurisdiction: Maine',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Indiana',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Maine',\n", + " 'jurisdiction: North_Carolina',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Kansas',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Connecticut',\n", + " 'jurisdiction: Utah',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: South_Carolina',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: North_Carolina',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Virginia',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Wisconsin',\n", + " 'jurisdiction: Washington',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Utah',\n", + " 'jurisdiction: Washington',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Colorado',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: Virginia',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Nevada',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Nevada',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: Kansas',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: New_Jersey',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Minnesota',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Colorado',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Indiana',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Illinois',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Oregon',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: Michigan',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Georgia',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: Massachusetts',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: Michigan',\n", + " 'jurisdiction: Washington',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Missouri',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Texas',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: Ohio',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Pennsylvania',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Rhode_Island',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Florida',\n", + " 'jurisdiction: New_York',\n", + " 'jurisdiction: Delaware',\n", + " 'jurisdiction: California',\n", + " 'jurisdiction: Delaware']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_exp" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "train_in_f = open(KLEISTER_PATH + 'train/in.tsv')\n", + "train_in = []\n", + "for line in train_in_f:\n", + " line = line.rstrip('\\n')\n", + " train_in.append(line)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "dev_in_f = open(KLEISTER_PATH + 'dev-0/in.tsv')\n", + "dev_in = []\n", + "for line in dev_in_f:\n", + " line = line.rstrip('\\n')\n", + " dev_in.append(line)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'00a1d238e37ac225b8045a97953e845d.pdf\\teffective_date jurisdiction party term\\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEE’s\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEE’s employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKE’s business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC. The nature of NIKE’s business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. Protected Information of NIKE can be used to NIKE’s detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction. During EMPLOYEE’s employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKE’s option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEE’s breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEE’s employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEE’s last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEE’s last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE’s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEE’s new\\\\nemployer, if known. EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3. Non-Disclosure Agreement.\\\\n(a) Protected Information Defined. “Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKE’s research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees’ original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employee’s Obligations. During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEE’s employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEE’s possession or under EMPLOYEE’s control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8. General Provisions.\\\\n(a) Survival. This Agreement shall continue in effect after the termination of EMPLOYEE’s employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEE’s employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nName:\\\\nTitle:\\\\nJeffrey M. Cava\\\\nVice President, Global Human Resources\\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\n \\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEE’s\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEE’s employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKE’s business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC. The nature of NIKE’s business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. Protected Information of NIKE can be used to NIKE’s detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(@) Competition Restriction. During EMPLOYEE’s employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKE’s option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEE’s breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEE’s employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOY EE’s last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEE’s last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE\\'’s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEE’s new\\\\nemployer, if known. EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3. Non-Disclosure Agreement.\\\\n(@) Protected Information Defined. “Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKE’s research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees’ original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employee’s Obligations. During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEE’s employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n \\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEE’s possession or under EMPLOYEE’s control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8. General Provisions.\\\\n(@) Survival. This Agreement shall continue in effect after the termination of EMPLOYEE’s employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEE’s employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE NIKE, Inc.\\\\n/s/ Eric Dean Sprunk By /s/ Jeffrey M. Cava\\\\nName: Jeffrey M. Cava\\\\nDATE 04/18/01 Title: Vice President, Global Human Resources\\tEX-10.23 5 dlex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (\"EMPLOYEE\")\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (\"NIKE\"):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEE\\'S\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEE\\'S employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKE\\'s business and not generally known to the public as defined below (\"Protected Information\").\\\\nIt\\\\nis\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC.\\\\nThe nature of NIKE\\'s business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. Protected Information of NIKE can be used to NIKE\\'s detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction During EMPLOYEE\\'S employment by NIKE, under the terms of any employment contract\\\\nor\\\\notherwise, and for one year thereafter, (the \"Restriction Period\"), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any\\\\nof\\\\nits parent, subsidiaries or affiliated corporations C \"Competitor\"). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKE\\'s option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b)\\\\nExtension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEE\\'S breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEE\\'s employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEE\\'S last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEE\\'S last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE\\'s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEE\\'S new\\\\nemployer, if known. EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3.\\\\nNon-Disclosure Agreement.\\\\n(a) Protected Information Defined. \"Protected Information\" shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKE\\'s research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees\\' original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c)\\\\nEmployee\\'s Obligations. During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEE\\'S employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor\\\\ncontaining Protected Information, and all copies thereof, then in EMPLOYEE\\'S possession or under EMPLOYEE\\'S control.\\\\n5.\\\\nUnauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE wil notify NIKE immediately\\\\nif\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies\\\\nto\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8.\\\\nGeneral Provisions.\\\\n(a)\\\\nSurvival. This Agreement shall continue in effect after the termination of EMPLOYEE\\'S employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability.. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEE\\'S employment hereunder, shall be construed according to the\\\\nlaws\\\\nof\\\\nthe State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nName: Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nTitle: Vice President, Global Human Resources\\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEE’s\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEE’s employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKE’s business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC. The nature of NIKE’s business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. Protected Information of NIKE can be used to NIKE’s detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction. During EMPLOYEE’s employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKE’s option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEE’s breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEE’s employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEE’s last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEE’s last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE’s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEE’s new\\\\nemployer, if known. EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3. Non-Disclosure Agreement.\\\\n(a) Protected Information Defined. “Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKE’s research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees’ original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employee’s Obligations. During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEE’s employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEE’s possession or under EMPLOYEE’s control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8. General Provisions.\\\\n(a) Survival. This Agreement shall continue in effect after the termination of EMPLOYEE’s employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEE’s employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nName:\\\\nTitle:\\\\nJeffrey M. Cava\\\\nVice President, Global Human Resources'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_in[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "device(type='cuda', index=0)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.device" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Token indices sequence length is longer than the specified maximum sequence length for this model (11717 > 512). Running this sequence through the model will result in indexing errors\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "and non-disclosure Agreement.n(a) Competition Restriction.\n" + ] + } + ], + "source": [ + "input = train_in[0]\n", + "\n", + "# You can also use \"translate English to French\" and \"translate English to Romanian\"\n", + "input_ids = tokenizer(input, return_tensors=\"pt\").input_ids[:,:512].to('cuda') # Batch size 1\n", + "\n", + "outputs = model.generate(input_ids)\n", + "\n", + "decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "input_ids = tokenizer('translate English to German: The house is wonderful.', return_tensors='pt').input_ids.to('cuda')\n", + "labels = tokenizer('Das Haus ist wunderbar.', return_tensors='pt').input_ids.to('cuda')\n", + "# the forward function automatically creates the correct decoder_input_ids\n", + "loss = model(input_ids=input_ids, labels=labels).loss" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(0.2543, device='cuda:0', grad_fn=)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loss" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AdamW\n", + "\n", + "optimizer = AdamW(model.parameters(), lr=5e-5)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "T5ForConditionalGeneration(\n", + " (shared): Embedding(32128, 512)\n", + " (encoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (decoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (lm_head): Linear(in_features=512, out_features=32128, bias=False)\n", + ")" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.train()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.828309059143066\n", + "11.455500602722168\n", + "12.591864585876465\n", + "11.697681427001953\n", + "9.457676887512207\n", + "10.367218017578125\n", + "7.407022953033447\n", + "8.830719947814941\n", + "10.031709671020508\n", + "6.843804359436035\n", + "9.030264854431152\n", + "8.841073989868164\n", + "9.884418487548828\n", + "8.1090087890625\n", + "5.866975784301758\n", + "8.52608585357666\n", + "5.992447853088379\n", + "7.147337436676025\n", + "6.601171970367432\n", + "8.028266906738281\n", + "6.183577060699463\n", + "5.559406280517578\n", + "6.755654335021973\n", + "5.919793128967285\n", + "5.167813301086426\n", + "5.351068496704102\n", + "5.7952165603637695\n", + "6.730508804321289\n", + "5.469816207885742\n", + "4.3772478103637695\n", + "4.868475914001465\n", + "5.726585865020752\n", + "3.966099739074707\n", + "5.961289405822754\n", + "5.155783653259277\n", + "4.634646892547607\n", + "4.736303806304932\n", + "4.152906894683838\n", + "4.373996257781982\n", + "4.358081340789795\n", + "4.958395957946777\n", + "3.8232321739196777\n", + "4.142550945281982\n", + "2.666247606277466\n", + "4.235062122344971\n", + "4.233397483825684\n", + "3.8168039321899414\n", + "3.1151959896087646\n", + "1.9562475681304932\n", + "3.445767641067505\n", + "4.4933247566223145\n", + "3.4922804832458496\n", + "2.250882625579834\n", + "2.4218058586120605\n", + "2.260007858276367\n", + "2.5280778408050537\n", + "2.7701780796051025\n", + "3.8142340183258057\n", + "3.0554733276367188\n", + "1.8644142150878906\n", + "3.2941484451293945\n", + "2.286688804626465\n", + "3.366548538208008\n", + "1.0562607049942017\n", + "1.8493285179138184\n", + "2.8790605068206787\n", + "4.513855934143066\n", + "2.9482157230377197\n", + "2.0251893997192383\n", + "1.5018310546875\n", + "1.8084921836853027\n", + "1.7678613662719727\n", + "1.0362716913223267\n", + "1.6407744884490967\n", + "1.2443599700927734\n", + "2.2683565616607666\n", + "1.4040197134017944\n", + "3.9230520725250244\n", + "0.8626512289047241\n", + "0.7241716384887695\n", + "0.8391153812408447\n", + "3.9508471488952637\n", + "1.4111053943634033\n", + "1.333533525466919\n", + "0.38448166847229004\n", + "2.132805109024048\n", + "1.7784374952316284\n", + "2.150501251220703\n", + "2.3192851543426514\n", + "1.4407600164413452\n", + "1.4160407781600952\n", + "0.5990514159202576\n", + "1.2548216581344604\n", + "1.1115673780441284\n", + "1.957241177558899\n", + "1.2597360610961914\n", + "1.0772262811660767\n", + "1.1419639587402344\n", + "0.30694711208343506\n", + "2.0387325286865234\n", + "2.2052383422851562\n", + "4.552682399749756\n", + "1.1284838914871216\n", + "1.628050446510315\n", + "2.827632188796997\n", + "1.256350040435791\n", + "1.5137629508972168\n", + "0.17800401151180267\n", + "1.1130807399749756\n", + "1.4471491575241089\n", + "1.4046872854232788\n", + "1.5159196853637695\n", + "1.5683913230895996\n", + "0.9050359725952148\n", + "0.2453073114156723\n", + "0.829986572265625\n", + "1.342026948928833\n", + "0.697879433631897\n", + "0.8360342383384705\n", + "3.773777723312378\n", + "1.0000628232955933\n", + "1.163111925125122\n", + "0.636287271976471\n", + "0.6960057616233826\n", + "1.2984236478805542\n", + "1.4369347095489502\n", + "1.2260591983795166\n", + "1.1619309186935425\n", + "1.2387232780456543\n", + "0.4039798974990845\n", + "1.261201024055481\n", + "2.0990383625030518\n", + "0.6930045485496521\n", + "1.9684548377990723\n", + "0.41637909412384033\n", + "1.5580865144729614\n", + "0.935876727104187\n", + "0.5318026542663574\n", + "1.207798719406128\n", + "0.5434905290603638\n", + "0.10893465578556061\n", + "0.8033742904663086\n", + "0.25061750411987305\n", + "0.9297510981559753\n", + "1.1515181064605713\n", + "2.179370641708374\n", + "0.912304699420929\n", + "0.9962441325187683\n", + "1.3243765830993652\n", + "1.5690778493881226\n", + "1.0356395244598389\n", + "1.3098541498184204\n", + "0.2543454170227051\n", + "0.7984715104103088\n", + "0.10885466635227203\n", + "1.5388046503067017\n", + "1.3934229612350464\n", + "1.0405352115631104\n", + "1.744563341140747\n", + "0.9149143695831299\n", + "0.4559175670146942\n", + "0.7720739841461182\n", + "1.6526525020599365\n", + "0.5373530387878418\n", + "0.5430313348770142\n", + "0.5173842310905457\n", + "0.7213934659957886\n", + "0.6729367971420288\n", + "0.8275019526481628\n", + "1.3139863014221191\n", + "1.1809828281402588\n", + "1.423504114151001\n", + "0.4956137537956238\n", + "1.2472567558288574\n", + "0.3318641185760498\n", + "0.3209134638309479\n", + "0.09695105999708176\n", + "0.6424573063850403\n", + "1.224516749382019\n", + "0.13458161056041718\n", + "1.1670427322387695\n", + "1.1272934675216675\n", + "1.0477215051651\n", + "0.7291663289070129\n", + "0.6467929482460022\n", + "0.924201488494873\n", + "1.455331563949585\n", + "0.6269064545631409\n", + "0.7512378692626953\n", + "0.5907666087150574\n", + "0.8808064460754395\n", + "0.5326775312423706\n", + "0.4754364490509033\n", + "0.5422216653823853\n", + "0.9144468307495117\n", + "0.6809101700782776\n", + "0.1790292114019394\n", + "0.7104746103286743\n", + "0.41490861773490906\n", + "1.4695433378219604\n", + "1.381641149520874\n", + "0.34390121698379517\n", + "0.5615295171737671\n", + "0.4991306960582733\n", + "1.755591630935669\n", + "0.02876635640859604\n", + "0.06847237050533295\n", + "1.4051387310028076\n", + "0.3321903944015503\n", + "0.5550190210342407\n", + "0.8398134708404541\n", + "0.6281668543815613\n", + "0.7955247759819031\n", + "0.4672299921512604\n", + "1.0951168537139893\n", + "0.6541656255722046\n", + "0.8140543699264526\n", + "0.043958500027656555\n", + "0.04899679496884346\n", + "0.8996919989585876\n", + "0.275490403175354\n", + "0.2666592597961426\n", + "0.09318255633115768\n", + "0.3718479871749878\n", + "1.495982050895691\n", + "0.0595063678920269\n", + "1.7708230018615723\n", + "0.7092909216880798\n", + "0.9086990356445312\n", + "0.010129873640835285\n", + "0.7636302709579468\n", + "1.0733331441879272\n", + "0.060608845204114914\n", + "1.3388985395431519\n", + "0.4673462510108948\n", + "0.21733486652374268\n", + "0.5459968447685242\n", + "0.050972938537597656\n", + "0.4641537666320801\n", + "0.7601963877677917\n", + "0.44411876797676086\n", + "0.09443528205156326\n", + "1.623687982559204\n", + "0.5162641406059265\n", + "0.6031121611595154\n", + "0.8987085223197937\n", + "0.3393983840942383\n", + "2.8573479652404785\n", + "0.8427947759628296\n", + "1.0764878988265991\n", + "0.4185052812099457\n", + "0.6308793425559998\n", + "0.01906685344874859\n", + "0.141354501247406\n" + ] + } + ], + "source": [ + "for line_in, line_exp in zip(train_in, train_exp):\n", + " input_ids = tokenizer(line_in, return_tensors='pt').input_ids[:,:512].to('cuda')\n", + " labels = tokenizer(line_exp, return_tensors='pt').input_ids.to('cuda')\n", + " # the forward function automatically creates the correct decoder_input_ids\n", + " loss = model(input_ids=input_ids, labels=labels).loss\n", + " loss.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + " print(loss.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "T5ForConditionalGeneration(\n", + " (shared): Embedding(32128, 512)\n", + " (encoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (decoder): T5Stack(\n", + " (embed_tokens): Embedding(32128, 512)\n", + " (block): ModuleList(\n", + " (0): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " (relative_attention_bias): Embedding(32, 8)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (1): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (2): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (3): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (4): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " (5): T5Block(\n", + " (layer): ModuleList(\n", + " (0): T5LayerSelfAttention(\n", + " (SelfAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (1): T5LayerCrossAttention(\n", + " (EncDecAttention): T5Attention(\n", + " (q): Linear(in_features=512, out_features=512, bias=False)\n", + " (k): Linear(in_features=512, out_features=512, bias=False)\n", + " (v): Linear(in_features=512, out_features=512, bias=False)\n", + " (o): Linear(in_features=512, out_features=512, bias=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (2): T5LayerFF(\n", + " (DenseReluDense): T5DenseReluDense(\n", + " (wi): Linear(in_features=512, out_features=2048, bias=False)\n", + " (wo): Linear(in_features=2048, out_features=512, bias=False)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (final_layer_norm): T5LayerNorm()\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (lm_head): Linear(in_features=512, out_features=32128, bias=False)\n", + ")" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.eval()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "jurisdiction: Colorado\n" + ] + } + ], + "source": [ + "input = dev_in[0]\n", + "\n", + "input_ids = tokenizer(input, return_tensors=\"pt\").input_ids[:,:512].to('cuda') # Batch size 1\n", + "\n", + "outputs = model.generate(input_ids)\n", + "\n", + "decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'jurisdiction: New_York'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev_exp[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "jurisdiction: Delaware\n" + ] + } + ], + "source": [ + "input = dev_in[2]\n", + "\n", + "input_ids = tokenizer(input, return_tensors=\"pt\").input_ids[:,:512].to('cuda') # Batch size 1\n", + "\n", + "outputs = model.generate(input_ids)\n", + "\n", + "decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", + "\n", + "print(decoded)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'jurisdiction: Delaware'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev_exp[2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pytanie:\n", + "- co można poprawić w istniejącym rozwiązaniu?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Zadanie domowe:\n", + "\n", + "Za pomocą powyższej metody stworzyć rozwiązanie dla kleister-nda. Wymagania:\n", + " - niezerowy wynik zarówno dla precision i recall dla conajmniej 3 spośród pól: effective_date, jurisdiction, party, term\n", + " \n", + "Punkty: 80.\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### SIMILARITY SEARCH\n", + "1. zainstaluj faiss i zrób tutorial: https://github.com/facebookresearch/faiss\n", + "2. wczytaj treści artykułów z BBC News Train.csv\n", + "3. Użyj któregoś z transformerów (możesz użyć biblioteki sentence-transformers) do stworzenia embeddingów dokumentów\n", + "4. wczytaj embeddingi do bazy danych faiss\n", + "5. wyszukaj query 'consumer electronics market'" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pickle\n", + "import numpy as np\n", + "import faiss\n", + "from sklearn.metrics import ndcg_score, dcg_score, average_precision_score" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: sentence-transformers in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (1.2.0)\n", + "Requirement already satisfied: sentencepiece in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.1.91)\n", + "Requirement already satisfied: torchvision in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.6.0)\n", + "Requirement already satisfied: scipy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.4.1)\n", + "Requirement already satisfied: torch>=1.6.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.8.1)\n", + "Requirement already satisfied: tqdm in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.48.2)\n", + "Requirement already satisfied: scikit-learn in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.23.2)\n", + "Requirement already satisfied: nltk in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (3.5)\n", + "Requirement already satisfied: transformers<5.0.0,>=3.1.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.4.2)\n", + "Requirement already satisfied: numpy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.20.3)\n", + "Requirement already satisfied: pillow>=4.1.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torchvision->sentence-transformers) (8.0.1)\n", + "Requirement already satisfied: typing-extensions in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torch>=1.6.0->sentence-transformers) (3.7.4.3)\n", + "Requirement already satisfied: joblib>=0.11 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (0.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (2.1.0)\n", + "Requirement already satisfied: click in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (7.1.2)\n", + "Requirement already satisfied: regex in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (2020.7.14)\n", + "Requirement already satisfied: sacremoses in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.0.43)\n", + "Requirement already satisfied: packaging in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (20.4)\n", + "Requirement already satisfied: filelock in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.12)\n", + "Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.10.1)\n", + "Requirement already satisfied: requests in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (2.24.0)\n", + "Requirement already satisfied: six in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sacremoses->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.15.0)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from packaging->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.4.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2020.6.20)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.25.10)\n", + "Requirement already satisfied: idna<3,>=2.5 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.4)\n" + ] + } + ], + "source": [ + "!pip install sentence-transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-0.07142266 -0.07716199 -0.03047761 ... 0.01356028 -0.04016104\n", + " -0.02446149]\n", + " [-0.06508802 -0.06923407 -0.03735013 ... 0.01013562 -0.04027328\n", + " -0.02171571]]\n" + ] + } + ], + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "sentences = [\"Hello World\", \"Hallo Welt\"]\n", + "\n", + "model = SentenceTransformer('LaBSE')\n", + "embeddings = model.encode(sentences)\n", + "print(embeddings)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "r = pd.read_csv('BBC News Train.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "DOCUMENTS = list(r.Text)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = model.encode(DOCUMENTS)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = model.encode(list(r.Text))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "QUERY_STR = 'consumer electronics market'" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "query = model.encode([QUERY_STR])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "index = faiss.IndexFlatL2(embeddings.shape[1]) " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "index.add(np.ascontiguousarray(embeddings))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "D, I = index.search(query, 5) " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1363, 1371, 898, 744, 292]])" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "I" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.3110979, 1.4027181, 1.4045265, 1.4421673, 1.4421673]],\n", + " dtype=float32)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'internet boom for gift shopping cyberspace is becoming a very popular destination for christmas shoppers. forecasts predict that british people will spend £4bn buying gifts online during the festive season an increase of 64% on 2003. surveys also show that the average amount that people are spending is rising as is the range of goods that they are happy to buy online. savvy shoppers are also using the net to find the hot presents that are all but sold out in high street stores. almost half of the uk population now shop online according to figures collected by the interactive media in retail group which represents web retailers. about 85% of this group 18m people expect to do a lot of their christmas gift buying online this year reports the industry group. on average each shopper will spend £220 and britons lead europe in their affection for online shopping. almost a third of all the money spent online this christmas will come out of british wallets and purses compared to 29% from german shoppers and only 4% from italian gift buyers. james roper director of the imrg said shoppers were now much happier to buy so-called big ticket items such as lcd television sets and digital cameras. mr roper added that many retailers were working hard to reassure consumers that online shopping was safe and that goods ordered as presents would arrive in time for christmas. he advised consumers to give shops a little more time than usual to fulfil orders given that online buying is proving so popular. a survey by hostway suggests that many men prefer to shop online to avoid the embarrassment of buying some types of presents such as lingerie for wives and girlfriends. much of this online shopping is likely to be done during work time according to research carried out by security firm saint bernard software. the research reveals that up to two working days will be lost by staff who do their shopping via their work computer. worst offenders will be those in the 18-35 age bracket suggests the research who will spend up to five hours per week in december browsing and buying at online shops. iggy fanlo chief revenue officer at shopping.com said that the growing numbers of people using broadband was driving interest in online shopping. when you consider narrowband and broadband the conversion to sale is two times higher he said. higher speeds meant that everything happened much faster he said which let people spend time browsing and finding out about products before they buy. the behaviour of online shoppers was also changing he said. the single biggest reason people went online before this year was price he said. the number one reason now is convenience. very few consumers click on the lowest price he said. they are looking for good prices and merchant reliability. consumer comments and reviews were also proving popular with shoppers keen to find out who had the most reliable customer service. data collected by ebay suggests that some smart shoppers are getting round the shortages of hot presents by buying them direct through the auction site. according to ebay uk there are now more than 150 robosapiens remote control robots for sale via the site. the robosapiens toy is almost impossible to find in online and offline stores. similarly many shoppers are turning to ebay to help them get hold of the hard-to-find slimline playstation 2 which many retailers are only selling as part of an expensive bundle. the high demand for the playstation 2 has meant that prices for it are being driven up. in shops the ps2 is supposed to sell for £104.99. in some ebay uk auctions the price has risen to more than double this figure. many people are also using ebay to get hold of gadgets not even released in this country. the portable version of the playstation has only just gone on sale in japan yet some enterprising ebay users are selling the device to uk gadget fans.'" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DOCUMENTS[1363]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}