aitech-eks-pub/cw/15_similarity_search.ipynb

130 KiB
Raw Blame History

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# T5 selects its task from the textual prefix of the prompt.  Both example
# prompts are kept, but only the summarization prompt is fed to the model.
translation_text = "translate English to French: My name is Azeem and I live in India"
summarization_text = "summarize: Machine learning involves computers discovering how they can perform tasks without being explicitly programmed to do so. It involves computers learning from data provided so that they carry out certain tasks. For simple tasks assigned to computers, it is possible to program algorithms telling the machine how to execute all steps required to solve the problem at hand; on the computer's part, no learning is needed. For more advanced tasks, it can be challenging for a human to manually create the needed algorithms. In practice, it can turn out to be more effective to help the machine develop its own algorithm, rather than having human programmers specify every needed step."
text = summarization_text

# Use the GPU when one is present, otherwise fall back to the CPU so the
# cell still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = T5Tokenizer.from_pretrained('t5-small')

model = T5ForConditionalGeneration.from_pretrained('t5-small', return_dict=True).to(device)


# You can also use "translate English to French" and "translate English to Romanian"
input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)  # Batch size 1

# NOTE(review): generate() runs with its default settings; the printed
# summary is cut off mid-word, presumably by the default length limit.
# Pass an explicit max_length if a longer output is wanted - confirm.
outputs = model.generate(input_ids)

# Only the first (and only) sequence of the batch is decoded.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(decoded)
machine learning involves computers learning from data provided so that they carry out certain tasks without being explicitly programme
# Notebook cell: evaluating the bare name makes the notebook print the model's module tree.
model
T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=512, out_features=32128, bias=False)
)
KLEISTER_PATH = '/media/kuba/ssdsam/Syncthing/Syncthing/przedmioty/2020-02/IE/applica/kleister-nda/'


def _read_jurisdictions(path):
    """Return one 'jurisdiction: <value>' label per line of an expected.tsv file.

    Each line is split on single spaces into ``key=value`` items.  The value
    of the first ``jurisdiction=`` item becomes the label; a line without such
    an item yields ``'jurisdiction: NONE'``.

    Args:
        path: Path of the expected.tsv file to read.

    Returns:
        A list of label strings, in the same order as the lines of the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    labels = []
    # The context manager guarantees the file handle is closed.
    with open(path, encoding='utf-8') as expected_f:
        for line in expected_f:
            label = 'jurisdiction: NONE'
            for elem in line.strip('\n').split(' '):
                if elem.startswith('jurisdiction='):
                    # Split on the first '=' only, so the value is kept whole.
                    label = 'jurisdiction: ' + elem.split('=', 1)[1]
                    break
            labels.append(label)
    return labels


train_exp = _read_jurisdictions(KLEISTER_PATH + 'train/expected.tsv')
dev_exp = _read_jurisdictions(KLEISTER_PATH + 'dev-0/expected.tsv')
# Notebook cell: echo the training labels.
train_exp
['jurisdiction: Oregon',
 'jurisdiction: California',
 'jurisdiction: Florida',
 'jurisdiction: Pennsylvania',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Illinois',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Iowa',
 'jurisdiction: Delaware',
 'jurisdiction: Indiana',
 'jurisdiction: New_York',
 'jurisdiction: Massachusetts',
 'jurisdiction: New_York',
 'jurisdiction: Michigan',
 'jurisdiction: Indiana',
 'jurisdiction: Colorado',
 'jurisdiction: Georgia',
 'jurisdiction: New_York',
 'jurisdiction: Oregon',
 'jurisdiction: Pennsylvania',
 'jurisdiction: Delaware',
 'jurisdiction: Florida',
 'jurisdiction: Delaware',
 'jurisdiction: Illinois',
 'jurisdiction: Illinois',
 'jurisdiction: Delaware',
 'jurisdiction: California',
 'jurisdiction: Delaware',
 'jurisdiction: Missouri',
 'jurisdiction: Oregon',
 'jurisdiction: Delaware',
 'jurisdiction: Delaware',
 'jurisdiction: Connecticut',
 'jurisdiction: Nevada',
 'jurisdiction: New_York',
 'jurisdiction: Illinois',
 'jurisdiction: Idaho',
 'jurisdiction: Florida',
 'jurisdiction: Delaware',
 'jurisdiction: Delaware',
 'jurisdiction: Minnesota',
 'jurisdiction: Virginia',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: Nevada',
 'jurisdiction: New_York',
 'jurisdiction: Washington',
 'jurisdiction: New_York',
 'jurisdiction: Ohio',
 'jurisdiction: Nevada',
 'jurisdiction: Georgia',
 'jurisdiction: Massachusetts',
 'jurisdiction: Texas',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: Virginia',
 'jurisdiction: Wisconsin',
 'jurisdiction: Colorado',
 'jurisdiction: Oregon',
 'jurisdiction: Delaware',
 'jurisdiction: Ohio',
 'jurisdiction: Missouri',
 'jurisdiction: South_Dakota',
 'jurisdiction: New_York',
 'jurisdiction: Indiana',
 'jurisdiction: Minnesota',
 'jurisdiction: Maine',
 'jurisdiction: Missouri',
 'jurisdiction: Delaware',
 'jurisdiction: Illinois',
 'jurisdiction: Indiana',
 'jurisdiction: Massachusetts',
 'jurisdiction: Illinois',
 'jurisdiction: New_Jersey',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: Maine',
 'jurisdiction: North_Carolina',
 'jurisdiction: Missouri',
 'jurisdiction: Georgia',
 'jurisdiction: Missouri',
 'jurisdiction: New_York',
 'jurisdiction: Georgia',
 'jurisdiction: New_York',
 'jurisdiction: Kansas',
 'jurisdiction: California',
 'jurisdiction: Oregon',
 'jurisdiction: Delaware',
 'jurisdiction: Delaware',
 'jurisdiction: Connecticut',
 'jurisdiction: Utah',
 'jurisdiction: Texas',
 'jurisdiction: Delaware',
 'jurisdiction: Ohio',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: South_Carolina',
 'jurisdiction: Texas',
 'jurisdiction: New_York',
 'jurisdiction: New_Jersey',
 'jurisdiction: Georgia',
 'jurisdiction: Massachusetts',
 'jurisdiction: Texas',
 'jurisdiction: Delaware',
 'jurisdiction: New_York',
 'jurisdiction: Pennsylvania',
 'jurisdiction: Pennsylvania',
 'jurisdiction: Massachusetts',
 'jurisdiction: Delaware',
 'jurisdiction: Florida',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Oregon',
 'jurisdiction: North_Carolina',
 'jurisdiction: Delaware',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Delaware',
 'jurisdiction: New_York',
 'jurisdiction: Massachusetts',
 'jurisdiction: Massachusetts',
 'jurisdiction: New_York',
 'jurisdiction: Missouri',
 'jurisdiction: Virginia',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: California',
 'jurisdiction: Massachusetts',
 'jurisdiction: Wisconsin',
 'jurisdiction: Washington',
 'jurisdiction: New_York',
 'jurisdiction: California',
 'jurisdiction: Illinois',
 'jurisdiction: Delaware',
 'jurisdiction: Massachusetts',
 'jurisdiction: California',
 'jurisdiction: Ohio',
 'jurisdiction: Illinois',
 'jurisdiction: New_York',
 'jurisdiction: New_Jersey',
 'jurisdiction: Delaware',
 'jurisdiction: Massachusetts',
 'jurisdiction: Massachusetts',
 'jurisdiction: Utah',
 'jurisdiction: Washington',
 'jurisdiction: Texas',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: Colorado',
 'jurisdiction: Delaware',
 'jurisdiction: Ohio',
 'jurisdiction: Pennsylvania',
 'jurisdiction: New_Jersey',
 'jurisdiction: Virginia',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Nevada',
 'jurisdiction: New_York',
 'jurisdiction: Texas',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: California',
 'jurisdiction: New_Jersey',
 'jurisdiction: Missouri',
 'jurisdiction: Illinois',
 'jurisdiction: Texas',
 'jurisdiction: New_Jersey',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: Missouri',
 'jurisdiction: Delaware',
 'jurisdiction: Nevada',
 'jurisdiction: Florida',
 'jurisdiction: Kansas',
 'jurisdiction: Oregon',
 'jurisdiction: Delaware',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: Texas',
 'jurisdiction: New_Jersey',
 'jurisdiction: Florida',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Delaware',
 'jurisdiction: Oregon',
 'jurisdiction: Minnesota',
 'jurisdiction: Texas',
 'jurisdiction: California',
 'jurisdiction: Delaware',
 'jurisdiction: California',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: Colorado',
 'jurisdiction: Pennsylvania',
 'jurisdiction: New_York',
 'jurisdiction: Indiana',
 'jurisdiction: Delaware',
 'jurisdiction: Pennsylvania',
 'jurisdiction: Massachusetts',
 'jurisdiction: Massachusetts',
 'jurisdiction: New_York',
 'jurisdiction: Ohio',
 'jurisdiction: Illinois',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: Oregon',
 'jurisdiction: Texas',
 'jurisdiction: Texas',
 'jurisdiction: Michigan',
 'jurisdiction: Delaware',
 'jurisdiction: California',
 'jurisdiction: Florida',
 'jurisdiction: California',
 'jurisdiction: Ohio',
 'jurisdiction: New_York',
 'jurisdiction: Massachusetts',
 'jurisdiction: Delaware',
 'jurisdiction: Georgia',
 'jurisdiction: Delaware',
 'jurisdiction: Massachusetts',
 'jurisdiction: Texas',
 'jurisdiction: New_York',
 'jurisdiction: Pennsylvania',
 'jurisdiction: Michigan',
 'jurisdiction: Washington',
 'jurisdiction: New_York',
 'jurisdiction: Missouri',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: California',
 'jurisdiction: Texas',
 'jurisdiction: Florida',
 'jurisdiction: Ohio',
 'jurisdiction: Delaware',
 'jurisdiction: New_York',
 'jurisdiction: New_York',
 'jurisdiction: Pennsylvania',
 'jurisdiction: New_York',
 'jurisdiction: Rhode_Island',
 'jurisdiction: California',
 'jurisdiction: Florida',
 'jurisdiction: New_York',
 'jurisdiction: Delaware',
 'jurisdiction: California',
 'jurisdiction: Delaware']
def _read_lines(path):
    """Return the lines of a text file with the trailing newline removed.

    Args:
        path: Path of the file to read.

    Returns:
        A list with one string per line, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the file handle is closed.  The encoding
    # is explicit because the documents contain non-ASCII characters.
    with open(path, encoding='utf-8') as in_f:
        return [line.rstrip('\n') for line in in_f]


# KLEISTER_PATH is the dataset root defined earlier in the notebook.
train_in = _read_lines(KLEISTER_PATH + 'train/in.tsv')
dev_in = _read_lines(KLEISTER_PATH + 'dev-0/in.tsv')
# Notebook cell: echo the first raw training row.
train_in[0]
'00a1d238e37ac225b8045a97953e845d.pdf\teffective_date jurisdiction party term\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEEs\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEEs employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKEs business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC. The nature of NIKEs business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. Protected Information of NIKE can be used to NIKEs detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction. 
During EMPLOYEEs employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKEs option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEEs breach. 
In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEEs employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEEs last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEEs last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKEs payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEEs new\\\\nemployer, if known. EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3. Non-Disclosure Agreement.\\\\n(a) Protected Information Defined. 
“Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKEs research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employees Obligations. 
During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEEs employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEEs possession or under EMPLOYEEs control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. 
NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8. General Provisions.\\\\n(a) Survival. This Agreement shall continue in effect after the termination of EMPLOYEEs employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. 
Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEEs employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nName:\\\\nTitle:\\\\nJeffrey M. Cava\\\\nVice President, Global Human Resources\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\n \\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA.  This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEEs\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB.  Over the course of EMPLOYEEs employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKEs business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC.  The nature of NIKEs business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. 
Protected Information of NIKE can be used to NIKEs detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1.  Covenant Not to Compete.\\\\n(@) Competition Restriction. During EMPLOYEEs employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKEs option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. 
NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEEs breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEEs employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOY EEs last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEEs last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE\'s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEEs new\\\\nemployer, if known. 
EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3.  Non-Disclosure Agreement.\\\\n(@) Protected Information Defined. “Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKEs research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b)  Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employees Obligations. 
During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEEs employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n \\\\n4.  Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEEs possession or under EMPLOYEEs control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. 
NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7.  Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8.  General Provisions.\\\\n(@) Survival. This Agreement shall continue in effect after the termination of EMPLOYEEs employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c)  Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. 
Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEEs employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE NIKE, Inc.\\\\n/s/ Eric Dean Sprunk By /s/ Jeffrey M. Cava\\\\nName: Jeffrey M. Cava\\\\nDATE 04/18/01 Title:  Vice President, Global Human Resources\tEX-10.23 5 dlex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk ("EMPLOYEE")\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. ("NIKE"):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEE\'S\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEE\'S employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKE\'s business and not generally known to the public as defined below ("Protected Information").\\\\nIt\\\\nis\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC.\\\\nThe nature of NIKE\'s business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. 
Protected Information of NIKE can be used to NIKE\'s detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction During EMPLOYEE\'S employment by NIKE, under the terms of any employment contract\\\\nor\\\\notherwise, and for one year thereafter, (the "Restriction Period"), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any\\\\nof\\\\nits parent, subsidiaries or affiliated corporations C "Competitor"). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKE\'s option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b)\\\\nExtension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. 
NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEE\'S breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEE\'s employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEE\'S last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEE\'S last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKE\'s payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEE\'S new\\\\nemployer, if known. 
EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3.\\\\nNon-Disclosure Agreement.\\\\n(a) Protected Information Defined. "Protected Information" shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKE\'s research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees\' original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c)\\\\nEmployee\'s Obligations. 
During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEE\'S employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor\\\\ncontaining Protected Information, and all copies thereof, then in EMPLOYEE\'S possession or under EMPLOYEE\'S control.\\\\n5.\\\\nUnauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE wil notify NIKE immediately\\\\nif\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. 
NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies\\\\nto\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8.\\\\nGeneral Provisions.\\\\n(a)\\\\nSurvival. This Agreement shall continue in effect after the termination of EMPLOYEE\'S employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability.. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. 
Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEE\'S employment hereunder, shall be construed according to the\\\\nlaws\\\\nof\\\\nthe State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nName: Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nTitle: Vice President, Global Human Resources\tEX-10.23 5 dex1023.htm COVENANT NOT TO COMPETE AND NON-DISCLOSURE AGREEMENT\\\\nExhibit 10.23\\\\nCOVENANT NOT TO COMPETE\\\\nAND NON-DISCLOSURE AGREEMENT\\\\nPARTIES:\\\\nEric Dean Sprunk (“EMPLOYEE”)\\\\nand\\\\nNIKE, Inc., divisions, subsidiaries\\\\nand affiliates. (“NIKE”):\\\\nRECITALS:\\\\nA. This Covenant Not to Compete and Non-Disclosure Agreement is executed upon initial employment or upon the EMPLOYEEs\\\\nadvancement with NIKE and is a condition of such employment or advancement.\\\\nB. Over the course of EMPLOYEEs employment with NIKE, EMPLOYEE will be or has been exposed to and/or is in a position to\\\\ndevelop confidential information peculiar to NIKEs business and not generally known to the public as defined below (“Protected Information”). It is\\\\nanticipated that EMPLOYEE will continue to be exposed to Protected Information of greater sensitivity as EMPLOYEE advances in the company.\\\\nC. The nature of NIKEs business is highly competitive and disclosure of any Protected Information would result in severe damage to NIKE\\\\nand be difficult to measure.\\\\nD. NIKE makes use of its Protected Information throughout the world. 
Protected Information of NIKE can be used to NIKEs detriment\\\\nanywhere in the world.\\\\nAGREEMENT:\\\\nIn consideration of the foregoing, and the terms and conditions set forth below, the parties agree as follows:\\\\n1. Covenant Not to Compete.\\\\n(a) Competition Restriction. During EMPLOYEEs employment by NIKE, under the terms of any employment contract or\\\\notherwise, and for one year thereafter, (the “Restriction Period”), EMPLOYEE will not directly or indirectly, own, manage, control, or participate in\\\\nthe ownership,\\\\nmanagement or control of, or be employed by, consult for, or be connected in any manner with, any business engaged anywhere in the world in the\\\\nathletic footwear, athletic apparel or sports equipment and accessories business, or any other business which directly competes with NIKE or any of\\\\nits parent, subsidiaries or affiliated corporations ( “Competitor”). By way of illustration only, examples of NIKE competitors include, but are not\\\\nlimited to: Adidas, FILA, Reebok, Puma, Champion, Oakley, DKNY, Converse, Asics, Saucony, New Balance, Ralph Lauren/Polo Sport, B.U.M,\\\\nFUBU, The Gap, Tommy Hilfiger, Umbro, Northface, Venator (Foot lockers), Sports Authority, Columbia Sportswear, Wilson, Mizuno, Callaway\\\\nGolf and Titleist. This provision is subject to NIKEs option to waive all or any portion of the Restriction Period as more specifically provided\\\\nbelow.\\\\n(b) Extension of Time. In the event EMPLOYEE breaches this covenant not to compete, the Restriction Period shall automatically\\\\ntoll from the date of the first breach, and all subsequent breaches, until the resolution of the breach through private settlement, judicial or other\\\\naction, including all appeals. The Restriction Period shall continue upon the effective date of any such settlement judicial or other resolution. 
NIKE\\\\nshall not be obligated to pay EMPLOYEE the additional compensation described in paragraph 1(d) below during any period of time in which this\\\\nAgreement is tolled due to EMPLOYEEs breach. In the event EMPLOYEE receives such additional compensation after any such breach,\\\\nEMPLOYEE must immediately reimburse NIKE in the amount of all such compensation upon the receipt of a written request by NIKE.\\\\n(c) Waiver of Non-Compete. NIKE has the option, in its sole discretion, to elect to waive all or a portion of the Restriction Period or\\\\nto limit the definition of Competitor, by giving EMPLOYEE seven (7) days prior notice of such election. In the event all or a portion of the\\\\nRestriction Period is waived, NIKE shall not be obligated to pay EMPLOYEE for any period of time as to which the covenant not to compete has\\\\nbeen waived.\\\\n(d) Additional Consideration. As additional consideration for the covenant not to compete described above, should NIKE terminate\\\\nEMPLOYEEs employment and elect to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly payment equal to one\\\\nhundred percent (100%) of EMPLOYEEs last monthly base salary while the Restriction Period is in effect. If EMPLOYEE voluntarily terminates\\\\nemployment and NIKE elects to enforce the non-competition agreement, NIKE shall pay EMPLOYEE a monthly severance payment equal to fifty\\\\npercent (50%) of EMPLOYEEs last monthly base salary while the Restriction Period is in effect. The first payment to EMPLOYEE of additional\\\\nconsideration shall follow on the next applicable pay period after the election to enforce the non-competition agreement, payable in accordance with\\\\nNIKEs payroll practices.\\\\n2. Subsequent Employer. EMPLOYEE agrees to notify NIKE at the time of separation of employment of the name of EMPLOYEEs new\\\\nemployer, if known. 
EMPLOYEE further agrees to disclose to NIKE the name of any subsequent employer during the Restriction Period, wherever\\\\nlocated and regardless of whether such employer is a competitor of NIKE.\\\\n3. Non-Disclosure Agreement.\\\\n(a) Protected Information Defined. “Protected Information” shall mean all proprietary information, in whatever form and format, of\\\\nNIKE and all information provided to NIKE by third parties which NIKE is obligated to keep confidential. EMPLOYEE agrees that any and all\\\\ninformation to which EMPLOYEE has access concerning NIKE projects and internal NIKE information is Protected Information, whether in verbal\\\\nform, machine-readable form, written or other tangible form, and whether designated as confidential or unmarked. Without limiting the foregoing,\\\\nProtected Information includes information relating to NIKEs research and development activities, its intellectual property and the filing or\\\\npendency of patent applications, confidential techniques, methods, styles, designs, design concepts and ideas, customer and vendor lists, contract\\\\nfactory lists, pricing information, manufacturing plans, business and marketing plans, sales information, methods of operation, manufacturing\\\\nprocesses and methods, products, and personnel information.\\\\n(b) Excluded Information. Notwithstanding paragraph 3(a), Protected Information excludes any information that is or becomes part\\\\nof the public domain through no act or failure to act on the part of EMPLOYEE. Specifically, employees shall be permitted to retain as part of their\\\\npersonal portfolio copies of the employees original artwork and designs, provided the artwork or designs have become part of the public domain. In\\\\nany dispute between the parties with respect to this exclusion, the burden of proof will be on EMPLOYEE and such proof will be by clear and\\\\nconvincing evidence.\\\\n(c) Employees Obligations. 
During the period of employment by NIKE and for a period of two (2) years thereafter, EMPLOYEE\\\\nwill hold in confidence and protect all Protected Information and will not, at any time, directly or indirectly, use any Protected Information for any\\\\npurpose outside the scope of EMPLOYEEs employment with NIKE or disclose any Protected Information to any third person or organization\\\\nwithout the prior written consent of NIKE. Specifically, but not by way of limitation, EMPLOYEE will not ever copy, transmit, reproduce,\\\\nsummarize, quote, publish or make any commercial or other use whatsoever of any Protected Information without the prior written consent of NIKE.\\\\nEMPLOYEE will also take reasonable security precautions and such other actions as may be necessary to insure that there is no use or disclosure,\\\\nintentional or inadvertent, of Protected Information in violation of this Agreement.\\\\n4. Return of Protected Information. At the request of NIKE at anytime, and in any event, upon termination of employment, EMPLOYEE\\\\nshall immediately return to NIKE all confidential documents, including tapes, notebooks, drawings, computer disks and other similar repositories of\\\\nor containing Protected Information, and all copies thereof, then in EMPLOYEEs possession or under EMPLOYEEs control.\\\\n5. Unauthorized Use. During the period of employment with NIKE and thereafter, EMPLOYEE will notify NIKE immediately if\\\\nEMPLOYEE becomes aware of the unauthorized possession, use or knowledge of any Protected Information by any person employed or not\\\\nemployed by NIKE at the time of such possession, use or knowledge. EMPLOYEE will cooperate with NIKE in the investigation of any such\\\\nincident and will cooperate with NIKE in any litigation with third parties deemed necessary by NIKE to protect the Protected Information. 
NIKE\\\\nshall provide reasonable reimbursement to EMPLOYEE for each hour so engaged and that amount shall not be diminished by operation of any\\\\npayment under Paragraph 1(d) of this Agreement.\\\\n6. Non-Recruitment. During the term of this Agreement and for a period of one (1) year thereafter, EMPLOYEE will not directly or\\\\nindirectly , solicit, divert or hire away (or attempt to solicit, divert or hire away) to or for himself or any other company or business organization, any\\\\nNIKE employee, whether or not such employee is a full-time employee or temporary employee and whether or not such employment is pursuant to a\\\\nwritten agreement or is at will.\\\\n7. Accounting of Profits. EMPLOYEE agrees that, if EMPLOYEE should violate any term of this Agreement, NIKE shall be entitled to an\\\\naccounting and repayment of all profits, compensation, commissions, remuneration or benefits which EMPLOYEE directly or indirectly has realized\\\\nand/or may realize as a result of or in connection with any such violation (including the return of any additional consideration paid by NIKE\\\\npursuant to Paragraph 1(d) above). Such remedy shall be in addition to and not in limitation of any injunctive relief or other rights or remedies to\\\\nwhich NIKE may be entitled at law or in equity.\\\\n8. General Provisions.\\\\n(a) Survival. This Agreement shall continue in effect after the termination of EMPLOYEEs employment, regardless of the reason for\\\\ntermination.\\\\n(b) Waiver. No waiver, amendment, modification or cancellation of any term or condition of this Agreement will be effective unless\\\\nexecuted in writing by both parties. No written waiver will excuse the performance of any act other than the act or acts specifically referred to\\\\ntherein.\\\\n(c) Severability. Each provision herein will be treated as a separate and independent clause and unenforceability of any one clause\\\\nwill in no way impact the enforceability of any other clause. 
Should any of the provisions in this Agreement be found to be unreasonable or invalid\\\\nby a court of competent jurisdiction, such provision will be enforceable to the maximum extent enforceable by the law of that jurisdiction.\\\\n(d) Applicable Law/Jurisdiction. This Agreement, and EMPLOYEEs employment hereunder, shall be construed according to the\\\\nlaws of the State of Oregon. EMPLOYEE further hereby submits to the jurisdiction of, and agrees that exclusive jurisdiction over and venue for any\\\\naction or proceeding arising out of or relating to this Agreement shall lie in the state and federal courts located in Oregon.\\\\nEMPLOYEE\\\\nNIKE, Inc.\\\\n/s/ Eric Dean Sprunk\\\\nBy\\\\n/s/ Jeffrey M. Cava\\\\nDATE 04/18/01\\\\nName:\\\\nTitle:\\\\nJeffrey M. Cava\\\\nVice President, Global Human Resources'
# Echo the device the model is currently placed on (the notebook displays the value).
model.device
device(type='cuda', index=0)
# Run the model on the first training document and print what it generates.
# Named `sample_text` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook session.
sample_text = train_in[0]

# Let the tokenizer truncate to the model's 512-token limit instead of slicing
# the tensor afterwards: slicing drops the closing EOS token and still triggers
# the "sequence length is longer than the specified maximum" warning.
input_ids = tokenizer(
    sample_text,
    return_tensors="pt",
    truncation=True,
    max_length=512,
).input_ids.to(model.device)  # Batch size 1, placed on the same device as the model

outputs = model.generate(input_ids)

# Only one sequence was generated; strip pad/EOS markers from the decoded text.
decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(decoded)
Token indices sequence length is longer than the specified maximum sequence length for this model (11717 > 512). Running this sequence through the model will result in indexing errors
and non-disclosure Agreement.n(a) Competition Restriction.
# Sanity check: compute the loss for one (source, target) translation pair.
english = 'translate English to German: The house is wonderful.'
german = 'Das Haus ist wunderbar.'

# Tokenize both sides the same way and move the id tensors to the GPU.
input_ids, labels = (
    tokenizer(sentence, return_tensors='pt').input_ids.to('cuda')
    for sentence in (english, german)
)

# Supplying `labels` lets forward() derive the decoder_input_ids on its own.
result = model(input_ids=input_ids, labels=labels)
loss = result.loss
loss
tensor(0.2543, device='cuda:0', grad_fn=<NllLossBackward>)
# transformers.AdamW is deprecated (and absent from recent releases), so use the
# PyTorch implementation instead. weight_decay=0.0 keeps the default of the
# transformers version; torch's own default would be 0.01.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)
# Put the model in training mode before fine-tuning; the notebook echoes the
# returned module.
model.train()
T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=512, out_features=32128, bias=False)
)
# One pass of per-example fine-tuning (batch size 1) over the paired
# input / expected-output lines.
for line_in, line_exp in zip(train_in, train_exp):
    # Truncate inside the tokenizer so a 512-token input still ends with EOS;
    # slicing the tensor afterwards cut the EOS off and raised a length warning.
    input_ids = tokenizer(
        line_in, return_tensors='pt', truncation=True, max_length=512
    ).input_ids.to(model.device)
    labels = tokenizer(line_exp, return_tensors='pt').input_ids.to(model.device)
    # the forward function automatically creates the correct decoder_input_ids
    loss = model(input_ids=input_ids, labels=labels).loss
    loss.backward()
    optimizer.step()
    # Clear gradients so they do not accumulate into the next example's update.
    optimizer.zero_grad()
    print(loss.item())
13.828309059143066
11.455500602722168
12.591864585876465
11.697681427001953
9.457676887512207
10.367218017578125
7.407022953033447
8.830719947814941
10.031709671020508
6.843804359436035
9.030264854431152
8.841073989868164
9.884418487548828
8.1090087890625
5.866975784301758
8.52608585357666
5.992447853088379
7.147337436676025
6.601171970367432
8.028266906738281
6.183577060699463
5.559406280517578
6.755654335021973
5.919793128967285
5.167813301086426
5.351068496704102
5.7952165603637695
6.730508804321289
5.469816207885742
4.3772478103637695
4.868475914001465
5.726585865020752
3.966099739074707
5.961289405822754
5.155783653259277
4.634646892547607
4.736303806304932
4.152906894683838
4.373996257781982
4.358081340789795
4.958395957946777
3.8232321739196777
4.142550945281982
2.666247606277466
4.235062122344971
4.233397483825684
3.8168039321899414
3.1151959896087646
1.9562475681304932
3.445767641067505
4.4933247566223145
3.4922804832458496
2.250882625579834
2.4218058586120605
2.260007858276367
2.5280778408050537
2.7701780796051025
3.8142340183258057
3.0554733276367188
1.8644142150878906
3.2941484451293945
2.286688804626465
3.366548538208008
1.0562607049942017
1.8493285179138184
2.8790605068206787
4.513855934143066
2.9482157230377197
2.0251893997192383
1.5018310546875
1.8084921836853027
1.7678613662719727
1.0362716913223267
1.6407744884490967
1.2443599700927734
2.2683565616607666
1.4040197134017944
3.9230520725250244
0.8626512289047241
0.7241716384887695
0.8391153812408447
3.9508471488952637
1.4111053943634033
1.333533525466919
0.38448166847229004
2.132805109024048
1.7784374952316284
2.150501251220703
2.3192851543426514
1.4407600164413452
1.4160407781600952
0.5990514159202576
1.2548216581344604
1.1115673780441284
1.957241177558899
1.2597360610961914
1.0772262811660767
1.1419639587402344
0.30694711208343506
2.0387325286865234
2.2052383422851562
4.552682399749756
1.1284838914871216
1.628050446510315
2.827632188796997
1.256350040435791
1.5137629508972168
0.17800401151180267
1.1130807399749756
1.4471491575241089
1.4046872854232788
1.5159196853637695
1.5683913230895996
0.9050359725952148
0.2453073114156723
0.829986572265625
1.342026948928833
0.697879433631897
0.8360342383384705
3.773777723312378
1.0000628232955933
1.163111925125122
0.636287271976471
0.6960057616233826
1.2984236478805542
1.4369347095489502
1.2260591983795166
1.1619309186935425
1.2387232780456543
0.4039798974990845
1.261201024055481
2.0990383625030518
0.6930045485496521
1.9684548377990723
0.41637909412384033
1.5580865144729614
0.935876727104187
0.5318026542663574
1.207798719406128
0.5434905290603638
0.10893465578556061
0.8033742904663086
0.25061750411987305
0.9297510981559753
1.1515181064605713
2.179370641708374
0.912304699420929
0.9962441325187683
1.3243765830993652
1.5690778493881226
1.0356395244598389
1.3098541498184204
0.2543454170227051
0.7984715104103088
0.10885466635227203
1.5388046503067017
1.3934229612350464
1.0405352115631104
1.744563341140747
0.9149143695831299
0.4559175670146942
0.7720739841461182
1.6526525020599365
0.5373530387878418
0.5430313348770142
0.5173842310905457
0.7213934659957886
0.6729367971420288
0.8275019526481628
1.3139863014221191
1.1809828281402588
1.423504114151001
0.4956137537956238
1.2472567558288574
0.3318641185760498
0.3209134638309479
0.09695105999708176
0.6424573063850403
1.224516749382019
0.13458161056041718
1.1670427322387695
1.1272934675216675
1.0477215051651
0.7291663289070129
0.6467929482460022
0.924201488494873
1.455331563949585
0.6269064545631409
0.7512378692626953
0.5907666087150574
0.8808064460754395
0.5326775312423706
0.4754364490509033
0.5422216653823853
0.9144468307495117
0.6809101700782776
0.1790292114019394
0.7104746103286743
0.41490861773490906
1.4695433378219604
1.381641149520874
0.34390121698379517
0.5615295171737671
0.4991306960582733
1.755591630935669
0.02876635640859604
0.06847237050533295
1.4051387310028076
0.3321903944015503
0.5550190210342407
0.8398134708404541
0.6281668543815613
0.7955247759819031
0.4672299921512604
1.0951168537139893
0.6541656255722046
0.8140543699264526
0.043958500027656555
0.04899679496884346
0.8996919989585876
0.275490403175354
0.2666592597961426
0.09318255633115768
0.3718479871749878
1.495982050895691
0.0595063678920269
1.7708230018615723
0.7092909216880798
0.9086990356445312
0.010129873640835285
0.7636302709579468
1.0733331441879272
0.060608845204114914
1.3388985395431519
0.4673462510108948
0.21733486652374268
0.5459968447685242
0.050972938537597656
0.4641537666320801
0.7601963877677917
0.44411876797676086
0.09443528205156326
1.623687982559204
0.5162641406059265
0.6031121611595154
0.8987085223197937
0.3393983840942383
2.8573479652404785
0.8427947759628296
1.0764878988265991
0.4185052812099457
0.6308793425559998
0.01906685344874859
0.141354501247406
# Switch back to evaluation mode (the counterpart of the earlier model.train());
# the notebook echoes the returned module.
model.eval()
T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerCrossAttention(
            (EncDecAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (2): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=512, out_features=32128, bias=False)
)
# Run the model on dev example 0 and print the decoded prediction.
# The text is bound to `source_text` rather than `input` so the builtin
# `input()` stays usable in the rest of the notebook session.
source_text = dev_in[0]

# Truncate inside the tokenizer instead of slicing the tensor afterwards:
# slicing `input_ids[:, :512]` cuts off the trailing EOS token on long inputs,
# while tokenizer-side truncation reserves room for it.
input_ids = tokenizer(
    source_text,
    return_tensors="pt",
    truncation=True,
    max_length=512,
).input_ids.to('cuda')  # Batch size 1

outputs = model.generate(input_ids)

decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(decoded)
jurisdiction: Colorado
dev_exp[0]  # presumably the reference answer for dev_in[0] -- TODO confirm; compare with the printed prediction
'jurisdiction: New_York'
# Run the model on dev example 2 and print the decoded prediction.
# The text is bound to `source_text` rather than `input` so the builtin
# `input()` stays usable in the rest of the notebook session.
source_text = dev_in[2]

# Truncate inside the tokenizer instead of slicing the tensor afterwards:
# slicing `input_ids[:, :512]` cuts off the trailing EOS token on long inputs,
# while tokenizer-side truncation reserves room for it.
input_ids = tokenizer(
    source_text,
    return_tensors="pt",
    truncation=True,
    max_length=512,
).input_ids.to('cuda')  # Batch size 1

outputs = model.generate(input_ids)

decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(decoded)
jurisdiction: Delaware
dev_exp[2]  # presumably the reference answer for dev_in[2] -- TODO confirm; compare with the printed prediction
'jurisdiction: Delaware'

pytanie:

  • co można poprawić w istniejącym rozwiązaniu?