JARVIS/nlg_train.ipynb

2 lines
24 KiB
Plaintext
Raw Permalink Normal View History

2024-06-04 10:46:12 +02:00
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"execution":{"iopub.execute_input":"2024-06-03T11:18:55.033345Z","iopub.status.busy":"2024-06-03T11:18:55.032642Z","iopub.status.idle":"2024-06-03T11:19:13.773777Z","shell.execute_reply":"2024-06-03T11:19:13.772989Z","shell.execute_reply.started":"2024-06-03T11:18:55.033313Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["2024-06-03 11:19:02.256736: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-06-03 11:19:02.256864: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-06-03 11:19:02.368948: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n"]}],"source":["from transformers import (\n"," AutoModelForSeq2SeqLM,\n"," AutoTokenizer,\n"," DataCollatorForSeq2Seq,\n"," Seq2SeqTrainer,\n"," Seq2SeqTrainingArguments,\n"," pipeline,\n",")\n","\n","from datasets import load_dataset\n","\n","# Hugging Face Hub id of the checkpoint this notebook works with.\n","model_name = \"google/umt5-small\"\n","\n","# Fixed seed so the random train/test split is reproducible across runs.\n","SEED = 42"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-06-03T11:19:13.775904Z","iopub.status.busy":"2024-06-03T11:19:13.775364Z","iopub.status.idle":"2024-06-03T11:19:14.356839Z","shell.execute_reply":"2024-06-03T11:19:14.355976Z","shell.execute_reply.started":"2024-06-03T11:19:13.775878Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"fdd37b65a44d42b2931bdc0db8229fa7","version_major":2,"version_minor":0},"text/plain":["Generating train split: 0 examples [00:00, ? 
examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["DatasetDict({\n"," train: Dataset({\n"," features: ['mr', 'ref'],\n"," num_rows: 18564\n"," })\n"," test: Dataset({\n"," features: ['mr', 'ref'],\n"," num_rows: 2063\n"," })\n","})"]},"execution_count":2,"metadata":{},"output_type":"execute_result"}],"source":["# Read the CSV as one 'train' split, then hold out 10% of the rows as 'test'.\n","# Passing seed pins the shuffle, so Restart & Run All yields the same split.\n","dataset = load_dataset('csv', data_files='/kaggle/input/ngl-data/nlg_data.csv', split='train').train_test_split(test_size=0.1, seed=SEED)\n","dataset"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2024-06-03T11:19:14.358052Z","iopub.status.busy":"2024-06-03T11:19:14.357803Z","iopub.status.idle":"2024-06-03T11:19:24.614600Z","shell.execute_reply":"2024-06-03T11:19:24.613696Z","shell.execute_reply.started":"2024-06-03T11:19:14.358030Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9418d01ad2124c74bcd05cc4d41b9b1d","version_major":2,"version_minor":0},"text/plain":["tokenizer_config.json: 0%| | 0.00/6.84k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"935f78ee0b3148929c9c0e022d590930","version_major":2,"version_minor":0},"text/plain":["spiece.model: 0%| | 0.00/4.55M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7eed04b1f7464253aa0be410233d4be2","version_major":2,"version_minor":0},"text/plain":["tokenizer.json: 0%| | 0.00/16.9M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"2d97cb1699b64507a746e6ca305e5dc9","version_major":2,"version_minor":0},"text/plain":["special_tokens_map.json: 0%| | 0.00/6.62k [00:00<?, 
?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c01c194e06124c7098a0488f52459b65","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/18564 [00:00<?, ? examples/s]"]},"metadata":{},"output_type":"display_data"}