prepared for run by prof Filip Gralinski

This commit is contained in:
Patryk Bartkowiak 2025-01-07 11:45:55 +00:00
parent f0679ab861
commit eed2096400
3 changed files with 10 additions and 8 deletions

View File

@@ -20,9 +20,11 @@ pdm install
```
### 4. Run training code
```bash
pdm run_training
pdm train
```
or
## Required secrets
```
pdm run src/train_codebert_mlm.py
export HF_TOKEN=your_huggingface_token_here
export WANDB_API_KEY=your_wandb_api_key_here
```

View File

@@ -1,9 +1,9 @@
{
"extra_embeddings": false,
"run_name": "original-continued",
"extra_embeddings": true,
"run_name": "tree-continued",
"data_dir": "./data/codeparrot-clean-parsed-starencoder-no-comments/",
"output_dir": "./outputs/no-comments-starencoder-original-2",
"checkpoint": "./outputs/no-comments-starencoder-original/1_epoch_ckpt/",
"output_dir": "./outputs/no-comments-starencoder-tree-2",
"checkpoint": null,
"seed": 420,
"mlm_probability": 0.15,
"batch_size": 32,

View File

@@ -50,7 +50,7 @@ def main():
set_seed(config['seed'])
# Initialize W&B
wandb.init(project='codeparrot-starencoder-no-comments', config=config, name=config['run_name'])
wandb.init(project='gralinski', config=config, name=config['run_name'])
# Upload the training files to W&B
wandb.save(__file__)