diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a392c51
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# Transformer Encoder - RoBERTa
+## Modifications
+1. ????
+
+# Transformer Decoder - GPT-2
+## Modifications
+1. ????
+
+
+# Transformer Encoder-Decoder - T5
+## Modifications
+1. Freezing the first 20 layers
+
+# Transformer in few-shot/zero-shot learning mode - ?????
\ No newline at end of file
diff --git a/run_translation_freezing.py b/run_translation_freezing.py
index b2e7e2b..bdeb26d 100644
--- a/run_translation_freezing.py
+++ b/run_translation_freezing.py
@@ -260,8 +260,13 @@ class DataTrainingArguments:
 
 def freeze_model_weights(model: torch.nn.Module) -> None:
     count = 0
     for param in model.parameters():
-        logger.info(count)
-        param.requires_grad = False
+        # model.parameters() yields individual parameter tensors, not layers.
+        count += 1
+        if count <= 20:
+            logger.info(f'Freezing parameter tensor {count}')
+            param.requires_grad = False
+        else:
+            logger.info(f'Skipping parameter tensor {count}')
 
 def main():
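
For reference, a minimal usage sketch of the updated `freeze_model_weights`, not part of the PR. It assumes the `transformers` library is installed, uses the `t5-small` checkpoint as a stand-in, and assumes that importing `run_translation_freezing` only defines the function without side effects:

```python
# Minimal sketch, assuming `transformers` is installed and that
# `freeze_model_weights` can be imported from run_translation_freezing.
from transformers import AutoModelForSeq2SeqLM

from run_translation_freezing import freeze_model_weights

model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
freeze_model_weights(model)

# Count frozen vs. trainable parameter tensors to confirm the effect:
# the first 20 tensors should now have requires_grad == False.
frozen = sum(1 for p in model.parameters() if not p.requires_grad)
trainable = sum(1 for p in model.parameters() if p.requires_grad)
print(f"frozen: {frozen}, trainable: {trainable}")
```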
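
Note that `model.parameters()` iterates over parameter tensors, so the diff freezes the first 20 tensors rather than the first 20 layers the README describes. If whole encoder layers are the intent, a sketch along these lines may be closer. It assumes a T5-style model from `transformers`, where encoder layers live in the `ModuleList` at `model.encoder.block`; `freeze_first_encoder_blocks` is a hypothetical helper, not part of this repo:

```python
# Hypothetical alternative, not part of the PR: freeze whole encoder blocks
# instead of individual parameter tensors. Assumes transformers' T5, where
# encoder layers are stored in the ModuleList model.encoder.block.
import torch
from transformers import T5ForConditionalGeneration

def freeze_first_encoder_blocks(model: torch.nn.Module, n_blocks: int = 20) -> None:
    # Disable gradients for every parameter in the first n_blocks encoder blocks.
    for block in model.encoder.block[:n_blocks]:
        for param in block.parameters():
            param.requires_grad = False

model = T5ForConditionalGeneration.from_pretrained("t5-large")  # 24 encoder blocks
freeze_first_encoder_blocks(model, n_blocks=20)
```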