diff --git a/donut-eval.py b/donut-eval.py index bdf7c93..acc102b 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -15,12 +15,17 @@ from sconf import Config def main(config): + image_size = [1920, 2560] config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) - config_vision.encoder.image_size = [1920, 2560] # (height, width) + config_vision.encoder.image_size = image_size # (height, width) config_vision.decoder.max_length = 768 processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config_vision) + + processor.image_processor.size = image_size[::-1] # should be (width, height) + processor.image_processor.do_align_long_axis = False + dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) device = "cuda" if torch.cuda.is_available() else "cpu" model.eval()