import os from pathlib import Path from typing import List from loguru import logger import pandas as pd from PIL import Image import torch import typer try: from cloud_dataset import CloudDataset from cloud_model import CloudModel except ImportError: from benchmark.cloud_dataset import CloudDataset from benchmark.cloud_model import CloudModel ROOT_DIRECTORY = Path("/codeexecution") PREDICTIONS_DIRECTORY = ROOT_DIRECTORY / "predictions" ASSETS_DIRECTORY = Path("./submission/assets") DATA_DIRECTORY = ROOT_DIRECTORY / "data" INPUT_IMAGES_DIRECTORY = DATA_DIRECTORY / "test_features" # Set the pytorch cache directory and include cached models in your submission.zip os.environ["TORCH_HOME"] = str(ASSETS_DIRECTORY / "assets/torch") def get_metadata(features_dir: os.PathLike, bands: List[str]): """ Given a folder of feature data, return a dataframe where the index is the chip id and there is a column for the path to each band's TIF image. Args: features_dir (os.PathLike): path to the directory of feature data, which should have a folder for each chip bands (list[str]): list of bands provided for each chip """ chip_metadata = pd.DataFrame(index=[f"{band}_path" for band in bands]) chip_ids = ( pth.name for pth in features_dir.iterdir() if not pth.name.startswith(".") ) for chip_id in chip_ids: chip_bands = [features_dir / chip_id / f"{band}.tif" for band in bands] chip_metadata[chip_id] = chip_bands return chip_metadata.transpose().reset_index().rename(columns={"index": "chip_id"}) def make_predictions( model: CloudModel, x_paths: pd.DataFrame, bands: List[str], predictions_dir: os.PathLike, ): """Predicts cloud cover and saves results to the predictions directory. Args: model (CloudModel): an instantiated CloudModel based on pl.LightningModule x_paths (pd.DataFrame): a dataframe with a row for each chip. There must be a column for chip_id, and a column with the path to the TIF for each of bands provided bands (list[str]): list of bands provided for each chip predictions_dir (os.PathLike): Destination directory to save the predicted TIF masks """ test_dataset = CloudDataset(x_paths=x_paths, bands=bands) test_dataloader = torch.utils.data.DataLoader( test_dataset, batch_size=model.batch_size, num_workers=model.num_workers, shuffle=False, pin_memory=True, ) for batch_index, batch in enumerate(test_dataloader): logger.debug(f"Predicting batch {batch_index} of {len(test_dataloader)}") x = batch["chip"] preds = model.forward(x) preds = torch.softmax(preds, dim=1)[:, 1] preds = (preds > 0.5).detach().numpy().astype("uint8") for chip_id, pred in zip(batch["chip_id"], preds): chip_pred_path = predictions_dir / f"{chip_id}.tif" chip_pred_im = Image.fromarray(pred) chip_pred_im.save(chip_pred_path) def main( model_weights_path: Path = ASSETS_DIRECTORY / "cloud_model.pt", test_features_dir: Path = DATA_DIRECTORY / "test_features", predictions_dir: Path = PREDICTIONS_DIRECTORY, bands: List[str] = ["B02", "B03", "B04", "B08"], fast_dev_run: bool = False, ): """ Generate predictions for the chips in test_features_dir using the model saved at model_weights_path. Predictions are saved in predictions_dir. The default paths to all three files are based on the structure of the code execution runtime. Args: model_weights_path (os.PathLike): Path to the weights of a trained CloudModel. test_features_dir (os.PathLike, optional): Path to the features for the test data. Defaults to 'data/test_features' in the same directory as main.py predictions_dir (os.PathLike, optional): Destination directory to save the predicted TIF masks Defaults to 'predictions' in the same directory as main.py bands (List[str], optional): List of bands provided for each chip """ if not test_features_dir.exists(): raise ValueError( f"The directory for test feature images must exist and {test_features_dir} does not exist" ) predictions_dir.mkdir(exist_ok=True, parents=True) logger.info("Loading model") model = CloudModel(bands=bands, hparams={"weights": None}) try: model.load_state_dict(torch.load(model_weights_path)) except RuntimeError: model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu'))) logger.info("Loading test metadata") test_metadata = get_metadata(test_features_dir, bands=bands) train_metadata = get_metadata(Path('data/train_features'), bands=bands) if fast_dev_run: test_metadata = test_metadata.head() logger.info(f"Found {len(test_metadata)} chips") logger.info("Generating predictions in batches") make_predictions(model, test_metadata, bands, predictions_dir) make_predictions(model, train_metadata, bands, Path('data/predictions')) logger.info(f"""Saved {len(list(predictions_dir.glob("*.tif")))} predictions""") if __name__ == "__main__": typer.run(main)