From 8a408ffbfca5f0419a71291f8825887aa7677145 Mon Sep 17 00:00:00 2001 From: patrycjalazna Date: Tue, 15 Feb 2022 16:42:28 +0100 Subject: [PATCH 1/2] added benchmark solution --- .../__pycache__/cloud_dataset.cpython-39.pyc | Bin 0 -> 2432 bytes benchmark/__pycache__/losses.cpython-39.pyc | Bin 0 -> 790 bytes benchmark/cloud_dataset.py | 68 ++++++ benchmark/cloud_model.py | 197 ++++++++++++++++++ benchmark/losses.py | 23 ++ benchmark/main.py | 135 ++++++++++++ 6 files changed, 423 insertions(+) create mode 100644 benchmark/__pycache__/cloud_dataset.cpython-39.pyc create mode 100644 benchmark/__pycache__/losses.cpython-39.pyc create mode 100644 benchmark/cloud_dataset.py create mode 100644 benchmark/cloud_model.py create mode 100644 benchmark/losses.py create mode 100644 benchmark/main.py diff --git a/benchmark/__pycache__/cloud_dataset.cpython-39.pyc b/benchmark/__pycache__/cloud_dataset.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32cef0a7b6c6fb0bf3b3097e0566e58c28a1607b GIT binary patch literal 2432 zcmZuz&2Jnv6d%vW&g@4*_)3X{kU@a5QrPfuD-|kg)T*coQYoq!RE?amvpbvFnW^nf zveE3RxfZx^qDToh{to^NUpeK@i5}oRZ?ee-yc*lje)Hb%ea^C_rIrKZ^FP`~LQM!{)Fx zY(Mkj4kzC@-QW!5Ion;=L|<1rD+`tz|3Rj7isx5H1Bl!3s0T0!t37r(0qt$!4+GH< z;hG~N+}pLe23lYE+~>h@5Vv@PhsRFb79AeFAiT+2$9}xTUBJ*WojZ9s=37iNCG^qH z4+Z0jW(CbgtS{6C)shuzx0EAAtL#wZbf4v8ykG^VO2~boC~Hv|JWH(=azfLx&@3ym zqEFL7R?&>Z3sxf4qEMxry>DO3*$zk@F}0`Y`oTa*LD@db;5ZlEo@LqY`X)R~Dy9d@ z1UsNvnQ(GWw=NN;0r=-);NFGj4m|2@m}*ixiBo$pliG*btzDQs8w{5vz-+alE%yth zS)ntg1=RyVuR4jQIa8|FnlpbX`|9SL!q?BS()Eh>(6x6Z8;R~s$|whF7b$v>=>Z(b z@_>SSsbJ~AI;%$?StpFfO4A*126!_cj|$6$-b^ySv7ik#Vl?KQRy4v!k5OG(6CT~a zd#R}0rPhrLa_k9R&mjfBS6V&+z__QtjKXHuS&`;rE;!~0o}xhuQqJSDX!`~&&r|&W zy|CZ`Ys}1Ob=B$xH5W!-H*=OwXsq=@UzTHVjh18ZM2e{n?%Wr0LDppk&8%NQJ9oil zA%I6&RppZl*X|0Y$B?(^3%V})l#Tl%0VRO-pt^Hin8!XcnZUx75u~NW>h1={S0dk) zn6>sqhPXq9-yPY3#*iw?u^M#M81Rj8S3Hkv}utx zExuOWTEN%-55B(^@PU4iw`y^;=Q!GRyN-D4mQzO%mG-s?Y_mMW*qLxQFPZ-A)3bkI zfRBB4m?>BT+o0tQ;OTm2=yZwv@^w6LO?cjWjE@_Mk?+1@;|vimQ=7CVl1ynKv(orw zB?{w9_@0;s98M|$YXyhgxJ6|GXt3#?O>on+1z44k-fO54Wt)~Q5sSOqnI+Pzc(thb(&yvO-q8jt6_7du{}DLK4+K+|w0EO|O4NHuK@v#j_ELgc>Z?ZzmE|ORgS0*em ziGNwJuGK1Th~I=%e(AbClz2oMFFQ9P&+>GaOi+wR)kMAvjHXe6E18l#AS8Y=O`uDe y4om~W2Oo0_Wri#m>nvBZ@P~*RdW#i#1&T#xOFp_av;GTAD23sT00{n8yng{bqaDj1_qC{dOkKT;^=FQlqvssM592`d+OSHf| z;N%-x1RQ@rS@-}u=!*H1j}`$4VO_#9*Wh}*1MVJg13ZW zpCjo~NOO?87PiyPNO98gDMTP>m8EhzjF&FI_%gmtZ->z)j6-UA^x-!=fFrE!vLhEX z!p&-GD!~gQBwUmWtb%mwPNZ6ujLHJ8&Bc#_(vFy}&LS83$zAAYUCi&OEO{|BRQPf( z8IR$jp^ 0.5) * 1 # convert to int + + # Log batch IOU + batch_iou = intersection_over_union(preds, y) + self.log( + "iou", batch_iou, on_step=True, on_epoch=True, prog_bar=True, logger=True + ) + return batch_iou + + def train_dataloader(self): + # DataLoader class for training + return torch.utils.data.DataLoader( + self.train_dataset, + batch_size=self.batch_size, + num_workers=self.num_workers, + shuffle=True, + pin_memory=True, + ) + + def val_dataloader(self): + # DataLoader class for validation + return torch.utils.data.DataLoader( + self.val_dataset, + batch_size=self.batch_size, + num_workers=0, + shuffle=False, + pin_memory=True, + ) + + def configure_optimizers(self): + opt = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate) + sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=10) + return [opt], [sch] + + ## Convenience Methods ## + + def _prepare_model(self): + # Instantiate U-Net model + unet_model = smp.Unet( + encoder_name=self.backbone, + encoder_weights=self.weights, + in_channels=4, + classes=2, + ) + if self.gpu: + unet_model.cuda() + + return unet_model \ No newline at end of file diff --git a/benchmark/losses.py b/benchmark/losses.py new file mode 100644 index 0000000..72ff917 --- /dev/null +++ b/benchmark/losses.py @@ -0,0 +1,23 @@ +import numpy as np + + +def intersection_over_union(pred, true): + """ + Calculates intersection and union for a batch of images. + + Args: + pred (torch.Tensor): a tensor of predictions + true (torc.Tensor): a tensor of labels + + Returns: + intersection (int): total intersection of pixels + union (int): total union of pixels + """ + valid_pixel_mask = true.ne(255) # valid pixel mask + true = true.masked_select(valid_pixel_mask).to("cpu") + pred = pred.masked_select(valid_pixel_mask).to("cpu") + + # Intersection and union totals + intersection = np.logical_and(true, pred) + union = np.logical_or(true, pred) + return intersection.sum() / union.sum() \ No newline at end of file diff --git a/benchmark/main.py b/benchmark/main.py new file mode 100644 index 0000000..6f240bc --- /dev/null +++ b/benchmark/main.py @@ -0,0 +1,135 @@ +import os +from pathlib import Path +from typing import List + +from loguru import logger +import pandas as pd +from PIL import Image +import torch +import typer + +try: + from cloud_dataset import CloudDataset + from cloud_model import CloudModel +except ImportError: + from benchmark.cloud_dataset import CloudDataset + from benchmark.cloud_model import CloudModel + + +ROOT_DIRECTORY = Path("/codeexecution") +PREDICTIONS_DIRECTORY = ROOT_DIRECTORY / "predictions" +ASSETS_DIRECTORY = Path("./submission/assets") +DATA_DIRECTORY = ROOT_DIRECTORY / "data" +INPUT_IMAGES_DIRECTORY = DATA_DIRECTORY / "test_features" + +# Set the pytorch cache directory and include cached models in your submission.zip +os.environ["TORCH_HOME"] = str(ASSETS_DIRECTORY / "assets/torch") + + +def get_metadata(features_dir: os.PathLike, bands: List[str]): + """ + Given a folder of feature data, return a dataframe where the index is the chip id + and there is a column for the path to each band's TIF image. + Args: + features_dir (os.PathLike): path to the directory of feature data, which should have + a folder for each chip + bands (list[str]): list of bands provided for each chip + """ + chip_metadata = pd.DataFrame(index=[f"{band}_path" for band in bands]) + chip_ids = ( + pth.name for pth in features_dir.iterdir() if not pth.name.startswith(".") + ) + + for chip_id in chip_ids: + chip_bands = [features_dir / chip_id / f"{band}.tif" for band in bands] + chip_metadata[chip_id] = chip_bands + + return chip_metadata.transpose().reset_index().rename(columns={"index": "chip_id"}) + + +def make_predictions( + model: CloudModel, + x_paths: pd.DataFrame, + bands: List[str], + predictions_dir: os.PathLike, +): + """Predicts cloud cover and saves results to the predictions directory. + Args: + model (CloudModel): an instantiated CloudModel based on pl.LightningModule + x_paths (pd.DataFrame): a dataframe with a row for each chip. There must be a column for chip_id, + and a column with the path to the TIF for each of bands provided + bands (list[str]): list of bands provided for each chip + predictions_dir (os.PathLike): Destination directory to save the predicted TIF masks + """ + test_dataset = CloudDataset(x_paths=x_paths, bands=bands) + test_dataloader = torch.utils.data.DataLoader( + test_dataset, + batch_size=model.batch_size, + num_workers=model.num_workers, + shuffle=False, + pin_memory=True, + ) + + for batch_index, batch in enumerate(test_dataloader): + logger.debug(f"Predicting batch {batch_index} of {len(test_dataloader)}") + x = batch["chip"] + preds = model.forward(x) + preds = torch.softmax(preds, dim=1)[:, 1] + preds = (preds > 0.5).detach().numpy().astype("uint8") + for chip_id, pred in zip(batch["chip_id"], preds): + chip_pred_path = predictions_dir / f"{chip_id}.tif" + chip_pred_im = Image.fromarray(pred) + chip_pred_im.save(chip_pred_path) + + +def main( + model_weights_path: Path = ASSETS_DIRECTORY / "cloud_model.pt", + test_features_dir: Path = DATA_DIRECTORY / "test_features", + predictions_dir: Path = PREDICTIONS_DIRECTORY, + bands: List[str] = ["B02", "B03", "B04", "B08"], + fast_dev_run: bool = False, +): + """ + Generate predictions for the chips in test_features_dir using the model saved at + model_weights_path. + Predictions are saved in predictions_dir. The default paths to all three files are based on + the structure of the code execution runtime. + Args: + model_weights_path (os.PathLike): Path to the weights of a trained CloudModel. + test_features_dir (os.PathLike, optional): Path to the features for the test data. Defaults + to 'data/test_features' in the same directory as main.py + predictions_dir (os.PathLike, optional): Destination directory to save the predicted TIF masks + Defaults to 'predictions' in the same directory as main.py + bands (List[str], optional): List of bands provided for each chip + """ + if not test_features_dir.exists(): + raise ValueError( + f"The directory for test feature images must exist and {test_features_dir} does not exist" + ) + predictions_dir.mkdir(exist_ok=True, parents=True) + + logger.info("Loading model") + model = CloudModel(bands=bands, hparams={"weights": None}) + try: + model.load_state_dict(torch.load(model_weights_path)) + except RuntimeError: + model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu'))) + + logger.info("Loading test metadata") + test_metadata = get_metadata(test_features_dir, bands=bands) + train_metadata = get_metadata(Path('data/train_features'), bands=bands) + + if fast_dev_run: + test_metadata = test_metadata.head() + logger.info(f"Found {len(test_metadata)} chips") + + logger.info("Generating predictions in batches") + make_predictions(model, test_metadata, bands, predictions_dir) + + make_predictions(model, train_metadata, bands, Path('data/predictions')) + + logger.info(f"""Saved {len(list(predictions_dir.glob("*.tif")))} predictions""") + + +if __name__ == "__main__": + typer.run(main) \ No newline at end of file From b7fff3245e4d04ae67e9ba8b0b4bf11b675cf8ed Mon Sep 17 00:00:00 2001 From: patrycjalazna Date: Tue, 15 Feb 2022 16:48:18 +0100 Subject: [PATCH 2/2] corrected .gitignore --- .gitignore | 2 +- .../__pycache__/cloud_dataset.cpython-39.pyc | Bin 2432 -> 0 bytes benchmark/__pycache__/losses.cpython-39.pyc | Bin 790 -> 0 bytes 3 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 benchmark/__pycache__/cloud_dataset.cpython-39.pyc delete mode 100644 benchmark/__pycache__/losses.cpython-39.pyc diff --git a/.gitignore b/.gitignore index 2d5a4f7..92ae505 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ venv/ **/images* **/data/train_features* **/data/train_labels* -**/__pychache__* +**/__pycache__* diff --git a/benchmark/__pycache__/cloud_dataset.cpython-39.pyc b/benchmark/__pycache__/cloud_dataset.cpython-39.pyc deleted file mode 100644 index 32cef0a7b6c6fb0bf3b3097e0566e58c28a1607b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2432 zcmZuz&2Jnv6d%vW&g@4*_)3X{kU@a5QrPfuD-|kg)T*coQYoq!RE?amvpbvFnW^nf zveE3RxfZx^qDToh{to^NUpeK@i5}oRZ?ee-yc*lje)Hb%ea^C_rIrKZ^FP`~LQM!{)Fx zY(Mkj4kzC@-QW!5Ion;=L|<1rD+`tz|3Rj7isx5H1Bl!3s0T0!t37r(0qt$!4+GH< z;hG~N+}pLe23lYE+~>h@5Vv@PhsRFb79AeFAiT+2$9}xTUBJ*WojZ9s=37iNCG^qH z4+Z0jW(CbgtS{6C)shuzx0EAAtL#wZbf4v8ykG^VO2~boC~Hv|JWH(=azfLx&@3ym zqEFL7R?&>Z3sxf4qEMxry>DO3*$zk@F}0`Y`oTa*LD@db;5ZlEo@LqY`X)R~Dy9d@ z1UsNvnQ(GWw=NN;0r=-);NFGj4m|2@m}*ixiBo$pliG*btzDQs8w{5vz-+alE%yth zS)ntg1=RyVuR4jQIa8|FnlpbX`|9SL!q?BS()Eh>(6x6Z8;R~s$|whF7b$v>=>Z(b z@_>SSsbJ~AI;%$?StpFfO4A*126!_cj|$6$-b^ySv7ik#Vl?KQRy4v!k5OG(6CT~a zd#R}0rPhrLa_k9R&mjfBS6V&+z__QtjKXHuS&`;rE;!~0o}xhuQqJSDX!`~&&r|&W zy|CZ`Ys}1Ob=B$xH5W!-H*=OwXsq=@UzTHVjh18ZM2e{n?%Wr0LDppk&8%NQJ9oil zA%I6&RppZl*X|0Y$B?(^3%V})l#Tl%0VRO-pt^Hin8!XcnZUx75u~NW>h1={S0dk) zn6>sqhPXq9-yPY3#*iw?u^M#M81Rj8S3Hkv}utx zExuOWTEN%-55B(^@PU4iw`y^;=Q!GRyN-D4mQzO%mG-s?Y_mMW*qLxQFPZ-A)3bkI zfRBB4m?>BT+o0tQ;OTm2=yZwv@^w6LO?cjWjE@_Mk?+1@;|vimQ=7CVl1ynKv(orw zB?{w9_@0;s98M|$YXyhgxJ6|GXt3#?O>on+1z44k-fO54Wt)~Q5sSOqnI+Pzc(thb(&yvO-q8jt6_7du{}DLK4+K+|w0EO|O4NHuK@v#j_ELgc>Z?ZzmE|ORgS0*em ziGNwJuGK1Th~I=%e(AbClz2oMFFQ9P&+>GaOi+wR)kMAvjHXe6E18l#AS8Y=O`uDe y4om~W2Oo0_Wri#m>nvBZ@P~*RdW#i#1&T#xOFp_av;GTAD23sT00{n8yng{bqaDj1_qC{dOkKT;^=FQlqvssM592`d+OSHf| z;N%-x1RQ@rS@-}u=!*H1j}`$4VO_#9*Wh}*1MVJg13ZW zpCjo~NOO?87PiyPNO98gDMTP>m8EhzjF&FI_%gmtZ->z)j6-UA^x-!=fFrE!vLhEX z!p&-GD!~gQBwUmWtb%mwPNZ6ujLHJ8&Bc#_(vFy}&LS83$zAAYUCi&OEO{|BRQPf( z8IR$jp^