From c1f6f4b3c3e2da1639c91a6e25578d21cb0705b1 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:04:51 +0530 Subject: [PATCH 1/4] add exdark dataset --- icedata/datasets/exdark/EXDARK/__init__.py | 3 + icedata/datasets/exdark/EXDARK/data.py | 15 +++ icedata/datasets/exdark/EXDARK/dataset.py | 30 +++++ icedata/datasets/exdark/EXDARK/parser.py | 145 +++++++++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 icedata/datasets/exdark/EXDARK/__init__.py create mode 100644 icedata/datasets/exdark/EXDARK/data.py create mode 100644 icedata/datasets/exdark/EXDARK/dataset.py create mode 100644 icedata/datasets/exdark/EXDARK/parser.py diff --git a/icedata/datasets/exdark/EXDARK/__init__.py b/icedata/datasets/exdark/EXDARK/__init__.py new file mode 100644 index 0000000..91a24dd --- /dev/null +++ b/icedata/datasets/exdark/EXDARK/__init__.py @@ -0,0 +1,3 @@ +from icedata.datasets.exdark.data import * +from icedata.datasets.exdark.parser import * +from icedata.datasets.exdark.dataset import * diff --git a/icedata/datasets/exdark/EXDARK/data.py b/icedata/datasets/exdark/EXDARK/data.py new file mode 100644 index 0000000..15ab364 --- /dev/null +++ b/icedata/datasets/exdark/EXDARK/data.py @@ -0,0 +1,15 @@ +from icevision.imports import * +from icevision.utils import * +from icevision.core import * + + +def load_data(force_download=False): + base_url = "https://github.com/ai-fast-track/icedata/releases/download/datasets/ExDark-Trimmed.zip" + save_dir = get_data_dir() / "exdark-trimmed" + save_dir.mkdir(exist_ok=True) + + save_path = save_dir / "exdark-trimmed.zip" + if not save_path.exists() or force_download: + download_and_extract(url=base_url, save_path=save_path) + + return save_dir diff --git a/icedata/datasets/exdark/EXDARK/dataset.py b/icedata/datasets/exdark/EXDARK/dataset.py new file mode 100644 index 0000000..49baa83 --- /dev/null +++ b/icedata/datasets/exdark/EXDARK/dataset.py @@ -0,0 +1,30 @@ +__all__ = ["dataset"] + +from icevision.all import * +from torch.utils import data +from icedata.datasets.exdark.parser import * +import icevision.tfms as tfms + + +def dataset( + data_dir: Path, + size: int = 384, + presize: int = 512, + data_splitter: Optional[DataSplitter] = None, +) -> Tuple[Dataset, Dataset]: + + _parser = parser(data_dir=data_dir) + + train_records, valid_records = _parser.parse(data_splitter=data_splitter) + train_tfms = tfms.A.Adapter( + [ + *tfms.A.aug_tfms(size=size, presize=presize, lightning=None), + tfms.A.Normalize(), + ] + ) + valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(size), tfms.A.Normalize()]) + + train_ds = Dataset(train_records, tfm=train_tfms) + valid_ds = Dataset(valid_records, valid_tfms) + + return train_ds, valid_ds diff --git a/icedata/datasets/exdark/EXDARK/parser.py b/icedata/datasets/exdark/EXDARK/parser.py new file mode 100644 index 0000000..c49afea --- /dev/null +++ b/icedata/datasets/exdark/EXDARK/parser.py @@ -0,0 +1,145 @@ +# AUTHOR: Lucas Vazquez +# Minor modifications made by Rahul Somani +# * Removed `object_class` as a classification task +# * Some minor changes to make compatible with icevision 0.7+ +# * Mild reorganisation + +__all__ = ["parser"] + +from icevision.all import * + + +EXDARK_CLASSIFIER_NAMES = ["lighting", "location"] +EXDARK_TEMPLATE_RECORD = BaseRecord( + [ + FilepathRecordComponent(), + *[ + ClassificationLabelsRecordComponent(task=tasks.Task(name=o)) + for o in EXDARK_CLASSIFIER_NAMES + ], + BBoxesRecordComponent(), + InstancesLabelsRecordComponent(), + ] +) + + +# TODO: Use splits specific in imageclasslist.txt +class ExDarkParser(Parser): + # fmt: off + LOCATION_CLASSES = [None, "Indoor", "Outdoor"] + LIGHTING_CLASSES = [ + None, "Low", "Ambient", "Object", "Single", "Weak", + "Strong", "Screen", "Window", "Shadow", "Twilight", + ] + OBJECT_CLASSES = [ + None,"Bicycle", "Boat", "Bottle", "Bus", "Car", "Cat", + "Chair", "Cup", "Dog", "Motorbike", "People", "Table", + ] + + # fmt: on + + def __init__( + self, + template_record, + instances_annotations_dir, + classification_annotation_filepath, + imgs_dir, + ): + super().__init__(template_record=template_record) + + self.instances_annotations_dir = instances_annotations_dir + self.classification_annotation_filepath = classification_annotation_filepath + self.imgs_dir = imgs_dir + + self.instances_annotations_filepaths = get_files( + instances_annotations_dir, extensions=[".txt"] + ) + self.classification_annotation_lines = ( + classification_annotation_filepath.read_text().strip().split("\n")[1:] + ) + + self.object_class_map = ClassMap(self.OBJECT_CLASSES, background=None) + self.lighting_class_map = ClassMap(self.LIGHTING_CLASSES, background=None) + self.location_class_map = ClassMap(self.LOCATION_CLASSES, background=None) + + def __iter__(self) -> Any: + for line in self.classification_annotation_lines: + yield line, tasks.classification + + for filepath in self.instances_annotations_filepaths: + yield filepath, tasks.detection + + def __len__(self): + return len(self.instances_annotations_filepaths) + len( + self.classification_annotation_lines + ) + + def record_id(self, o) -> Hashable: + item, task = o + if task == tasks.detection: + return Path( + item.stem + ).stem # item is `2015_05235.jpg.txt`, stem will give `2015_05235.jpg`, stem again for `2015_05235` + if task == tasks.classification: + return Path(item.split()[0]).stem # will also give `2015_05235` + # before were using with file extension, but one can have .JPG while the other .jpg + + def parse_fields(self, o, record, is_new): + item, task = o + + if task == tasks.detection: + self.parse_detection(item, record) + elif task == tasks.classification: + self.parse_classification(item, record) + else: + raise ValueError + + def parse_detection(self, filepath, record): + # the following doesn't work, filepath.stem can be `2015_00391.jpg` but actual image name is `2015_00391.JPG` + # img_filepath = self.imgs_dir / filepath.parent.stem / filepath.stem + # record.set_filepath(img_filepath) + # record.set_img_size(get_img_size(img_filepath)) + record.detection.set_class_map(self.object_class_map) + + lines = filepath.read_text().strip().split("\n")[1:] + for line in lines: + tokens = line.split() + + object_class = tokens[0] + xywh = [int(coord) for coord in tokens[1:5]] + bbox = BBox.from_xywh(*xywh) + + record.detection.add_labels([object_class]) + record.detection.add_bboxes([bbox]) + + def parse_classification(self, line, record): + tokens = line.split() + img_name = tokens[0] + object_class, lighting, location, _ = [int(id) for id in tokens[1:]] + + # common + object_class_name = self.object_class_map.get_by_id(object_class) + filepath = self.imgs_dir / object_class_name / img_name + record.set_filepath(filepath) + record.set_img_size(get_img_size(filepath)) + + # classification + # record.object_class.set_class_map(self.object_class_map) + # record.object_class.add_labels_by_id([object_class]) + + record.lighting.set_class_map(self.lighting_class_map) + record.lighting.add_labels_by_id([lighting]) + + record.location.set_class_map(self.location_class_map) + record.location.add_labels_by_id([location]) + + +def parser(data_dir: Path): + parser = ExDarkParser( + template_record=EXDARK_TEMPLATE_RECORD, + instances_annotations_dir=data_dir / "annotations", + classification_annotation_filepath=data_dir / "imageclasslist.txt", + imgs_dir=data_dir / "images", + ) + + return parser From ff43ab9d35dca4836813e8c02f5278c603a3ae48 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:07:05 +0530 Subject: [PATCH 2/4] rename to exdark_trimmed --- icedata/datasets/exdark/EXDARK/__init__.py | 3 --- icedata/datasets/exdark_trimmed/__init__.py | 3 +++ icedata/datasets/{exdark/EXDARK => exdark_trimmed}/data.py | 0 icedata/datasets/{exdark/EXDARK => exdark_trimmed}/dataset.py | 2 +- icedata/datasets/{exdark/EXDARK => exdark_trimmed}/parser.py | 0 5 files changed, 4 insertions(+), 4 deletions(-) delete mode 100644 icedata/datasets/exdark/EXDARK/__init__.py create mode 100644 icedata/datasets/exdark_trimmed/__init__.py rename icedata/datasets/{exdark/EXDARK => exdark_trimmed}/data.py (100%) rename icedata/datasets/{exdark/EXDARK => exdark_trimmed}/dataset.py (93%) rename icedata/datasets/{exdark/EXDARK => exdark_trimmed}/parser.py (100%) diff --git a/icedata/datasets/exdark/EXDARK/__init__.py b/icedata/datasets/exdark/EXDARK/__init__.py deleted file mode 100644 index 91a24dd..0000000 --- a/icedata/datasets/exdark/EXDARK/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from icedata.datasets.exdark.data import * -from icedata.datasets.exdark.parser import * -from icedata.datasets.exdark.dataset import * diff --git a/icedata/datasets/exdark_trimmed/__init__.py b/icedata/datasets/exdark_trimmed/__init__.py new file mode 100644 index 0000000..d6ca80b --- /dev/null +++ b/icedata/datasets/exdark_trimmed/__init__.py @@ -0,0 +1,3 @@ +from icedata.datasets.exdark_trimmed.data import * +from icedata.datasets.exdark_trimmed.parser import * +from icedata.datasets.exdark_trimmed.dataset import * diff --git a/icedata/datasets/exdark/EXDARK/data.py b/icedata/datasets/exdark_trimmed/data.py similarity index 100% rename from icedata/datasets/exdark/EXDARK/data.py rename to icedata/datasets/exdark_trimmed/data.py diff --git a/icedata/datasets/exdark/EXDARK/dataset.py b/icedata/datasets/exdark_trimmed/dataset.py similarity index 93% rename from icedata/datasets/exdark/EXDARK/dataset.py rename to icedata/datasets/exdark_trimmed/dataset.py index 49baa83..b3bf326 100644 --- a/icedata/datasets/exdark/EXDARK/dataset.py +++ b/icedata/datasets/exdark_trimmed/dataset.py @@ -2,7 +2,7 @@ from icevision.all import * from torch.utils import data -from icedata.datasets.exdark.parser import * +from icedata.datasets.exdark_trimmed.parser import * import icevision.tfms as tfms diff --git a/icedata/datasets/exdark/EXDARK/parser.py b/icedata/datasets/exdark_trimmed/parser.py similarity index 100% rename from icedata/datasets/exdark/EXDARK/parser.py rename to icedata/datasets/exdark_trimmed/parser.py From 19b8d38e21962e2b8313c7b612b792e64edff6da Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 08:35:38 +0530 Subject: [PATCH 3/4] fix path --- icedata/datasets/exdark_trimmed/data.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/icedata/datasets/exdark_trimmed/data.py b/icedata/datasets/exdark_trimmed/data.py index 15ab364..4ce520d 100644 --- a/icedata/datasets/exdark_trimmed/data.py +++ b/icedata/datasets/exdark_trimmed/data.py @@ -3,7 +3,17 @@ from icevision.core import * -def load_data(force_download=False): +def load_data(force_download=False) -> Path: + """ + Downloads a trimmed version of the ExDark Dataset to "~/.icevision/data/exdark-trimmed" + and returns the path it was downloaded to. + + Args: + force_download (bool, optional): Defaults to False. + + Returns: + Path: Path to load the dataset from + """ base_url = "https://github.com/ai-fast-track/icedata/releases/download/datasets/ExDark-Trimmed.zip" save_dir = get_data_dir() / "exdark-trimmed" save_dir.mkdir(exist_ok=True) @@ -12,4 +22,4 @@ def load_data(force_download=False): if not save_path.exists() or force_download: download_and_extract(url=base_url, save_path=save_path) - return save_dir + return save_dir / "ExDark-Trimmed" From bfddd3a34b3edf422f4bd9230ac3633e3e969ab8 Mon Sep 17 00:00:00 2001 From: Rahul Somani Date: Tue, 15 Jun 2021 10:44:09 +0530 Subject: [PATCH 4/4] convenience container for all class maps --- icedata/datasets/exdark_trimmed/parser.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/icedata/datasets/exdark_trimmed/parser.py b/icedata/datasets/exdark_trimmed/parser.py index c49afea..e53ce0c 100644 --- a/icedata/datasets/exdark_trimmed/parser.py +++ b/icedata/datasets/exdark_trimmed/parser.py @@ -62,6 +62,13 @@ def __init__( self.lighting_class_map = ClassMap(self.LIGHTING_CLASSES, background=None) self.location_class_map = ClassMap(self.LOCATION_CLASSES, background=None) + # Convenience container + self.CLASS_MAPS = dict( + detection=self.object_class_map, + lighting=self.lighting_class_map, + location=self.location_class_map, + ) + def __iter__(self) -> Any: for line in self.classification_annotation_lines: yield line, tasks.classification