Download as pdf or txt
Download as pdf or txt
You are on page 1 of 5

8/3/23, 10:17 AM cocodataset.ipynb - Colaboratory

1 # https://machinelearningspace.com/coco-dataset-a-step-by-step-guide-to-loading-and-visual
2 # 1. Downloading and Extracting the COCO dataset
3 # 2. Understanding`| the structure of the COCO Format
4 # 3. Creating the COCOParser Class
5 # 4. Loading and Visualizing the Dataset
6
7
8 !wget http://images.cocodataset.org/zips/train2017.zip -O coco_train2017.zip
9 !wget http://images.cocodataset.org/zips/val2017.zip -O coco_val2017.zip
10 !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O coco_ann20

--2023-07-04 01:39:48-- http://images.cocodataset.org/zips/train2017.zip


Resolving images.cocodataset.org (images.cocodataset.org)... 54.231.160.161, 54.231.195
Connecting to images.cocodataset.org (images.cocodataset.org)|54.231.160.161|:80... conn
HTTP request sent, awaiting response... 200 OK
Length: 19336861798 (18G) [application/zip]
Saving to: ‘coco_train2017.zip’

coco_train2017.zip 100%[===================>] 18.01G 51.3MB/s in 5m 0s

2023-07-04 01:44:48 (61.5 MB/s) - ‘coco_train2017.zip’ saved [19336861798/19336861798]

--2023-07-04 01:44:48-- http://images.cocodataset.org/zips/val2017.zip


Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.80.28, 3.5.7.17, 54
Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.80.28|:80... connec
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: ‘coco_val2017.zip’

coco_val2017.zip 100%[===================>] 777.80M 62.5MB/s in 10s

2023-07-04 01:44:58 (75.4 MB/s) - ‘coco_val2017.zip’ saved [815585330/815585330]

--2023-07-04 01:44:58-- http://images.cocodataset.org/annotations/annotations_trainval2


Resolving images.cocodataset.org (images.cocodataset.org)... 3.5.29.41, 3.5.29.124, 52.2
Connecting to images.cocodataset.org (images.cocodataset.org)|3.5.29.41|:80... connected
HTTP request sent, awaiting response... 200 OK
Length: 252907541 (241M) [application/zip]
Saving to: ‘coco_ann2017.zip’

coco_ann2017.zip 100%[===================>] 241.19M 75.1MB/s in 3.7s

2023-07-04 01:45:02 (66.0 MB/s) - ‘coco_ann2017.zip’ saved [252907541/252907541]

1 from zipfile import ZipFile, BadZipFile


2 import os
def extract_zip_file(extract_path):
    """Extract ``<extract_path>.zip`` into ``extract_path``, then delete the archive.

    Args:
        extract_path: Destination directory. The archive is looked up under
            the same path with ``.zip`` appended.

    Returns:
        None. Problems are reported on stdout instead of being raised: a
        missing archive prints ``Error: <path> file not found`` and a
        corrupt archive prints the ``BadZipFile`` message. The archive is
        deleted only after a successful extraction.
    """
    zip_path = f"{extract_path}.zip"

    # Check before opening: ZipFile() raises FileNotFoundError on a missing
    # archive, which would make the "file not found" report unreachable.
    if not os.path.isfile(zip_path):
        print("Error: %s file not found" % zip_path)
        return

    try:
        with ZipFile(zip_path) as zfile:
            zfile.extractall(extract_path)
    except BadZipFile as e:
        # Keep the broken archive on disk so it can be inspected.
        print("Error:", e)
        return

    # Delete only once the archive handle is closed.
    os.remove(zip_path)
# Each path names the extraction directory; the matching archive is the
# same path with ".zip" appended.
extract_train_path = "./coco_train2017"
extract_val_path = "./coco_val2017"
extract_ann_path = "./coco_ann2017"

# Unpack train images, val images and annotations in that order.
for archive_dir in (extract_train_path, extract_val_path, extract_ann_path):
    extract_zip_file(archive_dir)

1 from collections import defaultdict


2 import json
3 import numpy as np
class COCOParser:
    """Index a COCO-format annotation JSON file for lookup by id.

    The file is read once in ``__init__`` and split into dictionaries:

    * ``annIm_dict``    -- image id      -> list of annotation dicts
    * ``annId_dict``    -- annotation id -> annotation dict
    * ``im_dict``       -- image id      -> image dict
    * ``cat_dict``      -- category id   -> category dict
    * ``licenses_dict`` -- license id    -> license dict

    Every lookup method accepts either a single id or a list/tuple of ids.
    """

    def __init__(self, anns_file, imgs_dir):
        """Load ``anns_file`` and build the id indexes.

        Args:
            anns_file: Path to a JSON file with ``annotations``, ``images``
                and ``categories`` lists (``licenses`` is optional).
            imgs_dir: Directory holding the images; stored as
                ``self.imgs_dir`` and not otherwise used by this class.
        """
        with open(anns_file, 'r') as f:
            coco = json.load(f)

        self.imgs_dir = imgs_dir
        self.annIm_dict = defaultdict(list)
        self.cat_dict = {}
        self.annId_dict = {}
        self.im_dict = {}
        self.licenses_dict = {}

        for ann in coco['annotations']:
            self.annIm_dict[ann['image_id']].append(ann)
            self.annId_dict[ann['id']] = ann
        for img in coco['images']:
            self.im_dict[img['id']] = img
        for cat in coco['categories']:
            self.cat_dict[cat['id']] = cat
        # Tolerate files without a "licenses" section.
        for lic in coco.get('licenses', []):
            self.licenses_dict[lic['id']] = lic

    @staticmethod
    def _as_list(ids):
        """Return ``ids`` as a list, wrapping a single id."""
        return list(ids) if isinstance(ids, (list, tuple)) else [ids]

    def get_imgIds(self):
        """Return all image ids in file order."""
        return list(self.im_dict.keys())

    def get_annIds(self, im_ids):
        """Return the annotation ids belonging to the given image id(s).

        Image ids without annotations (or unknown ids) contribute nothing.
        """
        # .get() instead of indexing: indexing a defaultdict would insert an
        # empty entry for every unknown image id that is queried.
        return [
            ann['id']
            for im_id in self._as_list(im_ids)
            for ann in self.annIm_dict.get(im_id, ())
        ]

    def load_anns(self, ann_ids):
        """Return the annotation dicts for the given annotation id(s).

        Raises:
            KeyError: If an annotation id is not present in the file.
        """
        return [self.annId_dict[ann_id] for ann_id in self._as_list(ann_ids)]

    def load_cats(self, class_ids):
        """Return the category dicts for the given category id(s).

        Raises:
            KeyError: If a category id is not present in the file.
        """
        return [self.cat_dict[class_id] for class_id in self._as_list(class_ids)]

    def get_imgLicenses(self, im_ids):
        """Return the license dict of each given image id, in input order.

        Raises:
            KeyError: If an image id or its license id is not present.
        """
        lic_ids = [self.im_dict[im_id]["license"] for im_id in self._as_list(im_ids)]
        return [self.licenses_dict[lic_id] for lic_id in lic_ids]

Loading and Visualizing the Dataset


Let’s start by creating the object of the COCOParser() class.

Klik dua kali (atau tekan Enter) untuk mengedit

# Paths to the val2017 instance annotations and the val2017 image directory.
coco_annotations_file = "/content/coco_ann2017/annotations/instances_val2017.json"
coco_images_dir = "/content/coco_val2017/val2017"
coco = COCOParser(coco_annotations_file, coco_images_dir)

The line coco = COCOParser(coco_annotations_file, coco_images_dir) creates an instance of the COCOParser() class, passing the file paths stored in coco_annotations_file and coco_images_dir as arguments. This instance can now be used to access and manipulate the annotations and images in the COCO dataset.

1 import matplotlib.pyplot as plt


2 from PIL import Image
3 import numpy as np
# Colors for drawing bounding boxes, chosen by category id.
# NOTE(review): the list is cut off in the notebook export after "black"
# (the next entry starts with "mag"); "magenta" is assumed and any further
# entries are lost -- restore them from the notebook if needed.
color_list = ["pink", "red", "teal", "blue", "orange", "yellow", "black", "magenta"]
num_imgs_to_disp = 4

img_ids = coco.get_imgIds()
total_images = len(img_ids)  # total number of images
# Pick num_imgs_to_disp distinct random positions into img_ids.
sel_im_idxs = np.random.permutation(total_images)[:num_imgs_to_disp]
print("img_ids :", img_ids)
print("sel_im_idxs :", sel_im_idxs)
selected_img_ids = [img_ids[i] for i in sel_im_idxs]
im_licenses = coco.get_imgLicenses(selected_img_ids)

# The 2x2 grid matches num_imgs_to_disp = 4; ravel() allows indexing by ax[i].
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
ax = ax.ravel()

for i, im in enumerate(selected_img_ids):
    # Image files are named by the image id zero-padded to 12 digits.
    image = Image.open(f"{coco_images_dir}/{str(im).zfill(12)}.jpg")
    ann_ids = coco.get_annIds(im)
    annotations = coco.load_anns(ann_ids)
    print(len(annotations))

    # Looked up once per image, outside the annotation loop, so an image
    # with no annotations still gets its own license in the title.
    license_name = im_licenses[i]["name"]

    for ann in annotations:
        # The bbox is unpacked as (x, y, width, height).
        x, y, w, h = [int(b) for b in ann['bbox']]
        class_id = ann["category_id"]
        class_name = coco.load_cats(class_id)[0]["name"]
        # Wrap around so category ids beyond the list length cannot raise
        # IndexError.
        color_ = color_list[class_id % len(color_list)]
        # NOTE(review): this call is cut off after "face" in the export;
        # facecolor='none' (outline only) is assumed.
        rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color_, facecolor='none')
        t_box = ax[i].text(x, y, class_name, color='red', fontsize=10)
        # NOTE(review): this call is cut off after "alpha=" in the export;
        # 0.6 is a placeholder and any later arguments are lost.
        t_box.set_bbox(dict(boxstyle='square, pad=0', facecolor='white', alpha=0.6))
        ax[i].add_patch(rect)

    ax[i].axis('on')
    ax[i].imshow(image)
    ax[i].set_xlabel('Longitude')
    ax[i].set_title(f"License: {license_name}")

plt.tight_layout()
plt.show()

img_ids : [397133, 37777, 252219, 87038, 174482, 403385, 6818, 480985, 458054, 331352, 2
sel_im_idxs : [2312 2702 2390 3853]
2
4
20
36

https://colab.research.google.com/drive/1g1kG0eozZCZWdUFbcav9KhXz8Ebsw1ji?hl=id#scrollTo=NXBY90f6GdW1&printMode=true 4/5
8/3/23, 10:17 AM cocodataset.ipynb - Colaboratory

https://colab.research.google.com/drive/1g1kG0eozZCZWdUFbcav9KhXz8Ebsw1ji?hl=id#scrollTo=NXBY90f6GdW1&printMode=true 5/5

You might also like