Faster RCNN Final

Faster rcnn
import torch
from torchvision import transforms as T
from PIL import Image
import cv2
from google.colab.patches import cv2_imshow
import torchvision
# Load Faster R-CNN (ResNet-50 backbone with FPN) with COCO-pretrained weights.
# `pretrained=True` is deprecated since torchvision 0.13; the explicit COCO_V1
# weights enum is the documented equivalent and avoids the deprecation warnings.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1
)
/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208:
UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed
in the future, please use 'weights' instead.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223:
UserWarning: Arguments other than a weight enum or `None` for 'weights' are
deprecated since 0.13 and may be removed in the future. The current behavior is
equivalent to passing `weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1`. You can
also use `weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-
date weights.
warnings.warn(msg)
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-
258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-
258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 118MB/s]
# Put the model in evaluation mode before running inference on the image.
model.eval()
FasterRCNN(
(transform): GeneralizedRCNNTransform(
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Resize(min_size=(800,), max_size=1333, mode='bilinear')
)
(backbone): BackboneWithFPN(
(body): IntermediateLayerGetter(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
bias=False)
(bn1): FrozenBatchNorm2d(64, eps=0.0)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1,
ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1),
bias=False)
padding=(1, 1), bias=False)
bias=False)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): FrozenBatchNorm2d(256, eps=0.0)
)
)
(1): Bottleneck(
bias=False)
bias=False)
)
(2): Bottleneck(
bias=False)
bias=False)
)
)
(0): Bottleneck(
bias=False)
bias=False)
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2),
bias=False)
)
)
(1): Bottleneck(
bias=False)
bias=False)
)
(2): Bottleneck(
bias=False)
bias=False)
)
(3): Bottleneck(
bias=False)
bias=False)
)
)
(0): Bottleneck(
bias=False)
bias=False)
bias=False)
)
)
(1): Bottleneck(
bias=False)
bias=False)
)
(2): Bottleneck(
bias=False)
bias=False)
)
(3): Bottleneck(
bias=False)
bias=False)
)
(4): Bottleneck(
bias=False)
bias=False)
)
(5): Bottleneck(
bias=False)
bias=False)
)
)
(0): Bottleneck(
bias=False)
bias=False)
bias=False)
)
)
(1): Bottleneck(
bias=False)
bias=False)
)
(2): Bottleneck(
bias=False)
bias=False)
)
)
)
(fpn): FeaturePyramidNetwork(
(inner_blocks): ModuleList(
(0): Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
)
)
)
)
)
(layer_blocks): ModuleList(
(0-3): 4 x Conv2dNormActivation(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1))
)
)
(extra_blocks): LastLevelMaxPool()
)
)
(rpn): RegionProposalNetwork(
(anchor_generator): AnchorGenerator()
(head): RPNHead(
(conv): Sequential(
(0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
1))
(1): ReLU(inplace=True)
)
)
(cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
(bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
)
)
(roi_heads): RoIHeads(
(box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
output_size=(7, 7), sampling_ratio=2)
(box_head): TwoMLPHead(
(fc6): Linear(in_features=12544, out_features=1024, bias=True)
(fc7): Linear(in_features=1024, out_features=1024, bias=True)
)
(box_predictor): FastRCNNPredictor(
(cls_score): Linear(in_features=1024, out_features=91, bias=True)
(bbox_pred): Linear(in_features=1024, out_features=364, bias=True)
)
)
)
# Notebook shell command: download one COCO val2017 image to the working directory.
!wget 'http://images.cocodataset.org/val2017/000000037777.jpg'
--2023-10-22 05:12:46-- http://images.cocodataset.org/val2017/000000037777.jpg

Resolving images.cocodataset.org (images.cocodataset.org)... 3.5.0.160,
52.217.46.68, 54.231.161.25, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|3.5.0.160|:80...
connected.
HTTP request sent, awaiting response... 200 OK
Length: 40833 (40K) [image/jpeg]
Saving to: ‘000000037777.jpg’
000000037777.jpg 0%[ ] 0 --.-KB/s

000000037777.jpg 100%[===================>] 39.88K --.-KB/s in 0.03s
2023-10-22 05:12:46 (1.50 MB/s) - ‘000000037777.jpg’ saved [40833/40833]
# Load the downloaded image and convert it to a tensor the model can consume.
image = Image.open("/content/000000037777.jpg")
transform = T.ToTensor()
img = transform(image)

# Run detection with gradient tracking disabled. The model is called with a
# list of image tensors and returns a list of prediction dicts.
with torch.no_grad():
    pred = model([img])
pred
[{'boxes': tensor([[291.8007, 75.4907, 351.0379, 229.7333],

[137.4995, 126.4641, 196.7451, 192.7782],
[233.8998, 187.4761, 257.3713, 206.0314],
[ 90.1806, 183.4493, 288.7284, 227.4105],
[215.5326, 186.7111, 232.5018, 201.2200],
[229.1946, 178.4547, 242.4028, 189.1366],
[204.3889, 187.5528, 220.3058, 207.7724],
[231.4346, 200.6802, 247.8696, 217.5509],
[217.1698, 200.3502, 231.5009, 214.5559],
[204.0359, 181.5956, 255.1724, 215.4856],
[ 98.9669, 120.1149, 110.5809, 174.5164],
[102.2467, 119.0014, 109.8358, 135.8731],
[264.9863, 133.9124, 294.1249, 137.5284],
[232.7792, 188.8490, 246.7510, 200.8903],
[256.4788, 132.5554, 295.4839, 138.3106],
[175.0202, 110.8700, 185.6927, 137.3661],
[273.3569, 135.1965, 294.8690, 137.8414],
[263.4073, 128.6209, 298.7946, 137.3732],
[197.2355, 181.2733, 263.8224, 219.3763],
[218.0015, 199.0363, 246.3781, 217.2725],
[233.2911, 127.2777, 299.2677, 138.3053],
[196.7906, 176.0138, 295.1327, 227.2277],
[277.4945, 128.7831, 294.3444, 136.7246],
[195.4701, 120.3333, 202.1639, 134.7107],
[205.7191, 182.7520, 232.8646, 212.0839],
[215.5735, 192.0031, 232.6144, 212.4386],
[126.2595, 98.0017, 196.9859, 190.0268],
[335.9925, 53.8941, 351.7091, 78.3954],
[230.3266, 188.0459, 241.4556, 196.1423],
[ 23.3921, 215.4327, 85.7529, 228.8503],
[231.4097, 188.5247, 242.9691, 197.6583],
[199.5782, 132.2746, 230.9552, 136.7621],
[ 99.3698, 132.9896, 110.3126, 177.3067],
[274.6114, 132.1647, 295.4245, 136.9303],
[243.4663, 188.0158, 255.1226, 205.9618],
[106.2549, 126.8241, 109.5654, 136.0857],
[209.2611, 122.8130, 216.1806, 133.9069],
[232.4752, 190.0423, 254.0875, 211.6656],
[186.6181, 113.1141, 192.0916, 134.3102],
[121.4489, 149.9124, 297.1120, 225.3528],
[ 57.3310, 178.1882, 295.3813, 227.8415],
[205.4935, 181.3018, 256.9351, 214.5954],
[242.4294, 72.0229, 348.6021, 219.6535],
[264.2516, 131.8361, 293.0418, 136.0612],
[276.1863, 135.8619, 300.7418, 139.0029],
[277.2612, 110.8911, 290.0176, 128.4139],
[223.5605, 181.9018, 258.9954, 207.2584],
[209.2122, 185.9772, 238.5243, 203.0282],
[237.8130, 189.1031, 252.1272, 203.9443],
[212.5358, 179.8578, 246.5361, 206.7563],
[228.0379, 180.7806, 243.1379, 195.6810],
[189.4451, 130.4031, 232.2002, 137.4645],
[194.0911, 121.2590, 202.3865, 134.3217],
[223.8388, 187.7670, 233.4278, 198.8905],
[290.3062, 118.2465, 295.2960, 128.2851],
[214.8254, 120.7471, 230.2146, 132.2280],
[115.7271, 190.2451, 166.4006, 211.7705],
[195.3436, 122.6958, 202.3656, 134.7221],
[226.6122, 129.5320, 269.3200, 136.9075],
[ 79.5318, 186.9848, 223.3760, 226.7934],
[221.1238, 185.8760, 241.8407, 199.4312],
[214.8148, 126.3200, 228.9625, 132.1013],
[220.3028, 180.3849, 230.4728, 189.2468],
[277.3170, 110.8236, 290.2443, 128.7589]]),
'labels': tensor([82, 79, 52, 67, 55, 55, 55, 55, 55, 55, 64, 64, 81, 52, 81,
44, 81, 81,
51, 55, 81, 67, 81, 44, 55, 55, 82, 84, 55, 62, 52, 81, 86, 81, 52,
86,
47, 55, 44, 67, 63, 52, 82, 81, 81, 44, 52, 55, 52, 55, 55, 81, 80,
55,
44, 80, 62, 47, 81, 63, 55, 81, 55, 47]),
'scores': tensor([0.9943, 0.9886, 0.9625, 0.9046, 0.9044, 0.9027, 0.8800,
0.8290, 0.8278,
0.8212, 0.7828, 0.7669, 0.6758, 0.6336, 0.5739, 0.5215, 0.5204,
0.4837,
0.4741, 0.4501, 0.4474, 0.4294, 0.4084, 0.4043, 0.3769, 0.3628,
0.3377,
0.3350, 0.3132, 0.2829, 0.2650, 0.2508, 0.2496, 0.2413, 0.2190,
0.2135,
0.1736, 0.1730, 0.1327, 0.1309, 0.1267, 0.1263, 0.1160, 0.1145,
0.1102,
0.1016, 0.0982, 0.0886, 0.0869, 0.0858, 0.0808, 0.0737, 0.0728,
0.0678,
0.0673, 0.0658, 0.0650, 0.0622, 0.0616, 0.0575, 0.0547, 0.0534,
0.0515,
0.0501])}]
# Inspect the container type of the model output (a Python list).
type(pred)
list
# Number of prediction entries; presumably 1, since a single image was passed.
len(pred)
# Each entry is a dict holding the 'boxes', 'labels' and 'scores' tensors.
pred[0].keys()
dict_keys(['boxes', 'labels', 'scores'])
# Unpack the detections for the single input image. Each subscript must stay
# attached to its expression: with `["scores"]` split onto its own line,
# `scores` would be bound to the whole prediction dict instead of the tensor.
detections = pred[0]
bboxes, labels, scores = (
    detections["boxes"],
    detections["labels"],
    detections["scores"],
)
scores
tensor([0.9943, 0.9886, 0.9625, 0.9046, 0.9044, 0.9027, 0.8800, 0.8290, 0.8278,

0.8212, 0.7828, 0.7669, 0.6758, 0.6336, 0.5739, 0.5215, 0.5204, 0.4837,
0.4741, 0.4501, 0.4474, 0.4294, 0.4084, 0.4043, 0.3769, 0.3628, 0.3377,
0.3350, 0.3132, 0.2829, 0.2650, 0.2508, 0.2496, 0.2413, 0.2190, 0.2135,
0.1736, 0.1730, 0.1327, 0.1309, 0.1267, 0.1263, 0.1160, 0.1145, 0.1102,
0.1016, 0.0982, 0.0886, 0.0869, 0.0858, 0.0808, 0.0737, 0.0728, 0.0678,
0.0673, 0.0658, 0.0650, 0.0622, 0.0616, 0.0575, 0.0547, 0.0534, 0.0515,
0.0501])
# Count the detections whose confidence score is strictly above 0.9.
num = int((scores > 0.9).sum())
num
# COCO category names in category-id order (91 entries). The list is 0-based
# while the ids start at 1, so a label `k` maps to `coco_names[k - 1]`.
coco_names = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "street sign", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep",
    "cow", "elephant", "bear", "zebra", "giraffe",
    "hat", "backpack", "umbrella", "shoe", "eye glasses",
    "handbag", "tie", "suitcase", "frisbee", "skis",
    "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "plate",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "mirror", "dining table", "window", "desk", "toilet",
    "door", "tv", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster",
    "sink", "refrigerator", "blender", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
    "hair brush",
]
# Draw a rectangle and class label for the first `num` detections, then show
# the annotated image.
font = cv2.FONT_HERSHEY_SIMPLEX
igg = cv2.imread("/content/000000037777.jpg")
if igg is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("/content/000000037777.jpg")

# Convert the tensors once, outside the loop; box coordinates are truncated to
# integers because the drawing calls take pixel positions.
top_boxes = bboxes[:num].numpy().astype("int")
top_labels = labels[:num].numpy()

for (x1, y1, x2, y2), label in zip(top_boxes, top_labels):
    # Labels are offset by one relative to the 0-based coco_names list.
    class_name = coco_names[label - 1]
    igg = cv2.rectangle(igg, (x1, y1), (x2, y2), (0, 255, 0), 1)
    # Place the label slightly above the top-left corner of the box.
    igg = cv2.putText(
        igg, class_name, (x1, y1 - 10), font, 0.6, (255, 0, 0), 1, cv2.LINE_AA
    )

# Display once, after every box has been drawn.
cv2_imshow(igg)
[]
# Display every predicted box tensor row, including the low-confidence ones.
bboxes
tensor([[291.8007, 75.4907, 351.0379, 229.7333],

[137.4995, 126.4641, 196.7451, 192.7782],
[233.8998, 187.4761, 257.3713, 206.0314],
[ 90.1806, 183.4493, 288.7284, 227.4105],
[215.5326, 186.7111, 232.5018, 201.2200],
[229.1946, 178.4547, 242.4028, 189.1366],
[204.3889, 187.5528, 220.3058, 207.7724],
[231.4346, 200.6802, 247.8696, 217.5509],
[217.1698, 200.3502, 231.5009, 214.5559],
[204.0359, 181.5956, 255.1724, 215.4856],
[ 98.9669, 120.1149, 110.5809, 174.5164],
[102.2467, 119.0014, 109.8358, 135.8731],
[264.9863, 133.9124, 294.1249, 137.5284],
[232.7792, 188.8490, 246.7510, 200.8903],
[256.4788, 132.5554, 295.4839, 138.3106],
[175.0202, 110.8700, 185.6927, 137.3661],
[273.3569, 135.1965, 294.8690, 137.8414],
[263.4073, 128.6209, 298.7946, 137.3732],
[197.2355, 181.2733, 263.8224, 219.3763],
[218.0015, 199.0363, 246.3781, 217.2725],
[233.2911, 127.2777, 299.2677, 138.3053],
[196.7906, 176.0138, 295.1327, 227.2277],
[277.4945, 128.7831, 294.3444, 136.7246],
[195.4701, 120.3333, 202.1639, 134.7107],
[205.7191, 182.7520, 232.8646, 212.0839],
[215.5735, 192.0031, 232.6144, 212.4386],
[126.2595, 98.0017, 196.9859, 190.0268],
[335.9925, 53.8941, 351.7091, 78.3954],
[230.3266, 188.0459, 241.4556, 196.1423],
[ 23.3921, 215.4327, 85.7529, 228.8503],
[231.4097, 188.5247, 242.9691, 197.6583],
[199.5782, 132.2746, 230.9552, 136.7621],
[ 99.3698, 132.9896, 110.3126, 177.3067],
[274.6114, 132.1647, 295.4245, 136.9303],
[243.4663, 188.0158, 255.1226, 205.9618],
[106.2549, 126.8241, 109.5654, 136.0857],
[209.2611, 122.8130, 216.1806, 133.9069],
[232.4752, 190.0423, 254.0875, 211.6656],
[186.6181, 113.1141, 192.0916, 134.3102],
[121.4489, 149.9124, 297.1120, 225.3528],
[ 57.3310, 178.1882, 295.3813, 227.8415],
[205.4935, 181.3018, 256.9351, 214.5954],
[242.4294, 72.0229, 348.6021, 219.6535],
[264.2516, 131.8361, 293.0418, 136.0612],
[276.1863, 135.8619, 300.7418, 139.0029],
[277.2612, 110.8911, 290.0176, 128.4139],
[223.5605, 181.9018, 258.9954, 207.2584],
[209.2122, 185.9772, 238.5243, 203.0282],
[237.8130, 189.1031, 252.1272, 203.9443],
[212.5358, 179.8578, 246.5361, 206.7563],
[228.0379, 180.7806, 243.1379, 195.6810],
[189.4451, 130.4031, 232.2002, 137.4645],
[194.0911, 121.2590, 202.3865, 134.3217],
[223.8388, 187.7670, 233.4278, 198.8905],
[290.3062, 118.2465, 295.2960, 128.2851],
[214.8254, 120.7471, 230.2146, 132.2280],
[115.7271, 190.2451, 166.4006, 211.7705],
[195.3436, 122.6958, 202.3656, 134.7221],
[226.6122, 129.5320, 269.3200, 136.9075],
[ 79.5318, 186.9848, 223.3760, 226.7934],
[221.1238, 185.8760, 241.8407, 199.4312],
[214.8148, 126.3200, 228.9625, 132.1013],
[220.3028, 180.3849, 230.4728, 189.2468],
[277.3170, 110.8236, 290.2443, 128.7589]])

Faster RCNN Final

Uploaded by

Copyright:

Available Formats

You might also like

Faster RCNN Final

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Faster RCNN Final

Uploaded by

Copyright:

Available Formats

Faster rcnn

--2023-10-22 05:12:46-- http://images.cocodataset.org/val2017/000000037777.jpg

000000037777.jpg 0%[ ] 0 --.-KB/s

2023-10-22 05:12:46 (1.50 MB/s) - ‘000000037777.jpg’ saved [40833/40833]

[{'boxes': tensor([[291.8007, 75.4907, 351.0379, 229.7333],

dict_keys(['boxes', 'labels', 'scores'])

bboxes, labels, scores = pred[0]["boxes"], pred[0]["labels"], pred[0]

tensor([0.9943, 0.9886, 0.9625, 0.9046, 0.9044, 0.9027, 0.8800, 0.8290, 0.8278,

num = torch.argwhere(scores > 0.9).shape[0]

coco_names = ["person" , "bicycle" , "car" , "motorcycle" , "airplane" ,

tensor([[291.8007, 75.4907, 351.0379, 229.7333],

You might also like