Cat_Detection/cat_detect.ipynb
2025-01-23 02:15:48 +02:00

705 lines
28 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: ultralytics in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (8.3.65)\n",
"Requirement already satisfied: numpy>=1.23.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (1.26.3)\n",
"Requirement already satisfied: matplotlib>=3.3.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (3.10.0)\n",
"Requirement already satisfied: opencv-python>=4.6.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (4.7.0.68)\n",
"Requirement already satisfied: pillow>=7.1.2 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (9.4.0)\n",
"Requirement already satisfied: pyyaml>=5.3.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (6.0.1)\n",
"Requirement already satisfied: requests>=2.23.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (2.31.0)\n",
"Requirement already satisfied: scipy>=1.4.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (1.14.1)\n",
"Requirement already satisfied: torch>=1.8.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (2.5.1+cu124)\n",
"Requirement already satisfied: torchvision>=0.9.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (0.20.1+cu124)\n",
"Requirement already satisfied: tqdm>=4.64.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (4.66.5)\n",
"Requirement already satisfied: psutil in c:\\users\\danie\\appdata\\roaming\\python\\python310\\site-packages (from ultralytics) (5.9.5)\n",
"Requirement already satisfied: py-cpuinfo in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (9.0.0)\n",
"Requirement already satisfied: pandas>=1.1.4 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (2.0.1)\n",
"Requirement already satisfied: seaborn>=0.11.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (0.13.2)\n",
"Requirement already satisfied: ultralytics-thop>=2.0.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from ultralytics) (2.0.14)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.0.7)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (0.11.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (4.39.3)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (23.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2023.3)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2023.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.1.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2.0.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2022.12.7)\n",
"Requirement already satisfied: filelock in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.13.1)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (4.9.0)\n",
"Requirement already satisfied: networkx in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.2.1)\n",
"Requirement already satisfied: jinja2 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.1.2)\n",
"Requirement already satisfied: fsspec in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (2024.2.0)\n",
"Requirement already satisfied: sympy==1.13.1 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from torch>=1.8.0->ultralytics) (1.13.1)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from sympy==1.13.1->torch>=1.8.0->ultralytics) (1.3.0)\n",
"Requirement already satisfied: colorama in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from tqdm>=4.64.0->ultralytics) (0.4.6)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.16.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\danie\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from jinja2->torch>=1.8.0->ultralytics) (2.1.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Pin the version this notebook was last run with so a fresh environment\n",
"# reproduces the recorded results; -q keeps the dependency log out of the output.\n",
"%pip install -q ultralytics==8.3.65"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import cv2\n",
"import numpy as np\n",
"from ultralytics import YOLO\n",
"from torchvision import models, transforms\n",
"from sklearn.cluster import KMeans\n",
"import torch\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 6.25M/6.25M [00:00<00:00, 10.3MB/s]\n"
]
}
],
"source": [
"# Prefer the GPU when one is available, but fall back to the CPU so the notebook\n",
"# still runs on machines without CUDA.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"model = YOLO(\"yolov8n.pt\").to(device)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda:0\n"
]
}
],
"source": [
"# Report which device the model was placed on.\n",
"print(str(model.device))"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [],
"source": [
"# Decode the file eagerly and force 3-channel RGB so grayscale, palette or RGBA\n",
"# inputs still match the 3-channel input the detector expects; the context manager\n",
"# closes the underlying file handle once the pixels are loaded.\n",
"with Image.open(\"dataset/1818949000-IMG-20240118-WA0001.jpg\") as src:\n",
"    im = src.convert(\"RGB\")"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [],
"source": [
"# Convert the PIL image into a torch tensor via torchvision's ToTensor transform.\n",
"to_tensor = transforms.ToTensor()\n",
"im = to_tensor(im)"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"# Add a leading batch axis: (C, H, W) -> (1, C, H, W). The ndim guard keeps the\n",
"# cell idempotent; re-running the original reshape on an already-batched input\n",
"# silently scrambled the axes instead of failing.\n",
"if im.ndim == 3:\n",
"    im = im[None]\n",
"\n",
"# Next step: resize the image for the model (original note: 3, 640, 640)."
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1599, 899, 3)"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the current dimensions of `im`.\n",
"# NOTE(review): the stored output was produced in a different execution order\n",
"# (see the cells' execution counts); re-run to refresh it.\n",
"im.shape"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"def resize_and_pad_image(im, stride=32, pad_value=0.0):\n",
"    \"\"\"Pad a batched image tensor so its height and width are multiples of `stride`.\n",
"\n",
"    The pixels themselves are left untouched: padding is appended to the bottom\n",
"    and right edges only, so the aspect ratio is preserved and the content stays\n",
"    anchored at the top-left corner.\n",
"\n",
"    Args:\n",
"        im: Tensor of shape (N, C, H, W).\n",
"        stride: Positive number that H and W must be divisible by afterwards.\n",
"        pad_value: Fill value used for the added border.\n",
"\n",
"    Returns:\n",
"        Tensor of shape (N, C, H', W'), where H' and W' are H and W rounded up\n",
"        to the next multiple of `stride`.\n",
"\n",
"    Raises:\n",
"        ValueError: If `im` is not 4-dimensional or `stride` is not positive.\n",
"    \"\"\"\n",
"    if im.ndim != 4:\n",
"        raise ValueError(f\"expected a (N, C, H, W) tensor, got shape {tuple(im.shape)}\")\n",
"    if stride <= 0:\n",
"        raise ValueError(f\"stride must be positive, got {stride}\")\n",
"\n",
"    h, w = im.shape[-2:]\n",
"\n",
"    # Round each spatial dimension up to the nearest multiple of `stride`\n",
"    # (ceiling division without going through floats).\n",
"    new_h = int(-(-h // stride) * stride)\n",
"    new_w = int(-(-w // stride) * stride)\n",
"\n",
"    # F.pad takes (left, right, top, bottom) for the last two dimensions.\n",
"    padding = (0, new_w - w, 0, new_h - h)\n",
"    return torch.nn.functional.pad(im, padding, value=pad_value)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"# Bring the image to stride-aligned spatial dimensions.\n",
"im = resize_and_pad_image(im, stride=32)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [],
"source": [
"# Move to host memory and convert to NumPy. The guard makes the cell safe to\n",
"# re-run (an ndarray has no .numpy()), and detach()/cpu() cover tensors that\n",
"# track gradients or live on the GPU.\n",
"if isinstance(im, torch.Tensor):\n",
"    im = im.detach().cpu().numpy()"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1599, 899, 3)"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the current dimensions of `im`.\n",
"im.shape"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"# Go from channels-first to channels-last (H, W, C). Earlier cells may have added\n",
"# a batch axis, which a 3-axis transpose rejects, so drop it first.\n",
"if im.ndim == 4:\n",
"    im = im[0]\n",
"im = im.transpose(1, 2, 0)"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"YOLO(\n",
" (model): DetectionModel(\n",
" (model): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (3): Conv(\n",
" (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (4): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0-1): 2 x Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (5): Conv(\n",
" (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (6): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0-1): 2 x Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (7): Conv(\n",
" (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (8): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (9): SPPF(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): MaxPool2d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)\n",
" )\n",
" (10): Upsample(scale_factor=2.0, mode='nearest')\n",
" (11): Concat()\n",
" (12): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (13): Upsample(scale_factor=2.0, mode='nearest')\n",
" (14): Concat()\n",
" (15): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (16): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (17): Concat()\n",
" (18): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (19): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (20): Concat()\n",
" (21): C2f(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (m): ModuleList(\n",
" (0): Bottleneck(\n",
" (cv1): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (cv2): Conv(\n",
" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (22): Detect(\n",
" (cv2): ModuleList(\n",
" (0): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" (1): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" (2): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" )\n",
" (cv3): ModuleList(\n",
" (0): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" (1): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(128, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" (2): Sequential(\n",
" (0): Conv(\n",
" (conv): Conv2d(256, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (1): Conv(\n",
" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
" (act): SiLU(inplace=True)\n",
" )\n",
" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
" )\n",
" )\n",
" (dfl): DFL(\n",
" (conv): Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 181,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Switch to evaluation mode; the trailing semicolon keeps the multi-hundred-line\n",
"# module repr out of the cell output.\n",
"model.eval();"
]
},
{
"cell_type": "code",
"execution_count": 234,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"0: 640x384 (no detections), 23.0ms\n",
"Speed: 23.6ms preprocess, 23.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)\n"
]
}
],
"source": [
"# Ultralytics interprets NumPy inputs as HWC, BGR, uint8 in [0, 255]. At this point\n",
"# `im` is a float RGB array scaled to [0, 1] (it came through ToTensor), which is\n",
"# presumably why the stored run of this cell reports \"no detections\" while the same\n",
"# file yields a cat when passed by path. Convert before inference.\n",
"im_bgr = im\n",
"if np.issubdtype(im_bgr.dtype, np.floating):\n",
"    im_bgr = (im_bgr * 255.0).round().clip(0, 255).astype(np.uint8)\n",
"im_bgr = np.ascontiguousarray(im_bgr[..., ::-1])  # RGB -> BGR\n",
"\n",
"with torch.no_grad():\n",
"    pred = model(im_bgr)"
]
},
{
"cell_type": "code",
"execution_count": 241,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"image 1/1 e:\\Facultate\\Master\\Anul 1\\CV\\Project\\dataset\\1818949000-IMG-20240118-WA0001.jpg: 640x384 1 cat, 1 chair, 29.5ms\n",
"Speed: 5.0ms preprocess, 29.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
]
}
],
"source": [
"# Run detection straight from the image file, without saving any output files.\n",
"results = model.predict(\"dataset/1818949000-IMG-20240118-WA0001.jpg\", save=False)"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Class: cat, Confidence: 0.79, Box: 1.4070484638214111, 389.7481994628906, 766.0083618164062, 1191.5501708984375\n",
"Class: chair, Confidence: 0.27, Box: 0.0, 255.45545959472656, 884.272216796875, 1599.0\n"
]
}
],
"source": [
"# Report class, confidence and corner coordinates for every detection.\n",
"for result in results:\n",
"    for box in result.boxes:\n",
"        # xyxy holds the corners as [x_min, y_min, x_max, y_max]\n",
"        x_min, y_min, x_max, y_max = box.xyxy[0].tolist()\n",
"        confidence = box.conf[0].item()\n",
"\n",
"        # Map the numeric class id to its human-readable name\n",
"        class_name = model.names[int(box.cls[0])]\n",
"\n",
"        print(f\"Class: {class_name}, Confidence: {confidence:.2f}, Box: {x_min}, {y_min}, {x_max}, {y_max}\")"
]
},
{
"cell_type": "code",
"execution_count": 251,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 251,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"image_path = \"dataset/1818949000-IMG-20240118-WA0001.jpg\"\n",
"output_path = \"output/output.jpg\"\n",
"\n",
"# cv2.imread signals failure by returning None instead of raising, so check explicitly.\n",
"image = cv2.imread(image_path)\n",
"if image is None:\n",
"    raise FileNotFoundError(f\"Could not read image: {image_path}\")\n",
"\n",
"for result in results:\n",
"    boxes = result.boxes\n",
"\n",
"    for box in boxes:\n",
"        x_min, y_min, x_max, y_max = map(int, box.xyxy[0])\n",
"        class_id = int(box.cls[0])\n",
"        class_name = model.names[class_id]\n",
"        confidence = float(box.conf[0])\n",
"\n",
"        # Only draw detections above the confidence threshold.\n",
"        if confidence > 0.7:\n",
"            # Format the label with class name and confidence\n",
"            label = f\"{class_name} {confidence:.2f}\"\n",
"\n",
"            # Keep the label inside the frame when the box touches the top edge.\n",
"            label_y = max(y_min - 10, 15)\n",
"\n",
"            # Draw the bounding box and its label\n",
"            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)\n",
"            cv2.putText(image, label, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)\n",
"\n",
"# cv2.imwrite returns False (without raising) when the write fails, e.g. because the\n",
"# target directory is missing, so create it first and fail loudly otherwise.\n",
"os.makedirs(os.path.dirname(output_path), exist_ok=True)\n",
"if not cv2.imwrite(output_path, image):\n",
"    raise IOError(f\"Could not write image: {output_path}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}