Cat_Detection/cat_detect.ipynb
2025-01-23 02:14:37 +02:00

663 lines
34 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Pinned to the release this notebook was last run with, so re-runs are reproducible.\n",
"%pip install -q ultralytics==8.3.65"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import cv2\n",
"import numpy as np\n",
"from ultralytics import YOLO\n",
"from torchvision import models, transforms\n",
"from sklearn.cluster import KMeans\n",
"import torch\n",
"from matplotlib import pyplot as plt\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 6.25M/6.25M [00:00<00:00, 10.3MB/s]\n"
]
}
],
"source": [
"# Use the GPU when one is visible; otherwise fall back to CPU so the notebook still runs.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"model = YOLO(\"yolov8n.pt\").to(device)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda:0\n"
]
}
],
"source": [
"# Confirm which device the model ended up on.\n",
"print(model.device)"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [],
"source": [
"# Force 3-channel RGB: the detector's first convolution takes 3 input channels, so\n",
"# grayscale, palette or RGBA files would not match the expected input.\n",
"im = Image.open(\"dataset/1818949000-IMG-20240118-WA0001.jpg\").convert(\"RGB\")"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [],
"source": [
"# PIL image -> float tensor in (C, H, W) layout with values scaled to [0, 1].\n",
"to_tensor = transforms.ToTensor()\n",
"im = to_tensor(im)"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"# Add a leading batch axis: (C, H, W) -> (1, C, H, W). The guard keeps the cell safe to\n",
"# re-run; reshaping an already-batched tensor with its own leading sizes would silently\n",
"# scramble the axes. Resizing to a stride-aligned shape happens in a later cell.\n",
"if im.ndim == 3:\n",
"    im = im[None]"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1599, 899, 3)"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the current dimensions of `im`.\n",
"im.shape"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"def resize_and_pad_image(im, stride=32):\n",
"    \"\"\"Zero-pad a batch of images so height and width are multiples of `stride`.\n",
"\n",
"    Pixels are never resampled: zeros are appended on the bottom and right edges\n",
"    only, so the aspect ratio and the coordinates of the original content are\n",
"    preserved. Works for any batch size and keeps the tensor on its device.\n",
"\n",
"    Args:\n",
"        im: 4-D tensor laid out as (batch, channels, height, width).\n",
"        stride: positive integer the output height and width must be divisible by.\n",
"\n",
"    Returns:\n",
"        Tensor of shape (batch, channels, new_h, new_w), where new_h and new_w are\n",
"        the input height and width rounded up to the next multiple of `stride`.\n",
"\n",
"    Raises:\n",
"        ValueError: if `im` is not 4-D or `stride` is smaller than 1.\n",
"    \"\"\"\n",
"    if im.ndim != 4:\n",
"        raise ValueError(f\"expected a (batch, channels, height, width) tensor, got shape {tuple(im.shape)}\")\n",
"    if stride < 1:\n",
"        raise ValueError(f\"stride must be a positive integer, got {stride}\")\n",
"\n",
"    h, w = im.shape[-2:]\n",
"    # -x % stride is the gap between x and the next multiple of stride (0 when aligned).\n",
"    pad_h = -h % stride\n",
"    pad_w = -w % stride\n",
"\n",
"    # F.pad orders the amounts as (left, right, top, bottom) for the last two axes.\n",
"    return torch.nn.functional.pad(im, (0, pad_w, 0, pad_h))"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"# Bring the batched image to dimensions divisible by the default stride of 32.\n",
"im = resize_and_pad_image(im)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [],
"source": [
"# Convert the tensor to a NumPy array; the axis order is unchanged.\n",
"im = im.numpy()"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1599, 899, 3)"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the array shape.\n",
"# NOTE(review): the saved output of this cell has execution count 180, i.e. it was produced\n",
"# after the transpose cell that follows (count 179), so it shows the transposed shape.\n",
"im.shape"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"# Ultralytics reads a NumPy input as an HWC, BGR, uint8 (0-255) image and divides it by\n",
"# 255 itself. The float RGB array in [0, 1] produced by ToTensor would therefore reach\n",
"# the network almost black, which yields no detections.\n",
"if im.ndim == 4:\n",
"    im = im[0]  # drop the batch axis (batch size 1) if an earlier cell added one\n",
"im = im.transpose(1, 2, 0)  # CHW -> HWC\n",
"im = im[..., ::-1]  # RGB -> BGR\n",
"im = np.ascontiguousarray((im * 255).round().clip(0, 255).astype(np.uint8))"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [],
"source": [
"# Put the model in evaluation mode; the trailing semicolon suppresses the very long\n",
"# model repr that would otherwise be stored as this cell's output.\n",
"# NOTE(review): Ultralytics' predict path appears to switch to eval mode on its own, so\n",
"# this call is probably redundant - confirm before removing.\n",
"model.eval();"
]
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"0: 640x384 (no detections), 40.5ms\n",
"Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
]
}
],
"source": [
"# Run detection on the prepared image; gradients are not needed for inference.\n",
"with torch.no_grad():\n",
"    pred = model.predict(im)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [],
"source": [
"# `pred` holds one Results object per input image. Its outputs are exposed as\n",
"# result.boxes, result.masks, result.keypoints, result.probs and result.obb.\n",
"for idx, result in enumerate(pred):\n",
"    result.show()  # display the annotated image\n",
"    # Keep \"result.jpg\" for the first image and add an index afterwards, so several\n",
"    # results do not overwrite each other on disk.\n",
"    filename = \"result.jpg\" if idx == 0 else f\"result_{idx}.jpg\"\n",
"    result.save(filename=filename)"
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([], device='cuda:0', size=(0, 4))"
]
},
"execution_count": 194,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Boxes of the first result: one row of four coordinates per detection (xyxy format).\n",
"pred[0].boxes.xyxy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}