test bbox

parent 7286cad733
commit 495c75a86d

.gitignore (vendored, new file, 2 lines added)
@@ -0,0 +1,2 @@
+/dataset/cats
+/dataset/cats.zip
cat_detect.ipynb (560 changes)

@@ -58,19 +58,9 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 3,
 "metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"Creating new Ultralytics Settings v0.0.6 file \n",
-"View Ultralytics Settings with 'yolo settings' or at 'C:\\Users\\danie\\AppData\\Roaming\\Ultralytics\\settings.json'\n",
-"Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.\n"
-]
-}
-],
+"outputs": [],
 "source": [
 "import os\n",
 "import cv2\n",
@@ -85,19 +75,559 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 4,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"YOLO\n"
+"Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...\n"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"100%|██████████| 6.25M/6.25M [00:00<00:00, 10.3MB/s]\n"
 ]
 }
 ],
 "source": [
-"print(\"YOLO\")"
+"model = YOLO(\"yolov8n.pt\").to('cuda')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 11,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"cuda:0\n"
+]
+}
+],
+"source": [
+"print(model.device)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 175,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = Image.open(\"dataset/1818949000-IMG-20240118-WA0001.jpg\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 176,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = transforms.ToTensor()(im)\n",
+"# im = im.numpy()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 130,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = im.reshape(-1, im.shape[0], im.shape[1], im.shape[2])\n",
+"\n",
+"# apply resize to image 3, 640, 640\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 161,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"(1599, 899, 3)"
+]
+},
+"execution_count": 161,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"im.shape"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 116,
+"metadata": {},
+"outputs": [],
+"source": [
+"def resize_and_pad_image(im, stride=32):\n",
+" # Get original dimensions\n",
+" _, _, h, w = im.shape\n",
+"\n",
+" # Calculate the nearest divisible dimensions\n",
+" new_h = int(np.ceil(h / stride) * stride)\n",
+" new_w = int(np.ceil(w / stride) * stride)\n",
+"\n",
+" # Resize the image while maintaining aspect ratio\n",
+" resize_transform = transforms.Compose([\n",
+" transforms.ToPILImage(),\n",
+" transforms.Resize((new_h, new_w)), # Resize to divisible dimensions\n",
+" transforms.ToTensor()\n",
+" ])\n",
+"\n",
+" # Apply transform to the tensor\n",
+" im_resized = resize_transform(im.squeeze(0)) # Remove batch dimension for processing\n",
+"\n",
+" # Add batch dimension back\n",
+" im_resized = im_resized.unsqueeze(0)\n",
+" return im_resized"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 117,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = resize_and_pad_image(im)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 177,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = im.numpy()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 180,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"(1599, 899, 3)"
+]
+},
+"execution_count": 180,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"im.shape"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 179,
+"metadata": {},
+"outputs": [],
+"source": [
+"im = im.transpose(1,2,0)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 181,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"YOLO(\n",
+" (model): DetectionModel(\n",
+" (model): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (3): Conv(\n",
+" (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (4): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0-1): 2 x Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (5): Conv(\n",
+" (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (6): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0-1): 2 x Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (7): Conv(\n",
+" (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (8): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (9): SPPF(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): MaxPool2d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)\n",
+" )\n",
+" (10): Upsample(scale_factor=2.0, mode='nearest')\n",
+" (11): Concat()\n",
+" (12): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (13): Upsample(scale_factor=2.0, mode='nearest')\n",
+" (14): Concat()\n",
+" (15): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(96, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (16): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (17): Concat()\n",
+" (18): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (19): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (20): Concat()\n",
+" (21): C2f(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(384, 256, kernel_size=(1, 1), stride=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (m): ModuleList(\n",
+" (0): Bottleneck(\n",
+" (cv1): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (cv2): Conv(\n",
+" (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+" (22): Detect(\n",
+" (cv2): ModuleList(\n",
+" (0): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" (1): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" (2): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(256, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" )\n",
+" (cv3): ModuleList(\n",
+" (0): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" (1): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(128, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" (2): Sequential(\n",
+" (0): Conv(\n",
+" (conv): Conv2d(256, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (1): Conv(\n",
+" (conv): Conv2d(80, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+" (act): SiLU(inplace=True)\n",
+" )\n",
+" (2): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1))\n",
+" )\n",
+" )\n",
+" (dfl): DFL(\n",
+" (conv): Conv2d(16, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+" )\n",
+" )\n",
+" )\n",
+" )\n",
+")"
+]
+},
+"execution_count": 181,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"model.eval()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 182,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\n",
+"0: 640x384 (no detections), 40.5ms\n",
+"Speed: 21.0ms preprocess, 40.5ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)\n"
+]
+}
+],
+"source": [
+"with torch.no_grad():\n",
+" pred = model(im)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 196,
+"metadata": {},
+"outputs": [
+{
+"ename": "AttributeError",
+"evalue": "'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n ",
+"output_type": "error",
+"traceback": [
+"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
+"Cell \u001b[1;32mIn[196], line 7\u001b[0m\n\u001b[0;32m 5\u001b[0m probs \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;66;03m# Probs object for classification outputs\u001b[39;00m\n\u001b[0;32m 6\u001b[0m obb \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mobb \u001b[38;5;66;03m# Oriented boxes object for OBB outputs\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m \u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mascontiguousarray\u001b[49m() \u001b[38;5;66;03m# display to screen\u001b[39;00m\n\u001b[0;32m 8\u001b[0m result\u001b[38;5;241m.\u001b[39msave(filename\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresult.jpg\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# save to disk\u001b[39;00m\n",
+"File \u001b[1;32mc:\\Users\\danie\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\ultralytics\\utils\\__init__.py:235\u001b[0m, in \u001b[0;36mSimpleClass.__getattr__\u001b[1;34m(self, attr)\u001b[0m\n\u001b[0;32m 233\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Custom attribute access error message with helpful information.\"\"\"\u001b[39;00m\n\u001b[0;32m 234\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m--> 235\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. See valid attributes below.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__doc__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
+"\u001b[1;31mAttributeError\u001b[0m: 'Results' object has no attribute 'ascontiguousarray'. See valid attributes below.\n\n A class for storing and manipulating inference results.\n\n This class encapsulates the functionality for handling detection, segmentation, pose estimation,\n and classification results from YOLO models.\n\n Attributes:\n orig_img (numpy.ndarray): Original image as a numpy array.\n orig_shape (Tuple[int, int]): Original image shape in (height, width) format.\n boxes (Boxes | None): Object containing detection bounding boxes.\n masks (Masks | None): Object containing detection masks.\n probs (Probs | None): Object containing class probabilities for classification tasks.\n keypoints (Keypoints | None): Object containing detected keypoints for each object.\n obb (OBB | None): Object containing oriented bounding boxes.\n speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.\n names (Dict[int, str]): Dictionary mapping class IDs to class names.\n path (str): Path to the image file.\n _keys (Tuple[str, ...]): Tuple of attribute names for internal use.\n\n Methods:\n update: Updates object attributes with new detection results.\n cpu: Returns a copy of the Results object with all tensors on CPU memory.\n numpy: Returns a copy of the Results object with all tensors as numpy arrays.\n cuda: Returns a copy of the Results object with all tensors on GPU memory.\n to: Returns a copy of the Results object with tensors on a specified device and dtype.\n new: Returns a new Results object with the same image, path, and names.\n plot: Plots detection results on an input image, returning an annotated image.\n show: Shows annotated results on screen.\n save: Saves annotated results to file.\n verbose: Returns a log string for each task, detailing detections and classifications.\n save_txt: Saves detection results to a text file.\n save_crop: Saves cropped detection images.\n tojson: Converts detection results to JSON format.\n\n Examples:\n >>> results = model(\"path/to/image.jpg\")\n >>> for result in results:\n ... print(result.boxes) # Print detection boxes\n ... result.show() # Display the annotated image\n ... result.save(filename=\"result.jpg\") # Save annotated image\n "
+]
+}
+],
+"source": [
+"for result in pred:\n",
+" boxes = result.boxes # Boxes object for bounding box outputs\n",
+" masks = result.masks # Masks object for segmentation masks outputs\n",
+" keypoints = result.keypoints # Keypoints object for pose outputs\n",
+" probs = result.probs # Probs object for classification outputs\n",
+" obb = result.obb # Oriented boxes object for OBB outputs\n",
+" result.show() # display to screen\n",
+" result.save(filename=\"result.jpg\") # save to disk"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 194,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"tensor([], device='cuda:0', size=(0, 4))"
+]
+},
+"execution_count": 194,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"pred[0].boxes.xyxy"
 ]
 },
 {
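A note on the preprocessing cells above: the resize_and_pad_image helper added to cat_detect.ipynb rescales a batched CHW tensor so that both spatial dimensions become multiples of the 32-pixel model stride (despite its name, it rescales rather than pads). Below is a minimal usage sketch, not part of the commit, assuming torchvision and Pillow are installed, the helper is in scope, and the sample image committed below is used:

from PIL import Image
from torchvision import transforms

# Load the committed sample image and convert it to a (1, 3, H, W) float tensor
im = transforms.ToTensor()(Image.open("dataset/1818949000-IMG-20240118-WA0001.jpg"))
im = im.unsqueeze(0)

# Round both spatial dimensions up to the nearest multiple of 32 using the
# notebook's helper (hypothetical usage; the notebook's own call order differs slightly)
im = resize_and_pad_image(im)
print(im.shape)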
dataset/1818949000-IMG-20240118-WA0001.jpg (new binary file, not shown; after: 83 KiB)
dataset_download.py (new empty file, 0 lines)
output.jpg (new binary file, not shown; after: 170 KiB)
yolov8n.pt (new binary file, not shown)
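For reference, a minimal end-to-end sketch of the detection flow the notebook assembles: load the pretrained yolov8n weights committed above, run inference on the committed sample image, and read the predicted boxes. This is an illustrative sketch, assuming the ultralytics package and a CUDA device are available, not a verbatim excerpt of the notebook; passing a file path lets Ultralytics handle resizing and normalization internally instead of the manual tensor reshaping done in the middle cells.

from ultralytics import YOLO

# Load the pretrained YOLOv8n weights (the same yolov8n.pt added in this commit)
model = YOLO("yolov8n.pt").to("cuda")  # drop .to("cuda") to run on CPU

# The predictor accepts an image path and preprocesses it itself
results = model("dataset/1818949000-IMG-20240118-WA0001.jpg")

for result in results:
    print(result.boxes.xyxy)            # (N, 4) tensor of x1, y1, x2, y2 corners
    result.save(filename="result.jpg")  # write an annotated copy to disk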