🎯 教學目標
- 使用 PiCamera2 擷取即時影像
- 用 OpenCV 的 EAST 模型偵測文字位置
- 在畫面上即時顯示偵測框
🧰 環境準備
✅ 安裝必要套件
sudo apt update
sudo apt install python3-opencv python3-picamera2 libatlas-base-dev -y
pip install numpy
✅ 下載 EAST 模型(frozen graph)
mkdir -p ~/east_cam && cd ~/east_cam
wget https://github.com/oyyd/frozen_east_text_detection.pb/raw/master/frozen_east_text_detection.pb -O frozen_east_text_detection.pb
🧪 主程式:east_cam.py
import cv2
import numpy as np
from picamera2 import Picamera2
# Load the EAST text-detection network from the frozen graph file.
# The path is relative, so the script must be run from the directory that
# contains the .pb file.
net = cv2.dnn.readNet("frozen_east_text_detection.pb")
# Names of the two EAST output layers requested from the network. The order
# matters: the forward pass unpacks the first as `scores` and the second as
# `geometry`.
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"
]
# Initialise PiCamera2 with a 640x480 main stream in "RGB888" format, then
# start capturing.
# NOTE(review): per the Picamera2 manual, "RGB888" is stored in [B, G, R]
# order in memory, which matches OpenCV's BGR convention — confirm against
# the installed Picamera2 version.
picam2 = Picamera2()
picam2.preview_configuration.main.size = (640, 480)
picam2.preview_configuration.main.format = "RGB888"
picam2.start()
# 解碼函式
def decode(scores, geometry, scoreThresh):
    """Turn raw EAST output maps into axis-aligned boxes and confidences.

    Args:
        scores: Array indexed as ``scores[0, 0, y, x]`` holding the text
            confidence of each output cell.
        geometry: Array indexed as ``geometry[0, c, y, x]``. Channels 0-3
            are the distances used to size the box and channel 4 is the
            rotation angle of the cell.
        scoreThresh: Cells whose score is below this value are skipped.

    Returns:
        A ``(boxes, confidences)`` tuple. ``boxes`` is a list of
        ``[startX, startY, endX, endY]`` integer lists and ``confidences``
        is the list of matching scores as Python floats. Both are empty
        when no cell passes the threshold.
    """
    numRows, _ = scores.shape[2:4]
    boxes = []
    confidences = []
    for y in range(numRows):
        scoresData = scores[0, 0, y]
        x0 = geometry[0, 0, y]
        x1 = geometry[0, 1, y]
        x2 = geometry[0, 2, y]
        x3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # Visit only the columns that pass the threshold. `~(s < t)` is used
        # instead of `s >= t` so NaN scores are treated exactly like a plain
        # `if s < t: continue` would treat them. `.tolist()` yields Python
        # ints so the arithmetic below keeps its original scalar types.
        candidates = np.flatnonzero(~(scoresData < scoreThresh)).tolist()
        for x in candidates:
            # The 4.0 factor maps an output-map cell index to input-image
            # pixels (the EAST output maps are 4x smaller than the input).
            offsetX = x * 4.0
            offsetY = y * 4.0
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # Box height/width are the sums of the opposing distances.
            h = x0[x] + x2[x]
            w = x1[x] + x3[x]
            endX = int(offsetX + cos * x1[x] + sin * x2[x])
            endY = int(offsetY - sin * x1[x] + cos * x2[x])
            startX = int(endX - w)
            startY = int(endY - h)
            boxes.append([startX, startY, endX, endY])
            confidences.append(float(scoresData[x]))
    return boxes, confidences
print("📸 PiCamera2 + EAST 文字偵測開始中,按 'q' 鍵結束")
# Network input size fed to EAST; constant for every frame, so set once.
newW, newH = (320, 320)
try:
    while True:
        frame = picam2.capture_array()
        orig = frame.copy()
        (H, W) = frame.shape[:2]
        # Scale factors that map boxes from the 320x320 network input back
        # to the captured frame.
        rW, rH = W / float(newW), H / float(newH)
        # Resize the frame to the EAST input size and build the input blob.
        resized = cv2.resize(frame, (newW, newH))
        blob = cv2.dnn.blobFromImage(resized, 1.0, (newW, newH),
                                     (123.68, 116.78, 103.94),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)
        (boxes, confidences) = decode(scores, geometry, 0.5)
        # NMSBoxes expects [x, y, w, h] rectangles rather than corner pairs.
        rects = [[sx, sy, ex - sx, ey - sy] for (sx, sy, ex, ey) in boxes]
        indices = cv2.dnn.NMSBoxes(rects, confidences,
                                   score_threshold=0.5, nms_threshold=0.4)
        # Draw the surviving text boxes. np.asarray(...).flatten() copes with
        # an empty result as well as (N,) and (N, 1) shaped index arrays.
        for i in np.asarray(indices).flatten():
            (startX, startY, endX, endY) = boxes[i]
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.imshow("EAST Text Detection (PiCamera2)", orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the preview window, even when the
    # loop exits through an exception or Ctrl-C.
    picam2.stop()
    cv2.destroyAllWindows()
▶️ 執行方式
cd ~/east_cam
python3 east_cam.py
按下 q 鍵可退出。
📌 說明重點
| 元件 | 用途 |
|---|---|
| `cv2.dnn.readNet()` | 載入 EAST 模型 |
| `cv2.dnn.blobFromImage()` | 前處理圖像 |
| `feature_fusion` 輸出層 | 提供分數與幾何資訊 |
| `decode()` 函式 | 從輸出中還原出框框位置 |
| `NMSBoxes()` | 避免重複偵測的框框 |
文章標籤
全站熱搜
