🎯 教學目標

  • 使用 PiCamera2 擷取即時影像

  • 用 OpenCV 的 EAST 模型偵測文字位置

  • 在畫面上即時顯示偵測框


🧰 環境準備

✅ 安裝必要套件

sudo apt update
sudo apt install python3-opencv python3-picamera2 libatlas-base-dev -y
pip install numpy

✅ 下載 EAST 模型(frozen graph)

mkdir -p ~/east_cam && cd ~/east_cam
wget https://github.com/oyyd/frozen_east_text_detection.pb/raw/master/frozen_east_text_detection.pb -O frozen_east_text_detection.pb


🧪 主程式:east_cam.py

import cv2
import numpy as np
from picamera2 import Picamera2

# Load the EAST text detector from the frozen graph in the working directory
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# Output layers requested from the network; the forward pass returns them in
# this order, and the main loop unpacks them as (scores, geometry)
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# Create the PiCamera2 instance, set its preview main stream to 640x480 in
# the "RGB888" format, then start capturing
picam2 = Picamera2()
main_stream = picam2.preview_configuration.main
main_stream.size = (640, 480)
main_stream.format = "RGB888"
picam2.start()

# Decode helper
def decode(scores, geometry, scoreThresh):
    """Convert EAST output maps into candidate boxes and their confidences.

    Every cell of the score map whose value is not below ``scoreThresh``
    yields one axis-aligned box. Cell (col, row) is anchored at the offset
    (4 * col, 4 * row); the bottom-right corner is found by adding two of the
    distance channels rotated by the angle channel, and the top-left corner
    by stepping back by the box width and height.

    Args:
        scores: 4-D array read as ``scores[0, 0, row, col]``.
        geometry: 4-D array read as ``geometry[0, channel, row, col]``;
            channels 0-3 are used as distances and channel 4 as the angle.
        scoreThresh: Cells scoring below this value are skipped.

    Returns:
        A ``(boxes, confidences)`` tuple. ``boxes`` is a list of
        ``[startX, startY, endX, endY]`` integer lists and ``confidences``
        the matching list of float scores, both in row-by-row scan order.
    """
    row_count, col_count = scores.shape[2:4]
    boxes, confidences = [], []

    for row in range(row_count):
        row_scores = scores[0, 0, row]
        dist0, dist1, dist2, dist3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(col_count):
            score = row_scores[col]
            if score < scoreThresh:
                continue

            theta = row_angles[col]
            cos_t, sin_t = np.cos(theta), np.sin(theta)
            d1, d2 = dist1[col], dist2[col]

            # Bottom-right corner: cell offset (x4) plus the rotated distances
            right = int(col * 4.0 + cos_t * d1 + sin_t * d2)
            bottom = int(row * 4.0 - sin_t * d1 + cos_t * d2)

            # Top-left corner: step back by the box width and height
            left = int(right - (d1 + dist3[col]))
            top = int(bottom - (dist0[col] + d2))

            boxes.append([left, top, right, bottom])
            confidences.append(float(score))

    return boxes, confidences

print("📸 PiCamera2 + EAST 文字偵測開始中,按 'q' 鍵結束")

# Per-frame settings that never change, hoisted out of the capture loop
EAST_INPUT_SIZE = (320, 320)  # (width, height) the frame is resized to
EAST_MEAN = (123.68, 116.78, 103.94)  # mean values handed to blobFromImage
SCORE_THRESHOLD = 0.5  # minimum score, shared by decode() and NMSBoxes
NMS_THRESHOLD = 0.4  # overlap threshold handed to NMSBoxes
WINDOW_TITLE = "EAST Text Detection (PiCamera2)"

# Capture -> detect -> draw -> show, until 'q' is pressed. The try/finally
# guarantees the camera is stopped and the windows are closed even when the
# loop is left through an exception or Ctrl+C.
try:
    while True:
        frame = picam2.capture_array()
        orig = frame.copy()
        (H, W) = frame.shape[:2]

        # Scale factors that map detector coordinates back onto the frame
        newW, newH = EAST_INPUT_SIZE
        rW, rH = W / float(newW), H / float(newH)

        # Resize the frame to the EAST input size and run the network
        resized = cv2.resize(frame, (newW, newH))
        blob = cv2.dnn.blobFromImage(resized, 1.0, (newW, newH),
                                     EAST_MEAN, swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)
        (boxes, confidences) = decode(scores, geometry, SCORE_THRESHOLD)

        # NMSBoxes expects [x, y, w, h] rectangles, not corner pairs
        rects = [[startX, startY, endX - startX, endY - startY]
                 for (startX, startY, endX, endY) in boxes]

        indices = cv2.dnn.NMSBoxes(rects, confidences,
                                   score_threshold=SCORE_THRESHOLD,
                                   nms_threshold=NMS_THRESHOLD)

        # Normalise the result to a flat index array so that an empty result,
        # a tuple, or an N x 1 array are all handled the same way
        for i in np.asarray(indices).reshape(-1):
            (startX, startY, endX, endY) = boxes[i]
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

        cv2.imshow(WINDOW_TITLE, orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Stop the camera first so it is released even if closing the windows
    # fails (e.g. when the GUI call itself was what raised)
    picam2.stop()
    cv2.destroyAllWindows()

▶️ 執行方式

cd ~/east_cam
python3 east_cam.py

按下 q 鍵可退出。


📌 說明重點

| 元件 | 用途 |
| --- | --- |
| cv2.dnn.readNet() | 載入 EAST 模型 |
| cv2.dnn.blobFromImage() | 前處理圖像 |
| feature_fusion 輸出層 | 提供分數與幾何資訊 |
| decode() 函式 | 從輸出中還原出框框位置 |
| NMSBoxes() | 以非極大值抑制(NMS)去除重疊、重複偵測的框框 |

文章標籤
全站熱搜
創作者介紹
創作者 liusming 的頭像
liusming

劉老師的跨域創想工坊

liusming 發表在 痞客邦 留言(0) 人氣(19)