🎯 Raspberry Pi 5 上使用 EAST + Tesseract 進行即時文字辨識(含 PiCamera2 即時預覽)


🧰 教學前提

項目 說明
📦 裝置 Raspberry Pi 5(安裝 Raspberry Pi OS Bookworm)
📷 相機 原廠相容 Camera Module(需開啟 camera 支援)
🧠 模型 TensorFlow EAST (frozen_east_text_detection.pb)
🔤 OCR Tesseract + pytesseract
🧪 顯示 使用 OpenCV 即時視窗顯示辨識結果

🪛 第一步:安裝必要套件

# Install the Tesseract OCR engine and its development files
sudo apt update
sudo apt install -y tesseract-ocr libtesseract-dev

# Install OpenCV, Picamera2 and NumPy from the system package repository
sudo apt install -y python3-opencv python3-picamera2 python3-numpy python3-venv

# Raspberry Pi OS Bookworm refuses system-wide "pip install" (PEP 668,
# "externally-managed-environment"), so pytesseract goes into a virtual
# environment. --system-site-packages keeps the apt-installed OpenCV,
# Picamera2 and NumPy importable from inside it.
python3 -m venv --system-site-packages ~/east_ocr/.venv
source ~/east_ocr/.venv/bin/activate
pip install pytesseract
# NOTE: run "source ~/east_ocr/.venv/bin/activate" again in every new terminal
# before starting the program.

📥 第二步:下載 EAST 模型

# Create the working directory (no error if it already exists) and enter it
mkdir -p ~/east_ocr && cd ~/east_ocr
# Download the frozen EAST model file into the current directory
wget https://github.com/oyyd/frozen_east_text_detection.pb/raw/master/frozen_east_text_detection.pb

🧪 第三步:建立主程式 east_tesseract_realtime.py

import cv2
import numpy as np
from picamera2 import Picamera2
import pytesseract

# Names of the two EAST output layers to fetch on every forward pass:
# the score map (Sigmoid) first, then the geometry map (concat_3).
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# Load the frozen EAST model; the path is relative to the working directory.
net = cv2.dnn.readNet("frozen_east_text_detection.pb")

# Open the camera with a 640x480 "RGB888" preview stream and start capturing.
picam2 = Picamera2()
picam2.configure(
    picam2.create_preview_configuration(
        main={"size": (640, 480), "format": "RGB888"}
    )
)
picam2.start()

# 解碼函式:將 scores/geometry 轉成 box + confidence
def decode(scores, geometry, scoreThresh):
    """Turn the detector's score and geometry maps into boxes and scores.

    Args:
        scores: Array indexed as ``scores[0, 0, row, col]``; its last two
            dimensions define the grid that is scanned.
        geometry: Array indexed as ``geometry[0, channel, row, col]``.
            Channels 0-3 are combined into box height/width and corner
            offsets; channel 4 is used as a rotation angle in radians.
        scoreThresh: Grid cells scoring below this value are ignored.

    Returns:
        ``(boxes, confidences)`` where ``boxes`` is a list of
        ``[startX, startY, endX, endY]`` integer lists and ``confidences``
        is the list of matching scores as Python floats. Coordinates are in
        the grid-index-times-4 space (presumably the network input pixels;
        confirm against the model in use).
    """
    grid_rows, grid_cols = scores.shape[2:4]
    boxes, confidences = [], []

    for row in range(grid_rows):
        row_scores = scores[0, 0, row]
        dist0, dist1, dist2, dist3, row_angles = (
            geometry[0, channel, row] for channel in range(5)
        )

        for col in range(grid_cols):
            confidence = row_scores[col]
            if confidence < scoreThresh:
                continue

            # Grid indices are scaled by 4 to obtain the cell's offset.
            origin_x, origin_y = col * 4.0, row * 4.0

            theta = row_angles[col]
            cos_t, sin_t = np.cos(theta), np.sin(theta)

            box_h = dist0[col] + dist2[col]
            box_w = dist1[col] + dist3[col]

            # The far corner is found first; the near corner is derived from
            # it by subtracting the box width and height.
            right = int(origin_x + cos_t * dist1[col] + sin_t * dist2[col])
            bottom = int(origin_y - sin_t * dist1[col] + cos_t * dist2[col])
            left = int(right - box_w)
            top = int(bottom - box_h)

            boxes.append([left, top, right, bottom])
            confidences.append(float(confidence))

    return boxes, confidences

# Main loop: capture a frame, detect text regions with EAST, OCR each region
# with Tesseract, and show the annotated frame until 'q' is pressed.
print("📸 EAST + Tesseract OCR 啟動中,按 'q' 離開")

# The network input size never changes, so it is set once outside the loop.
newW, newH = (320, 320)

try:
    while True:
        frame = picam2.capture_array()
        # Annotations are drawn on a copy. OCR crops are taken from the
        # untouched `frame`, so rectangles and labels drawn for earlier
        # detections never end up inside a later crop.
        orig = frame.copy()
        (H, W) = frame.shape[:2]
        # Scale factors from network-input coordinates back to the frame.
        rW, rH = W / float(newW), H / float(newH)

        resized = cv2.resize(frame, (newW, newH))
        blob = cv2.dnn.blobFromImage(resized, 1.0, (newW, newH),
                                     (123.68, 116.78, 103.94), swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)
        (boxes, confidences) = decode(scores, geometry, 0.5)

        # NMSBoxes takes [x, y, width, height] rectangles.
        rects = [[sx, sy, ex - sx, ey - sy] for (sx, sy, ex, ey) in boxes]
        indices = cv2.dnn.NMSBoxes(rects, confidences, 0.5, 0.4)

        # Depending on the OpenCV version the result is an empty tuple, an
        # (N, 1) array or a flat array; normalise it to a flat integer array.
        for i in np.array(indices, dtype=int).flatten():
            (startX, startY, endX, endY) = boxes[i]
            # Rescale to the frame and clamp to its bounds. Unclamped
            # negative values would be treated as wrap-around slice indices
            # and crop the wrong region.
            startX = max(0, min(W, int(startX * rW)))
            startY = max(0, min(H, int(startY * rH)))
            endX = max(0, min(W, int(endX * rW)))
            endY = max(0, min(H, int(endY * rH)))
            if endX <= startX or endY <= startY:
                # Nothing of this box lies inside the frame.
                continue

            roi = frame[startY:endY, startX:endX]
            gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray_roi, config="--psm 6")
            # putText cannot render line breaks, so collapse all whitespace
            # (including newlines) into single spaces.
            text = " ".join(text.split())

            # Draw the detection box and, if any text was read, its label.
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
            if text:
                # Keep the label baseline inside the frame for boxes near the top.
                labelY = max(startY - 10, 15)
                cv2.putText(orig, text, (startX, labelY),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

        cv2.imshow("EAST + Tesseract OCR", orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the window and the camera, even on Ctrl+C or an error.
    cv2.destroyAllWindows()
    picam2.stop()

▶️ 第四步:執行程式

# Run from the directory that holds the script and the model file
# (the script loads frozen_east_text_detection.pb by relative path)
cd ~/east_ocr
python3 east_tesseract_realtime.py

在預覽視窗中按 q 可結束程式。


✅ 成果預覽

功能 效果
📷 即時畫面 來自 PiCamera2
🟩 綠色框 EAST 偵測到的文字區域
🔤 紅色字 Tesseract 辨識出的文字內容
🔚 離開方式 q 結束程式

🎁 Bonus:可擴充功能建議

功能 實作方式
多語言辨識 pytesseract.image_to_string(..., lang="chi_tra+eng")(需先執行 sudo apt install -y tesseract-ocr-chi-tra 安裝繁體中文語言包)
語音播報 使用 pyttsx3 或 espeak
自動截圖 對每個有文字的 ROI 儲存為 img_{timestamp}.jpg
串流上網頁 搭配 Flask + OpenCV 打造 Web 影像伺服器

如需我再寫成 Flask API 或加入語音讀出,歡迎提出需求,我可再幫你擴寫第二階段應用教學!

文章標籤
全站熱搜
創作者介紹
創作者 liusming 的頭像
liusming

劉老師的跨域創想工坊

liusming 發表在 痞客邦 留言(0) 人氣(23)