🎯 Raspberry Pi 5 上使用 EAST + Tesseract 進行即時文字辨識(含 PiCamera2 即時預覽)
🧰 教學前提
| 項目 | 說明 |
|---|---|
| 📦 裝置 | Raspberry Pi 5(安裝 Raspberry Pi OS Bookworm) |
| 📷 相機 | 原廠相容 Camera Module(需開啟 camera 支援) |
| 🧠 模型 | TensorFlow EAST (frozen_east_text_detection.pb) |
| 🔤 OCR | Tesseract + pytesseract |
| 🧪 顯示 | 使用 OpenCV 即時視窗顯示辨識結果 |
🪛 第一步:安裝必要套件
# Install the Tesseract OCR engine and its development package
sudo apt update
sudo apt install -y tesseract-ocr libtesseract-dev
# Install OpenCV, PiCamera2, NumPy and pytesseract from apt.
# Raspberry Pi OS Bookworm marks the system Python as externally managed
# (PEP 668), so a bare `pip install` is rejected there. Using apt also keeps
# NumPy in step with the python3-opencv / python3-picamera2 builds.
sudo apt install -y python3-opencv python3-picamera2 python3-numpy python3-pytesseract
# If python3-pytesseract is not available on your image, install only that
# package with pip (it is pure Python):
#   pip install --break-system-packages pytesseract
📥 第二步:下載 EAST 模型
# Create the project directory (no error if it already exists) and enter it
mkdir -p ~/east_ocr && cd ~/east_ocr
# Download the frozen EAST text-detection graph into the project directory
wget https://github.com/oyyd/frozen_east_text_detection.pb/raw/master/frozen_east_text_detection.pb
🧪 第三步:建立主程式 east_tesseract_realtime.py
import cv2
import numpy as np
from picamera2 import Picamera2
import pytesseract
# Load the EAST text-detection model.
# The path is relative, so the script must be started from the directory
# that contains frozen_east_text_detection.pb.
net = cv2.dnn.readNet("frozen_east_text_detection.pb")
# Output layers requested from net.forward(): the first yields the score map,
# the second the geometry map (they are unpacked in that order in the main loop).
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]
# Start PiCamera2 with a 640x480 preview stream.
picam2 = Picamera2()
picam2.preview_configuration.main.size = (640, 480)
# NOTE(review): the later cv2 calls treat frames as BGR; Picamera2 documents
# "RGB888" as BGR byte order in memory, which would match — confirm.
picam2.preview_configuration.main.format = "RGB888"
picam2.start()
# Decode helper: turn the EAST score/geometry maps into boxes + confidences
def decode(scores, geometry, scoreThresh):
    """Convert EAST output maps into axis-aligned boxes and their scores.

    Args:
        scores: array indexed as ``scores[0, 0, y, x]`` holding the text
            confidence of each output-map cell.
        geometry: array indexed as ``geometry[0, c, y, x]``; channels 0-3 are
            the four distances that are summed pairwise into the box height
            (0 + 2) and width (1 + 3), channel 4 is the rotation angle.
        scoreThresh: cells whose score is below this value are ignored.

    Returns:
        ``(boxes, confidences)`` where ``boxes`` is a list of
        ``[startX, startY, endX, endY]`` integer lists and ``confidences`` is
        the matching list of Python floats. Coordinates are in the scale of
        the network input (each map cell is multiplied by 4), and may be
        negative or exceed the input size; callers must clamp before slicing.
    """
    numRows = scores.shape[2]
    boxes = []
    confidences = []
    for y in range(numRows):
        scoresData = scores[0, 0, y]
        x0 = geometry[0, 0, y]
        x1 = geometry[0, 1, y]
        x2 = geometry[0, 2, y]
        x3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # Let NumPy apply the threshold for the whole row so the Python loop
        # only visits the (usually few) cells that actually contain text.
        # .tolist() yields plain ints, keeping the arithmetic below identical
        # to indexing with a range() counter.
        for x in np.flatnonzero(scoresData >= scoreThresh).tolist():
            # The output map is 4x smaller than the network input.
            offsetX = x * 4.0
            offsetY = y * 4.0
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            h = x0[x] + x2[x]
            w = x1[x] + x3[x]
            # Rotate the right/bottom offsets by the predicted angle to find
            # the box's bottom-right corner, then step back by w/h.
            endX = int(offsetX + cos * x1[x] + sin * x2[x])
            endY = int(offsetY - sin * x1[x] + cos * x2[x])
            startX = int(endX - w)
            startY = int(endY - h)
            boxes.append([startX, startY, endX, endY])
            confidences.append(float(scoresData[x]))
    return boxes, confidences
# Main loop: grab a frame, detect text regions with EAST, OCR every region
# with Tesseract, and show the annotated frame until 'q' is pressed.
print("📸 EAST + Tesseract OCR 啟動中,按 'q' 離開")
# Network input size; loop-invariant, so set once. The OpenCV EAST sample
# requires width and height to be multiples of 32.
newW, newH = (320, 320)
try:
    while True:
        frame = picam2.capture_array()
        # Annotations are drawn on this copy only, so every OCR crop below is
        # taken from the clean frame and never contains earlier overlays.
        orig = frame.copy()
        (H, W) = frame.shape[:2]
        # Scale factors from network-input coordinates back to the frame.
        rW, rH = W / float(newW), H / float(newH)
        resized = cv2.resize(frame, (newW, newH))
        blob = cv2.dnn.blobFromImage(resized, 1.0, (newW, newH),
                                     (123.68, 116.78, 103.94),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)
        (boxes, confidences) = decode(scores, geometry, 0.5)
        # NMSBoxes expects [x, y, width, height] rectangles.
        rects = [[startX, startY, endX - startX, endY - startY]
                 for (startX, startY, endX, endY) in boxes]
        indices = cv2.dnn.NMSBoxes(rects, confidences, 0.5, 0.4)
        # Normalise the result before iterating: if it is an empty tuple, an
        # (N, 1) array or a flat array, this still yields a flat index list.
        for i in np.asarray(indices, dtype=int).reshape(-1):
            (startX, startY, endX, endY) = boxes[i]
            # Scale to frame coordinates and clamp to the frame: a negative
            # start would otherwise be treated as a wrap-around slice index
            # and crop the wrong region.
            startX = min(max(int(startX * rW), 0), W)
            startY = min(max(int(startY * rH), 0), H)
            endX = min(max(int(endX * rW), 0), W)
            endY = min(max(int(endY * rH), 0), H)
            if endX <= startX or endY <= startY:
                # Box collapsed after clamping; nothing to read or draw.
                continue
            roi = frame[startY:endY, startX:endX]
            gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray_roi, config="--psm 6")
            # cv2.putText cannot render newlines, so collapse all whitespace
            # runs (including line breaks) into single spaces.
            text = " ".join(text.split())
            # Show the result
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)
            if text:
                # Keep the label inside the frame for boxes near the top edge.
                cv2.putText(orig, text, (startX, max(startY - 10, 15)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        cv2.imshow("EAST + Tesseract OCR", orig)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the window and the camera, even on Ctrl-C or an error.
    cv2.destroyAllWindows()
    picam2.stop()
▶️ 第四步:執行程式
# Run from the project directory: the script loads
# frozen_east_text_detection.pb via a relative path.
cd ~/east_ocr
python3 east_tesseract_realtime.py
按 q 可結束程式。
✅ 成果預覽
| 功能 | 效果 |
|---|---|
| 📷 即時畫面 | 來自 PiCamera2 |
| 🟩 綠色框 | EAST 偵測到的文字區域 |
| 🔤 紅色字 | Tesseract 辨識出的文字內容 |
| 🔚 離開方式 | 按 q 結束程式 |
🎁 Bonus:可擴充功能建議
| 功能 | 實作方式 |
|---|---|
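| 多語言辨識 | pytesseract.image_to_string(..., lang="chi_tra+eng")(需先執行 sudo apt install tesseract-ocr-chi-tra 安裝繁體中文語言包;另外 cv2.putText 無法繪製中文字,畫面上的中文需改用 Pillow 等方式顯示) |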
| 語音播報 | 使用 pyttsx3 或 espeak |
| 自動截圖 | 對每個有文字的 ROI 儲存為 img_{timestamp}.jpg |
| 串流上網頁 | 搭配 Flask + OpenCV 打造 Web 影像伺服器 |
如需我再寫成 Flask API 或加入語音讀出,歡迎提出需求,我可再幫你擴寫第二階段應用教學!
文章標籤
全站熱搜
