From c082b525b5e501dfa24038e3a85e444d88bfb1d0 Mon Sep 17 00:00:00 2001
From: admin <weikou2014>
Date: 星期五, 28 四月 2023 19:33:15 +0800
Subject: [PATCH] 开盘啦采集工具/看盘页面优化

---
 opencv_util.py |  240 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 208 insertions(+), 32 deletions(-)

diff --git a/opencv_util.py b/opencv_util.py
index d99fc5b..c87fe45 100644
--- a/opencv_util.py
+++ b/opencv_util.py
@@ -1,69 +1,245 @@
 # 二值化图像
+import os
+import random
+
 import cv2
+import matplotlib.pyplot as plt
+import numpy
+
+SHOW_PLT = False
 
 
 def gray_img(img):
+    if img.ndim == 2:
+        return img
     result = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     return result
 
 
-def is_col_empty(img, col, thresh_hold=0):
-    for r in img:
-        if r[col] > thresh_hold:
+def is_col_empty(img, col, start_row, end_row, thresh_hold=0):
+    for r in range(start_row, end_row + 1):
+        if img[r][col] > thresh_hold:
             return False
     return True
 
 
-def is_col_full(img, col, thresh_hold_start=64, thresh_hold_end=255):
+def is_col_full(img, col, start_row, end_row, thresh_hold_start=64, thresh_hold_end=255, thresh_empty_count=1):
     rows, cols = img.shape
-    for r in img:
-        if r[col] < thresh_hold_start or r[col] > thresh_hold_end:
+    empty_count = 0
+    for r in range(start_row, end_row + 1):
+        if img[r][col] < thresh_hold_start or img[r][col] > thresh_hold_end:
+            empty_count += 1
+            if empty_count > thresh_empty_count:
+                return False
+    return True
+
+
+# 行是否是满的
+def is_row_full(img, row, start_col, end_col, thresh_hold_start=64, thresh_hold_end=255):
+    rows, cols = img.shape
+    for c in range(start_col, end_col + 1):
+        if img[row][c] < thresh_hold_start or img[row][c] > thresh_hold_end:
             return False
     return True
+
+
+def format_num_img(img):
+    # 先调整为方形
+    rows, cols = img.shape
+    img_new = numpy.zeros((max(rows, cols), max(rows, cols)), numpy.uint8)
+    img_new.fill(0)
+    start_col = (max(rows, cols) - cols) // 2
+    if rows > cols:
+        for r in range(rows):
+            for c in range(cols):
+                img_new[r][c + start_col] = img[r][c]
+    else:
+        pass
+    # 设置为28X28像素
+    return cv2.resize(img_new, (28, 28), interpolation=cv2.INTER_AREA)
+
+
+# 分隔数字
+def split_nums_img(img):
+    # 获取代码的最大色值
+    rows, cols = img.shape
+    max_color = 0
+    for r in range(0, rows):
+        for c in range(0, cols):
+            if img[r][c] > max_color:
+                max_color = img[r][c]
+
+    start_index = -1
+    end_index = -1
+    codes_pos = []
+    for col in range(0, cols):
+        if not is_col_empty(img, col, 0, rows - 1, max_color * 2 // 3):
+            if start_index < 0:
+                start_index = col
+            end_index = col
+        else:
+            if end_index >= 0 and start_index >= 0:
+                codes_pos.append((start_index, end_index + 1))
+                end_index = -1
+                start_index = -1
+    img_detail = []
+    for i in range(len(codes_pos)):
+        temp_img = img[0:rows - 1, codes_pos[i][0]:codes_pos[i][1]]
+        temp_img = format_num_img(temp_img)
+        img_detail.append(temp_img)
+    return img_detail
 
 
 # 分离同花顺的代码
 def clip_ths_code_area(img):
     img = gray_img(img)
     rows, cols = img.shape
-    end_col = cols - 1
-    for col in range(0, cols, 1):
-        if is_col_full(img, col, 38, 38):
+
+    # 行分隔
+    full_row = -1
+    start_row = -1
+    end_row = -1
+    for r in range(0, rows, 1):
+        if is_row_full(img, r, 0, cols // 2, 38, 38):
             # print("找到分隔：", col)
-            end_col = col - 1
+            full_row = r
+            if start_row >= 0 and r - start_row > 10:
+                end_row = r - 1
+                break
+
+        else:
+            if full_row > -1:
+                full_row = -1
+                if start_row < 0:
+                    start_row = r
+    if end_row < 0:
+        end_row = rows - 1
+
+    if start_row < 0:
+        raise Exception("没找到上分割线")
+
+    end_col = cols - 1
+    for c in range(cols - 1, -1, -1):
+        if is_col_full(img, c, start_row, end_row, 38, 38, 2):
+            # print("找到分隔：", col)
+            end_col = c - 1
             break
     # 往前找数字分隔
     content_start = -1
     empty_start = -1
     empty_end = -1
-    start_index = 0
-    end_index = end_col
+    start_index = -1
+    end_index = -1
+    codes_pos = []
+    # 获取代码的最大色值
+    max_color = 0
+    for r in range(start_row, end_row + 1):
+        for c in range(end_col, end_col // 2, -1):
+            if img[r][c] > max_color:
+                max_color = img[r][c]
+
     for col in range(end_col, 0, -1):
-        if not is_col_empty(img, col):
-            if content_start < 0:
-                content_start = col
-            empty_start = -1
-            empty_end = -1
+        if not is_col_empty(img, col, start_row, end_row, max_color * 2 // 3):
+            if start_index < 0:
+                start_index = col
+            end_index = col
         else:
-            if empty_start < 0:
-                empty_start = col
-                empty_end = col
-            else:
-                empty_end = col
-            if empty_start - empty_end > 2 and content_start > 0:
-                start_index = col + (empty_start - empty_end + 1)
-                end_index = content_start
-                # print("代码范围：", start_index, end_index)
-                break
-    clip_img = img[0:rows, start_index:end_index]
+            if end_index >= 0 and start_index >= 0:
+                codes_pos.append((end_index - 1, start_index + 1))
+                end_index = -1
+                start_index = -1
+    codes_pos = codes_pos[:6]
+    codes_pos.sort(key=lambda x: x[0])
+
+    if SHOW_PLT:
+        plt.figure(figsize=(10, 4))
+    img_detail = []
+    for i in range(len(codes_pos)):
+        if SHOW_PLT:
+            plt.subplot(2, 5, i + 1)
+            plt.title(f'pred {i}')
+            plt.axis('off')
+        temp_img = img[start_row:end_row, codes_pos[i][0]:codes_pos[i][1]]
+        temp_img = format_num_img(temp_img)
+        img_detail.append(temp_img)
+        if SHOW_PLT:
+            plt.imshow(temp_img, cmap='gray')
+    if SHOW_PLT:
+        plt.show()
+
+    clip_img = img[start_row:end_row, codes_pos[0][0]:codes_pos[-1][1]]
     # cv2.imwrite("test1.png", clip_img)
-    return clip_img
+    return clip_img, img_detail
 
     # print(clip_img.shape)
     # ret1, p1 = cv2.threshold(src=clip_img, thresh=100, maxval=255, type=cv2.THRESH_BINARY)
     # cv2.imwrite("D:/workspace/GP/trade_desk/test3.png", p1)
 
 
-if __name__ == "__main__":
-    img = gray_img(cv2.imread("C:\\Users\\Administrator\\Desktop\\test.png"))
-    cv2.imwrite("C:\\Users\\Administrator\\Desktop\\test_gray.png", img)
+def __test4():
+    files = os.listdir("datas/test4/")
+    for file in files:
+        code = file[:-4]
+        img = cv2.imread(f"datas/test4/{file}", cv2.IMREAD_GRAYSCALE)
+        img_details = split_nums_img(img)
+        for d in range(0, len(img_details)):
+            cv2.imwrite(f"C:/Users/Administrator/Desktop/ocr/codes/{code}_{random.randint(0, 100000)}.png",
+                        img_details[d])
+        plt.figure(figsize=(10, 4))
+        for i in range(0, len(img_details)):
+            plt.subplot(2, 5, i + 1)
+            plt.title(f'pred {i}')
+            plt.axis('off')
+            plt.imshow(img_details[i], cmap='gray')
+        plt.show()
+
+
+def __test3():
+    files = os.listdir("datas/test3/")
+    for file in files:
+        code = file[:-4]
+        img = cv2.imread(f"datas/test3/{file}", cv2.IMREAD_GRAYSCALE)
+        rows, cols = img.shape
+        for r in range(rows):
+            for c in range(cols):
+                img[r][c] = 255 - img[r][c]
+
+        img_details = split_nums_img(img)
+        for d in range(0, len(img_details)):
+            cv2.imwrite(f"C:/Users/Administrator/Desktop/ocr/codes/{code}_{random.randint(0, 100000)}.png",
+                        img_details[d])
+        plt.figure(figsize=(10, 4))
+        for i in range(0, len(img_details)):
+            plt.subplot(2, 5, i + 1)
+            plt.title(f'pred {i}')
+            plt.axis('off')
+            plt.imshow(img_details[i], cmap='gray')
+        plt.show()
+
+
+if __name__ == '__main__':
+    __test4()
+
+if __name__ == "__main__1":
+    #
+    files = os.listdir("datas/test/")
+    for file in files:
+        code = file[:6]
+        img = cv2.imread(f"datas/test/{file}", cv2.IMREAD_GRAYSCALE)
+        img, img_details = clip_ths_code_area(img)
+        for d in range(0, len(img_details)):
+            cv2.imwrite(f"C:/Users/Administrator/Desktop/ocr/codes/{code}_{random.randint(0, 100000)}.png",
+                        img_details[d])
+    if SHOW_PLT:
+        plt.figure(figsize=(1, 1))
+        plt.subplot(1, 1, 1)
+        plt.title(f"test")
+        plt.axis('off')
+        plt.imshow(img)
+        plt.show()
+    pass
+    # img = gray_img(cv2.imread("C:\\Users\\Administrator\\Desktop\\ocr\\code_test.png"))
+    # h = img.shape[0]
+    # w = img.shape[1]
+    # img = cv2.resize(img, (int(w * 2), int(h * 2)), interpolation=cv2.INTER_AREA)
+    # cv2.imwrite("C:\\Users\\Administrator\\Desktop\\ocr\\code_test_gray.png", img)

--
Gitblit v1.8.0