Machine learning - identifying verification code (CAPTCHA) images

Scroll Down

Identifying verification code images

Machine learning - identifying verification code images - [OpenCV to locate the string, machine learning to recognize what the string is]


Since last year I have wanted to train a good model to recognize verification code (CAPTCHA) images. Most of the code that can currently be found on websites or on GitHub trains the model with a fixed image size and a fixed number of characters. I don't think that approach is elegant; I want to create a model that can recognize the verification code in an image of any size containing any number of characters.

main framework


Train a model to locate where the code string is in the image. Here we can use OpenCV to create a cascade classifier model, training it with the main string images and some noise images; this part is easy.


We know an image cannot contain only the string; it will also contain noise lines or points. First we should remove this noise and these interference points. Here we can use OpenCV functions, with an algorithm like this:

def verification_code_recognition(image):
    """Recognize the text in a verification-code image with Tesseract OCR.

    The image is converted to grayscale, binarized with an inverted Otsu
    threshold, and cleaned with a morphological opening before being passed
    to OCR. Both intermediate images are handed to ``cv.imshow`` so they can
    be inspected.

    Args:
        image: Input image; it is passed to ``cv.cvtColor`` with
            ``cv.COLOR_BGR2GRAY``, so a BGR array is expected.

    Returns:
        The string produced by ``tess.image_to_string``.
    """
    # Binarize the image.
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)  # convert to grayscale
    # The fixed threshold argument (0) is ignored because THRESH_OTSU picks it.
    _, binary = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
    cv.imshow("binary", binary)

    # Morphological opening with a 4x4 rectangular kernel to suppress
    # specks and thin strokes smaller than the kernel.
    kernel = cv.getStructuringElement(cv.MORPH_RECT, (4, 4))
    bin_af = cv.morphologyEx(binary, cv.MORPH_OPEN, kernel=kernel)
    cv.imshow("bin_af", bin_af)

    text_image = Image.fromarray(bin_af)
    text = tess.image_to_string(text_image)

    print("The result:", text)
    # Return the recognized text as well; previously it was only printed and
    # callers had no way to use the result.
    return text


Using the cascade classifier model we trained earlier, we can select where the string is, and then crop that selection to train the string recognition model.


I think this will work pretty well. I will try it later. To be continued……

Talk is cheap, let me show you the code.

Generate images for training a YOLOv5 model to detect the CAPTCHA string

# -*- coding:utf-8 -*
import os.path

import cv2
import numpy as np
import string
from random import randint, choice
from captcha.image import ImageCaptcha
from matplotlib import pyplot as plt

class GenYoloV5Img:
    def __init__(self, img_save_dir='./images', label_save_dir='./labels', save_name='1'):
        self.char_num = 1
        self.characters = string.digits + string.ascii_uppercase + string.ascii_lowercase
        self.img_save_dir = img_save_dir
        self.label_save_dir = label_save_dir
        self.classes = ['captcha_string']
        self.save_name = str(save_name)

    def _create_classes_txt(self):
        """Create ``classes.txt`` in the label directory if it does not exist yet.

        The file is opened for writing only when no file is present at that
        path, so an existing ``classes.txt`` is left untouched.
        """
        classes_txt_path = os.path.join(os.path.abspath(self.label_save_dir), 'classes.txt')
        if not os.path.exists(classes_txt_path):
            with open(classes_txt_path, 'w') as f:
                # NOTE(review): the body of this loop is missing from the
                # listing; presumably it writes each class name to `f`.
                # Restore it before running this code.
                for i in self.classes:

    def detect_boundary(x: np.array):
        boundary_dic = {}
        for index, value in enumerate(x):
            if value > 0:
                boundary_dic.update({'start': index-1})
        for index, value in enumerate(x[::-1]):
            if value > 0:
                boundary_dic.update({'end': len(x) - index - 1})
        return max(boundary_dic['start'], 0), max(boundary_dic['end'], 0)

    def gen_cap_img(self):
        """Generate one single-character captcha sample and write its YOLO label.

        A random character from ``self.characters`` is rendered, the rendered
        character's bounding box is measured from a binarized copy read from
        ``test.png``, the label file is written, and noise dots and a noise
        curve are then drawn on the image.

        NOTE(review): the listing is incomplete. The ``create_captcha_image``
        call is never closed, nothing visible writes ``test.png`` before it is
        read, and ``img_save_path`` is computed but never used. Restore the
        missing lines before running this method.
        """
        im = ImageCaptcha(randint(50, 150), randint(50, 250))
        # NOTE(review): this call is cut off; its remaining arguments and
        # closing parenthesis are missing from the listing.
        img = im.create_captcha_image(chars=choice(self.characters), color=self.rgb_random(),

        # plt.figure(figsize=(3, 3))
        # plt.imshow(img)
        # plt.pause(0.1)

        # Read the clean (noise-free) rendering back as an OpenCV array.
        img_np = cv2.imread('test.png')
        gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)  # convert to grayscale
        ret, img_np = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)  # binarize (inverted Otsu threshold)
        # cv2.imshow("binary", binary)

        # Project the binary image onto each axis: per-column and per-row sums.
        sum_x = img_np.sum(axis=0)
        sum_y = img_np.sum(axis=1)

        # Extent of the non-zero projection along each axis.
        x_start, x_end = self.detect_boundary(sum_x)
        y_start, y_end = self.detect_boundary(sum_y)

        # for row in range(x_start, x_end + 1, x_end - x_start):
        #     for columns in range(y_start, y_end + 1, y_end - y_start):
        #         img_np[columns, row] = 255

        # shape[1] is passed as the image width and shape[0] as the height.
        self.create_yolo_label_txt(x_start, x_end, y_start, y_end, img_np.shape[1], img_np.shape[0])

        # plt.imshow(img_np)
        # plt.pause(0.1)

        # Noise is added only after the bounding box has been measured.
        img = im.create_noise_dots(image=img, color=self.rgb_random(), width=randint(0, 5),
                                   number=randint(0, 80))
        img = im.create_noise_curve(image=img, color=self.rgb_random())
        # NOTE(review): the path is built but the image is never saved in the
        # visible code.
        img_save_path = os.path.join(os.path.abspath(self.img_save_dir), f'{self.save_name}.png')
        # plt.imshow(img)
        # plt.pause(0.1)

    def rgb_random():
        rgb_random = (randint(0, 255),
                      randint(0, 255),
                      randint(0, 255))
        return rgb_random

    def create_yolo_label_txt(self, x_start, x_end, y_start, y_end, width, height):
        """Write a one-line label file describing a single bounding box.

        The line has the form ``0 x_center y_center box_width box_height``,
        where the four numbers are divided by the image size and the leading
        ``0`` is the class index. The file is written to
        ``<label_save_dir>/<save_name>.txt`` and overwrites any existing file.

        Args:
            x_start: Left edge of the box, in pixels.
            x_end: Right edge of the box, in pixels.
            y_start: Top edge of the box, in pixels.
            y_end: Bottom edge of the box, in pixels.
            width: Image width in pixels (must be non-zero).
            height: Image height in pixels (must be non-zero).

        Raises:
            ZeroDivisionError: If ``width`` or ``height`` is 0.
        """
        center_point_x = (x_start + (x_end - x_start) / 2) / width
        center_point_y = (y_start + (y_end - y_start) / 2) / height
        string_width_rate = (x_end - x_start) / width
        string_height_rate = (y_end - y_start) / height

        label_dir = os.path.abspath(self.label_save_dir)
        # Create the label directory on demand; open() would otherwise fail
        # with FileNotFoundError on a fresh output location.
        os.makedirs(label_dir, exist_ok=True)
        label_save_path = os.path.join(label_dir, f'{self.save_name}.txt')
        with open(label_save_path, 'w') as f:
            f.write(f'0 {center_point_x} {center_point_y} {string_width_rate} {string_height_rate}')

if __name__ == '__main__':
    # Script entry point: build a generator and iterate over 3000 sample names.
    # NOTE(review): the constructor call below is cut off in this listing (its
    # remaining arguments and closing parenthesis are missing), and the loop
    # only assigns the file name without calling any generation method, so
    # lines appear to be missing here as well.
    te = GenYoloV5Img(img_save_dir='captchas/images/train22424',
    # plt.ion()
    for i in range(3000):
        # The loop index becomes the base file name of the sample.
        te.save_name = i

OK, let's start training.

def parse_opt(known=False):
    """Build the training argument parser and parse the command line.

    Args:
        known: When true, parse with ``parse_known_args`` and ignore any
            unrecognized arguments; otherwise use ``parse_args``, which
            exits with an error on unrecognized arguments.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / '', help='initial weights path')
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/captcha_string.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size', type=int, default=5, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=2, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')

    # Weights & Biases arguments
    parser.add_argument('--entity', default=None, help='W&B: Entity')
    parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option')
    parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval')
    parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use')

    opt = parser.parse_known_args()[0] if known else parser.parse_args()
    # Hand the parsed options back to the caller; without this the function
    # returned None and the parsed values were lost.
    return opt

The training started two days ago, and it looks like the process will take another ten days to finish, so let's wait and see how the model performs.
I am using an Apple M1 chip, and I have to say it is too slow!!!