Python 3.4 实现的 12306 最新验证码识别详解 - 编程语言

    import ssl   
    import json   
    from PIL import Image   
    import urllib   
    import re   
    import urllib.request as urllib2   
    # When the running Python exposes ssl._create_unverified_context, rebind
    # ssl.create_default_context to it, so contexts created through that
    # function skip certificate verification.
    # NOTE(review): this is a process-wide monkeypatch that weakens TLS checks.
    # Whether urllib's default HTTPS handler actually goes through
    # ssl.create_default_context (rather than ssl._create_default_https_context)
    # depends on the Python version -- confirm before relying on it.
    if hasattr(ssl, '_create_unverified_context'):
        ssl.create_default_context = ssl._create_unverified_context
    # User-Agent header sent with the Baidu image-search requests.
    UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
    # URL the captcha image is downloaded from.
    pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
    def get_img():
        """Download the captcha image and return it as a PIL image.

        The bytes fetched from ``pic_url`` are written to ``./tmp.jpg`` and
        that file is then opened with ``Image.open``.

        Returns:
            The object returned by ``Image.open('./tmp.jpg')``.
        """
        # Use the response as a context manager so the HTTP connection is
        # released as soon as the body has been read.
        with urllib2.urlopen(pic_url) as resp:
            raw = resp.read()
        with open('./tmp.jpg', 'wb') as fp:
            fp.write(raw)
        return Image.open('./tmp.jpg')
    def get_sub_img(im, x, y):
        """Crop one tile out of the captcha grid image.

        Args:
            im: Image object exposing ``crop((left, top, right, bottom))``.
            x: Column index of the tile; must satisfy ``0 <= x <= 3``.
            y: Row index of the tile; must satisfy ``0 <= y <= 2``.

        Returns:
            Whatever ``im.crop`` returns for the tile's bounding box.

        Raises:
            AssertionError: If ``x`` or ``y`` is outside the accepted range.
        """
        assert 0 <= x <= 3
        assert 0 <= y <= 2
        tile = 67         # side length of the cropped square, in pixels
        margin = 5        # left offset of column 0; also added to the tile size to get the stride
        first_row = 41    # top offset of row 0
        stride = tile + margin
        left = margin + stride * x
        top = first_row + stride * y
        return im.crop((left, top, left + tile, top + tile))
    def baidu_stu_lookup(im):
        """Look up an image on Baidu image search and return its keywords.

        The image is saved to ``./query_temp_img.png``, its bytes are POSTed to
        the upload endpoint, and the body of that response is passed as the
        ``queryImageUrl`` parameter of a second request. The HTML of the second
        response is handed to ``baidu_stu_html_extract``.

        Args:
            im: Image object exposing ``save(path)``.

        Returns:
            The string produced by ``baidu_stu_html_extract`` for the result
            page.
        """
        url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
        im.save("./query_temp_img.png")
        # Read the saved file back with a context manager so the handle is
        # closed instead of being left to the garbage collector.
        with open("./query_temp_img.png", 'rb') as fp:
            raw = fp.read()
        # The upload URL ends with the payload size in bytes.
        url = url + str(len(raw))
        req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
        with urllib2.urlopen(req) as resp:
            resp_url = resp.read()

        # resp_url is bytes; quote() accepts bytes and returns a str.
        url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + urllib2.quote(resp_url)
        req = urllib2.Request(url, headers={'User-Agent': UA})
        with urllib2.urlopen(req) as resp:
            html = resp.read().decode()
        return baidu_stu_html_extract(html)
    def baidu_stu_html_extract(html):
        """Extract the keyword guesses embedded in a search result page.

        Finds the first ``keywords:'...'`` fragment in *html*, un-escapes it
        into JSON text, parses it, and joins the ``keyword`` field of every
        entry with ``|``.

        Args:
            html: Page source as a string.

        Returns:
            The ``|``-joined keywords, or ``'[UNKOWN]'`` when the page has no
            ``keywords:'...'`` fragment or the parsed list is empty.

        Raises:
            ValueError: If the un-escaped fragment is not valid JSON.
        """
        match = re.search(r"keywords:'(.*?)'", html)
        if match is None:
            return '[UNKOWN]'
        # The fragment is a JavaScript string literal: double quotes appear as
        # the four characters \x22 and backslashes are doubled. Undo both so
        # the text is plain JSON. (The previous literals used forward slashes,
        # which never match this escaping and left the text unparseable.)
        json_str = match.group(1).replace('\\x22', '"').replace('\\\\', '\\')
        result = [item['keyword'] for item in json.loads(json_str)]
        return '|'.join(result) if result else '[UNKOWN]'
    if __name__ == '__main__':
        # Fetch one captcha, then look up every tile row by row (2 rows of 4)
        # and print its (row, column) position next to the lookup result.
        im = get_img()
        for y, x in [(row, col) for row in range(2) for col in range(4)]:
            print((y, x), baidu_stu_lookup(get_sub_img(im, x, y)))

改自 https://github.com/andelf/fuck12306/blob/master/fuck12306.py

Python 3.4 可用

原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/8206.html

(0)
上一篇 2021年7月18日
下一篇 2021年7月18日

相关推荐

发表回复

登录后才能评论