0. 环境
iOS 10 + MacOS 10.12 + Python 2.7
1. 思路
APP界面中弹出的题的位置和答案的位置都是固定的,因此我们可以将手机屏幕想办法投到电脑屏幕上,通过OCR识别指定区域,实时打开搜索引擎界面搜索问题,甚至匹配答案。
2. 关键步骤
2.1 投屏
我是iPhone 5s + Mac电脑,可以用Mac的Quicktime Player播放器的屏幕录制功能(安卓据说可以用ADB)。
具体的,打开Quicktime Player后,点击“文件–新建屏幕录制–(红色录制按钮旁的下拉菜单)选择从手机录制”,这时,手机屏幕就实时投到屏幕上了。
2.2 截屏和OCR
- 截屏
截屏要将你的手机投屏窗口固定在一个位置,找准屏幕上的左上角和右下角两个坐标,利用PIL中的ImageGrab进行抓屏,以截取题干为例,代码如下:
from PIL import ImageGrab
image = ImageGrab.grab((50, 170, 540, 330))
- OCR
利用tesseract库和对应的pytesseract接口进行OCR,具体配置可以参考[1]。
import pytesseract
ocr_str = pytesseract.image_to_string(image, lang='chi_sim')
2.3 搜索
分两种思路,我们可以直接打开一个浏览器页面用百度搜索,把答案筛选工作交给人:
import webbrowser
url = "http://www.baidu.com/s?rn=50&wd=" + ocr_str.encode(encoding='UTF-8',errors='strict')
webbrowser.open_new_tab(url)
也可以将搜索结果页面下载下来,用选项字符串进行匹配,统计该选项出现的次数(当然,第二种方法需要增加一次识别答案字符串的OCR过程):
# 以统计答案1出现的次数为例
import urllib
res = urllib.urlopen(url).read()
o1cnt = res.count(o1_ocr.encode(encoding='UTF-8'))
3. 优化
3.1 分词
尤其是在我们用选项字符串匹配下载下来的搜索页面文本时,很可能匹配数很少,这是由于正确答案不一定一字不差地藏在搜索文本中,我想到的更好的方法就是进行分词,然后匹配出现的次数。这里用到了jieba分词的python接口。这时我们就应该将2.3节中的第二段代码改为如下:
import jieba
import urllib
res = urllib.urlopen(url).read()
o1cnt = 0
# Split the option into search-granularity tokens first, then add up how
# often each token occurs in the downloaded result page.
o1str_c = jieba.cut_for_search(o1str)
for i in o1str_c:
    o1cnt += res.count(i.encode(encoding='UTF-8'))
3.2 多进程并行
游戏只有10秒钟,而且题干是从左到右滚动出现的,所以留给我们计算的时间只有8秒左右,时间十分重要。利用line profiler工具,发现最耗时的部分出现在OCR部分(2秒左右),如果要进行选项匹配,需要2次截屏和OCR。所以,想到可以用2个进程将两次OCR并行:将选项的OCR放到另一个子进程中,在最后进行字符串匹配时进行同步。
def options(q):
    """Grab the options region of the screen, OCR it and put the lines on q.

    The recognised text is UTF-8 encoded and split on newlines; the
    resulting list is handed back to the caller through the queue ``q``.
    """
    o = ImageGrab.grab((60, 395, 380, 640))
    ostr = pytesseract.image_to_string(o, lang='chi_sim').encode(encoding='UTF-8',errors='strict')
    ostr_l = ostr.split('\n')
    q.put(ostr_l)
def main():
    """Sketch: OCR the options in a child process while the parent works on the question."""
    # ... other initialisation
    q = Queue(maxsize = 10)
    o_p = Process(target = options, args = (q, ))
    o_p.start()
    # ... OCR the question and download the search-result page
    # Fetch the result before joining: joining a child whose queued data has
    # not been consumed yet can deadlock.
    ostr_l = q.get()
    o_p.join()
    # ... match the option strings against the search-result page and count
另外,如果我们还同时打开浏览器页面用于肉眼搜索,打开浏览器也是挺耗时的(0.7秒左右),我们可以将其放到一个子进程中进行。
至此,我们将原先7秒左右可以运行完的程序,优化到了4秒左右,还能留下3秒钟供我们考虑到底选哪个。
实现代码,仅供参考:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
"""Quiz helper (Python 2.7).

Grabs fixed regions of the screen, runs them through tesseract OCR, searches
the recognised question on Baidu and counts how often each answer option
occurs in the result page.
"""
import threading
import time
import urllib
import webbrowser
from multiprocessing import Process, Queue

import jieba
import pytesseract
from PIL import Image
from PIL import ImageGrab

# Set to True to save the captured screenshots and print per-stage timings.
DEBUG = False
# When True, the OCR'd question is segmented with jieba before it is searched.
CUT = True
def start_browser(s):
    """Open the URL ``s`` in a new tab of the default web browser."""
    # new=2 asks for a new browser tab, which is what open_new_tab() does.
    webbrowser.open(s, new=2)
def options(q, box=(60, 395, 380, 640)):
    """Grab the answer-options screen region, OCR it and put the lines on q.

    Used as the target of a child process so that the options OCR overlaps
    with the question OCR done by the parent.

    Args:
        q: Queue the result is put on. Exactly one item is always put: the
            list of UTF-8 encoded text lines recognised in the region, or an
            empty list when the grab or the OCR raised.
        box: Bounding box handed to ``ImageGrab.grab``. Defaults to the
            options region the script was calibrated for.
    """
    lines = []
    try:
        shot = ImageGrab.grab(box)
        if DEBUG:
            shot.save('/Users/Jaycee/test/ocr/iphone/options.png')
        text = pytesseract.image_to_string(shot, lang='chi_sim')
        lines = text.encode(encoding='UTF-8', errors='strict').split('\n')
    finally:
        # Always deliver a result: the parent blocks on q.get(), so failing
        # without a put would leave it waiting forever.
        q.put(lines)
def _count_hits(page, option):
    """Return how often the jieba search tokens of ``option`` occur in ``page``."""
    hits = 0
    for token in jieba.cut_for_search(option):
        # Whitespace-only tokens would match all over the HTML and swamp the
        # real counts, so they are skipped.
        if token.strip():
            hits += page.count(token.encode(encoding='UTF-8'))
    return hits


#@profile
def main():
    """Run the interactive capture / OCR / search loop; never returns.

    Each round:
      1. starts a child process that OCRs the answer options,
      2. grabs and OCRs the question region (optionally segmenting the text
         with jieba when CUT is set),
      3. opens a Baidu search for the question in the browser (in another
         child process) and downloads the same result page,
      4. counts how often the tokens of each option occur in that page and
         prints the counts for options A, B and C,
      5. waits for Enter before starting the next round.
    """
    while True:
        t00 = time.time()
        q = Queue(maxsize=10)
        o_p = Process(target=options, args=(q,))
        o_p.start()

        # Question region as a PIL bounding box: (left, upper, right, lower).
        t0 = time.time()  # taken before the grab so grab_time measures it
        image = ImageGrab.grab((50, 170, 540, 330))
        if DEBUG:
            grab_time = time.time() - t0
            image.save('/Users/Jaycee/test/ocr/iphone/1.png')

        t1 = time.time()
        code = pytesseract.image_to_string(image, lang='chi_sim')
        print(code)
        if CUT:
            code = ' '.join(jieba.cut_for_search(code))
        if DEBUG:
            t2 = time.time()
            ocr_time = t2 - t1

        # Percent-encode the query: OCR text can contain spaces, newlines,
        # '&' or '#', which would otherwise corrupt the URL.
        query = urllib.quote(code.encode(encoding='UTF-8', errors='strict'))
        url = "http://www.baidu.com/s?rn=50&wd=" + query
        p = Process(target=start_browser, args=(url,))
        p.start()
        if DEBUG:
            t3 = time.time()
            open_browser_time = t3 - t2

        t000 = time.time()
        response = urllib.urlopen(url)
        try:
            res = response.read()
        finally:
            response.close()
        print("Download Html Time: %s" % (time.time() - t000))

        # Drain the queue before joining: joining a child whose queued data
        # has not been consumed yet can deadlock.
        ostr_l = q.get()
        o_p.join()

        # Options are read from lines 0, 2 and 4 of the OCR output
        # (presumably tesseract emits a blank line between options -- verify
        # against real captures).
        try:
            o1str, o2str, o3str = ostr_l[0], ostr_l[2], ostr_l[4]
        except IndexError:
            raw_input("Error! Press 'Enter' to process next..")
            continue

        o1cnt = _count_hits(res, o1str)
        o2cnt = _count_hits(res, o2str)
        o3cnt = _count_hits(res, o3str)

        if DEBUG:
            # Measured from t3, so this also covers the page download and
            # the wait for the options process.
            option_count_time = time.time() - t3
            print("grab time: %s ocr time: %s open browser time: %s option count time: %s"
                  % (grab_time, ocr_time, open_browser_time, option_count_time))
        print("A:\t[OCR] %s [COUNT] %d" % (o1str, o1cnt))
        print("B:\t[OCR] %s [COUNT] %d" % (o2str, o2cnt))
        print("C:\t[OCR] %s [COUNT] %d" % (o3str, o3cnt))
        print("Total Time: %s" % (time.time() - t00))
        raw_input("Press 'Enter' to process next..")
# Start the capture/search loop only when the file is executed as a script.
if __name__ == "__main__":
    main()
[1] Python 中文OCR, http://blog.csdn.net/wwj_748/article/details/78109680?utm_source=tuicool&utm_medium=referral
~ o(* ̄▽ ̄*)ブ
^_^