JciX ~

2018-01-12T00:21:48+08:00

~ o(*￣▽￣*)ブ

Reply

2018-01-15T13:17:02+08:00

^_^

Reply

	#!/usr/local/bin/python
	# -*- coding: utf-8 -*-

	import pytesseract
	from PIL import Image
	from PIL import ImageGrab
	import webbrowser
	import time
	import jieba
	import urllib
	import threading
	from multiprocessing import Process, Queue

	# DEBUG: when True, the grabbed screenshots are saved to disk and
	# per-stage timings are collected and printed.
	#DEBUG = True
	DEBUG = False
	# CUT: when True, the OCR'd question text is segmented with
	# jieba.cut_for_search before it is used as the search query.
	#CUT = False
	CUT = True

	def start_browser(s):
	    """Open the URL `s` in a new tab of the default web browser."""
	    webbrowser.open_new_tab(s)

	def options(q):
	    """Grab the answer-option screen region, OCR it, and publish the lines.

	    Intended to run as a child process. The OCR text is UTF-8 encoded,
	    split on newlines, and put on `q` as a single list.

	    Args:
	        q: queue that receives exactly one list of UTF-8 byte strings.

	    If the grab or OCR step raises, an empty list is still put on `q`
	    (and the exception propagates), so a consumer blocked on `q.get()`
	    is never left waiting forever.
	    """
	    lines = []
	    try:
	        shot = ImageGrab.grab((60, 395, 380, 640))
	        if DEBUG:
	            shot.save('/Users/Jaycee/test/ocr/iphone/options.png')
	        text = pytesseract.image_to_string(shot, lang='chi_sim')
	        lines = text.encode(encoding='UTF-8', errors='strict').split('\n')
	    finally:
	        # Always hand something back so the consumer can take its error path.
	        q.put(lines)

	def _count_hits(option, page):
	    """Return how often the search tokens of `option` occur in `page`.

	    `option` is segmented with jieba.cut_for_search; each token is UTF-8
	    encoded and counted in the raw page bytes, and the counts are summed.
	    """
	    return sum(page.count(token.encode(encoding='UTF-8'))
	               for token in jieba.cut_for_search(option))


	#@profile
	def main():
	    """Run the grab -> OCR -> search -> count loop until interrupted.

	    Each round:
	      1. starts a child process that OCRs the answer options,
	      2. grabs and OCRs the question region (optionally segmenting it
	         with jieba when CUT is set),
	      3. opens a Baidu search for the question in the browser and also
	         downloads the result page,
	      4. counts how often each option's tokens occur in that page and
	         prints the three counts,
	      5. waits for Enter before starting the next round.

	    When DEBUG is set, the question screenshot is saved and per-stage
	    timings are printed.
	    """
	    while True:
	        round_start = time.time()

	        # OCR the options in a child process so it overlaps with the
	        # question OCR and the page download below.
	        q = Queue(maxsize=10)
	        o_p = Process(target=options, args=(q,))
	        o_p.start()

	        # Start the clock *before* grabbing so grab_time measures the grab.
	        t0 = time.time()
	        image = ImageGrab.grab((50, 170, 540, 330))
	        if DEBUG:
	            grab_time = time.time() - t0
	            image.save('/Users/Jaycee/test/ocr/iphone/1.png')
	        # Taken after the optional save so OCR timing excludes disk I/O.
	        t1 = time.time()

	        code = pytesseract.image_to_string(image, lang='chi_sim')
	        print(code)
	        if CUT:
	            code = ' '.join(jieba.cut_for_search(code))
	        if DEBUG:
	            t2 = time.time()
	            ocr_time = t2 - t1

	        # Percent-encode the OCR text: it may contain spaces, newlines or
	        # characters such as '&' and '#' that would corrupt the query string.
	        query = urllib.quote_plus(
	            code.encode(encoding='UTF-8', errors='strict'))
	        url = "http://www.baidu.com/s?rn=50&wd=" + query
	        p = Process(target=start_browser, args=(url,))
	        p.start()
	        if DEBUG:
	            t3 = time.time()
	            open_browser_time = t3 - t2

	        download_start = time.time()
	        res = urllib.urlopen(url).read()
	        print("Download Html Time: %s" % (time.time() - download_start))

	        # Drain the queue before joining: joining a process that still has
	        # items buffered for a queue is a documented deadlock hazard.
	        ostr_l = q.get()
	        o_p.join()
	        try:
	            # Options are read from every other OCR line; presumably the
	            # OCR output has blank lines between options -- TODO confirm.
	            choices = (ostr_l[0], ostr_l[2], ostr_l[4])
	        except IndexError:
	            raw_input("Error! Press 'Enter' to process next..")
	            continue

	        counts = [_count_hits(choice, res) for choice in choices]
	        if DEBUG:
	            option_count_time = time.time() - t3
	            print("grab time: %s ocr time: %s open browser time: %s "
	                  "option count time: %s"
	                  % (grab_time, ocr_time, open_browser_time,
	                     option_count_time))
	        for label, choice, count in zip("ABC", choices, counts):
	            print("%s:\t[OCR] %s [COUNT] %d" % (label, choice, count))
	        print("Total Time: %s" % (time.time() - round_start))
	        raw_input("Press 'Enter' to process next..")

	# Script entry point: run the loop only when executed directly.
	if __name__ == '__main__':
	    main()

JciX ~

Jc's Blog

《百万英雄》Python + OCR搜索引擎统计辅助简单实现思路

0. 环境

1. 思路

2. 关键步骤

2.1 投屏

2.2 截屏和OCR

2.3 搜索

3. 优化

3.1 分词

3.2 多进程并行

相关

2 comments

Leave a Reply to JayceeZ Cancel reply