Python跳一跳:使用Cython加速opencv像素级访问
简要概述
网上已经有很多Python实现的跳一跳辅助程序,有基于模版匹配的,还有基于深度学习端到端的方法,都很厉害。但是没有一种算法和我自己想的一样:逐行寻找与背景色不一致的像素,求出这些像素x坐标的最值;当最值连续三次不发生变化时,即认为找到了中心点的y坐标;而x坐标取第一个出现与背景色不一致像素的行中这些像素x值的平均值。所以自己写代码把想法实现了出来。
主要的算法如下:
1 使用模版匹配寻找棋子的位置
2 根据棋子坐标截取蓝框部分,用以识别下一跳的中心坐标
3 用Cython实现的子程序识别蓝框部分的中心:x坐标为第一行与背景色不一致的像素x值的平均值,y坐标为与背景色不一致像素的x坐标最值连续三次不发生变化时的y值;在寻找中心时,同时寻找RGB=(245, 245, 245)
的像素区域中心,用以纠正识别误差;如图中Out
所示。
4 最后根据识别的棋子和块中心计算出像素距离,计算跳跃时间;跳跃时间先是使用简单的线性模型,然后不断地记录跳到正中心时的距离和时间,最后使用KNN
算法给出下一跳的时间。
Code
首先是Cython写的像素级访问函数,文件名为fastGetLocation.pyx
,注意后缀是.pyx
而非.py
。
import numpy as np
cimport numpy as np
cimport cython
DTYPE = np.uint8
ctypedef np.uint8_t DTYPE_t
cdef unsigned char absSub(unsigned char v1, unsigned char v2):
    # Absolute difference of two 8-bit values; the larger operand is always
    # the minuend, so the unsigned subtraction never wraps around.
    if v1 > v2:
        return v1 - v2
    return v2 - v1
@cython.boundscheck(False)
@cython.wraparound(False)
def chessDetect(np.ndarray[DTYPE_t, ndim=3] image):
    """Locate the centre of the target block inside a cropped screenshot.

    The pixel at ``image[0, 0]`` is taken as the background colour. Rows are
    scanned top to bottom; a pixel is "foreground" when the sum of absolute
    per-channel differences to the background exceeds 30.

    * ``x`` is the mean column of the foreground pixels in the first row
      that contains any.
    * ``y`` is the row at which the leftmost/rightmost foreground columns
      have stopped changing often enough (see the counters below).
    * In parallel, every pixel equal to (245, 245, 245) is accumulated so
      that the centroid of that colour can be returned as well.

    Column 0 is never examined (both scans start at column 1).

    Parameters
    ----------
    image : uint8 ndarray with 3 dimensions (rows, columns, channels);
        at least 3 channels are read.

    Returns
    -------
    tuple ``(out, x, y, whitex, whitey)``
        ``out`` is a (rows, columns) uint8 mask with 255 at every foreground
        pixel that was visited; ``x``/``y`` are the detected centre, and
        ``whitex``/``whitey`` the centroid of the (245, 245, 245) pixels.
        Each of the four coordinates is 0 when nothing was detected.

    Raises
    ------
    ValueError
        If the image has no rows, no columns, or fewer than 3 channels.
        Bounds checking is disabled for this function, so such input must
        be rejected before any pixel is indexed.
    """
    cdef int height, width, i, j, ai, aj
    cdef int xmin, xmax, prexmin = 0, prexmax = 0
    # x and y start at 0 so that a failed detection returns a defined value
    # instead of whatever happened to be in the C stack slot.
    cdef int x = 0, y = 0
    cdef int lcount = 0, rcount = 0, xcount = 0, xsum = 0
    cdef int white_cx = 0, white_cy = 0
    # 64-bit accumulators: the coordinate sums grow with the image area.
    cdef long long whitex = 0, whitey = 0, whitecount = 0
    cdef bint Foundx = False, Foundxmin
    cdef unsigned int diff
    cdef unsigned char b0, b1, b2, c0, c1, c2

    height = image.shape[0]
    width = image.shape[1]
    cdef np.ndarray[DTYPE_t, ndim=2] out = np.zeros([height, width], dtype=DTYPE)

    if height == 0 or width == 0 or image.shape[2] < 3:
        raise ValueError("chessDetect needs a non-empty image with at least 3 channels")

    # Background colour, read once through the typed buffer.
    b0 = image[0, 0, 0]
    b1 = image[0, 0, 1]
    b2 = image[0, 0, 2]

    for i in range(height):
        xmin = 0
        xmax = 0
        Foundxmin = False
        for j in range(1, width):
            # Direct element indexing on the typed buffer avoids building a
            # temporary NumPy slice object for every pixel.
            c0 = image[i, j, 0]
            c1 = image[i, j, 1]
            c2 = image[i, j, 2]
            if c0 == 245 and c1 == 245 and c2 == 245:
                whitex += j
                whitey += i
                whitecount += 1
            diff = absSub(c0, b0) + absSub(c1, b1) + absSub(c2, b2)
            if diff > 30:
                out[i, j] = 255
                if not Foundx:
                    # Only the first foreground row contributes to x.
                    xsum += j
                    xcount += 1
                if not Foundxmin:
                    xmin = j
                    Foundxmin = True
                xmax = j

        if not Foundx and xcount != 0:
            x = xsum // xcount
            Foundx = True

        if (xmin == prexmin or xmax == prexmax) and Foundx and (xmax - x > 50 or x - xmin > 50):
            # lcount: rows where BOTH extremes repeated the previous row.
            # rcount: rows where the right extreme repeated the previous row.
            # NOTE(review): the name suggests lcount was meant to track the
            # left edge only; the original condition is kept unchanged.
            if xmin == prexmin and xmax == prexmax:
                lcount += 1
            if xmax == prexmax:
                rcount += 1

        if lcount >= 2 or rcount >= 6:
            y = i
            break
        prexmin = xmin
        prexmax = xmax

    # Keep scanning a few rows below the stopping row to collect more of the
    # (245, 245, 245) region. Row i itself was already fully scanned above,
    # so start at i + 1 to avoid counting its pixels twice; the lower bound
    # of the window (i + 20, clipped to the image) is unchanged.
    for ai in range(i + 1, min(height, i + 20)):
        for aj in range(1, width):
            c0 = image[ai, aj, 0]
            c1 = image[ai, aj, 1]
            c2 = image[ai, aj, 2]
            if c0 == 245 and c1 == 245 and c2 == 245:
                whitex += aj
                whitey += ai
                whitecount += 1
            diff = absSub(c0, b0) + absSub(c1, b1) + absSub(c2, b2)
            if diff > 30:
                out[ai, aj] = 255

    if whitecount != 0:
        white_cx = <int>(whitex // whitecount)
        white_cy = <int>(whitey // whitecount)
    return out, x, y, white_cx, white_cy
关于如何在Cython中与numpy交互,请参阅Cython文档。然后在同目录下建立setup.py
# Build script for the Cython extension; run it with
#   python setup.py build_ext --inplace
from distutils.core import setup, Extension
from Cython.Build import cythonize
import numpy

# Turn the .pyx source into extension module definitions.
ext_modules = cythonize("fastGetLocation.pyx")
# The NumPy headers are required because the .pyx file cimports numpy.
numpy_include = numpy.get_include()
setup(ext_modules=ext_modules, include_dirs=[numpy_include])
然后,在命令行使用
python setup.py build_ext --inplace
编译Cython生成对应的C代码和可以被Python调用的库,这样Cython的像素级访问就完成啦。简单对比一下性能(基于Intel core i7 3630QM),直接使用Python访问numpy进行处理需要8秒;使用Cython
之后只需要400ms,提速约20倍;使用C++版本的OpenCV实现,处理一张图像仅需20ms。由此可见,还是C++
的速度更快更好。
下面进入主题部分:
# encoding=utf-8
import cv2
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from fastGetLocation import chessDetect
import time
import os
import glob
class AutoJumper():
    """Drive the jump game over adb: screenshot, detect, predict, press.

    Each call to :meth:`jump` grabs a screenshot, finds the piece with
    template matching, finds the target centre with ``chessDetect``,
    converts the pixel distance to a press duration with a k-nearest
    neighbours regressor, and sends the press through ``adb``.
    """

    def __init__(self):
        """Load the piece template and training data and open the window.

        Raises:
            SystemExit: with status 1 if ``player.jpg`` cannot be read.
        """
        self.player = cv2.imread("player.jpg")
        if self.player is None:
            print("player.jpg lost, exiting...")
            # Non-zero status: this is a failure, not a normal shutdown.
            raise SystemExit(1)
        self.player_height, self.player_width, _ = self.player.shape
        self.screen_width, self.screen_height = 1080, 1920
        self.player_bias = 40  # margin around the piece excluded from the search region
        self.BEGIN_Y = 540  # first screenshot row that is searched
        self.delayTime = 1000
        self.debug = False
        # Saved screenshots replayed in debug mode; sorted so that the replay
        # order is deterministic. os.path.join keeps the pattern portable.
        self.paths = sorted(glob.glob(os.path.join(".", "backup", "*.png")))
        cv2.namedWindow("Auto_Jump^_^", 0)

        data = pd.read_csv("data.csv")
        print("%d pre data loaded!" % len(data))
        if len(data) > 500:
            # Train on the 500 most recent samples only.
            data = data.tail(500)
        reg_X = data['distance'].values.reshape(-1, 1)
        reg_y = data['time']
        self.knnreg = KNeighborsRegressor(n_neighbors=2).fit(reg_X, reg_y)

        # Running parameters
        self.player_x, self.player_y = 0, 0
        self.chess_x, self.chess_y = 0, 0
        self.count = 0
        self.predistance, self.pretime = 0, 0
        self.currdistance, self.currtime = 0, 0
        # Set by detectChess when the detected centre coincides with the
        # (245, 245, 245) centroid; makes parameterUpdate log the sample.
        self.jumpRight = False

    def get_screenshot(self, id):
        """Capture the device screen and pull it to ``<id>.png`` in the cwd."""
        os.system('adb shell screencap -p /sdcard/%s.png' % str(id))
        os.system('adb pull /sdcard/%s.png .' % str(id))

    def makeJump(self):
        """Send a press of ``self.currtime`` milliseconds via ``adb``.

        The press position is jittered by up to 19 px on each axis; start
        and end point of the swipe are identical, so it acts as a long press.
        """
        press_x = int(320 + np.random.randint(20))
        press_y = int(410 + np.random.randint(20))
        cmd = 'adb shell input swipe %d %d %d %d ' % (press_x, press_y, press_x, press_y) + str(self.currtime)
        os.system(cmd)

    def detectPlayer(self):
        """Find the piece by template matching and store its foot position.

        The stored point is the bottom-centre of the best match: the match's
        left edge plus half the template width, and its top edge plus the
        full template height. A marker is drawn on ``self.image``.
        """
        scores = cv2.matchTemplate(self.image, self.player, cv2.TM_CCOEFF_NORMED)
        _, _, _, best_loc = cv2.minMaxLoc(scores)
        foot = (best_loc[0] + self.player_width // 2, best_loc[1] + self.player_height)
        cv2.circle(self.image, foot, 10, 255, 10)
        self.player_x, self.player_y = foot

    def detectChess(self):
        """Find the target centre in the half of the screen opposite the piece.

        The search region spans rows ``BEGIN_Y`` to ``player_y`` and the
        columns on the other side of the piece (offset by ``player_bias``).
        When the detected centre is within 30 px (sum of absolute x and y
        differences) of the (245, 245, 245) centroid, the centroid is used
        instead and ``jumpRight`` is set.
        """
        if self.player_x >= self.screen_width / 2:
            startx, endx = 0, max(self.player_x - self.player_bias, 10)
        else:
            startx, endx = self.player_x + self.player_bias, self.screen_width
        starty, endy = self.BEGIN_Y, self.player_y

        out, x, y, whitex, whitey = chessDetect(self.image[starty:endy, startx:endx])
        cv2.rectangle(self.image, (startx, starty), (endx, endy), 255, 10)
        cv2.circle(self.image, (whitex + startx, whitey + starty), 20, (0, 255, 0), 10)
        cv2.circle(self.image, (x + startx, y + starty), 10, (0, 0, 255), 10)
        if abs(x - whitex) + abs(y - whitey) < 30:
            x = whitex
            y = whitey
            self.jumpRight = True
        # chessDetect works in crop coordinates; shift back to the screenshot.
        self.chess_x, self.chess_y = x + startx, y + starty

    def calDistanceAndTime(self):
        """Compute the piece-to-target distance and predict the press time."""
        self.currdistance = np.sqrt((self.chess_x - self.player_x) ** 2 + (self.chess_y - self.player_y) ** 2)
        # predict() expects a 2-D (n_samples, n_features) input and returns
        # one value per sample, so wrap the scalar and unwrap the result.
        predicted = self.knnreg.predict([[self.currdistance]])
        self.currtime = int(predicted[0])

    def showImage(self):
        """Show the annotated screenshot; exit the program when Esc is pressed."""
        cv2.imshow("Auto_Jump^_^", self.image)
        if cv2.waitKey(self.delayTime) & 0xFF == 27:
            print("Ese key pressed, exiting")
            raise SystemExit(0)

    def parameterUpdate(self):
        """Advance the step counter and, if flagged, append the sample to data.csv."""
        self.count += 1
        self.predistance, self.pretime = self.currdistance, self.currtime
        if self.jumpRight:
            print("Writing log: (%f, %d)" % (self.predistance, self.pretime))
            # The context manager closes the file even if the write fails.
            with open("data.csv", 'a') as f:
                f.write("%f,%d\n" % (self.predistance, self.pretime))
            self.jumpRight = False

    def jump(self):
        """Run one full step: capture, detect, predict, press, show, log.

        Raises:
            SystemExit: in debug mode, once all saved screenshots are used.
            RuntimeError: if the screenshot image cannot be read.
        """
        started = time.time()
        self.get_screenshot(0)
        if self.debug:
            if self.count >= len(self.paths):
                print("No more debug screenshots, exiting")
                raise SystemExit(0)
            source = self.paths[self.count]
            self.delayTime = 0
        else:
            source = "0.png"
        self.image = cv2.imread(source)
        if self.image is None:
            # cv2.imread signals failure by returning None; stop here with a
            # clear message instead of failing later inside OpenCV.
            raise RuntimeError("could not read screenshot %r" % source)
        self.detectPlayer()
        self.detectChess()
        self.calDistanceAndTime()
        self.makeJump()
        self.showImage()
        self.parameterUpdate()
        print("\nStep %d:" % self.count, time.time() - started)
if __name__ == '__main__':
    # Script entry point: build the bot once, then keep jumping until the
    # process is terminated (jump() itself exits when Esc is pressed).
    bot = AutoJumper()
    while True:
        bot.jump()
主体部分的代码和其他作者的代码大同小异,所以没怎么写注释。这里使用了KNN
算法去计算距离,并且在收集数据较多时,只取后500项数据进行训练,理论上具有一定的自学习能力。
距离时间模型
根据我自己手机的数据(小米Note标准版),绘制成以下距离-时间图像,横轴为像素距离,纵轴为跳跃时间。
距离-时间图像
从图中可以看出,距离时间大体上呈线性关系,但是在两端具有截面效应,而且由于高距离的样本偏少,会导致距离较远时跳跃时间样本不足,从而导致并不能一直跳在中心。
不足
其实我一直想实现能够一直跳到正中心的算法,但是后来发现这个目标比较难。此算法目前达到的最高分是1538分。
每跳得分大致在6分左右(衡量不同算法的优劣的指标之一),与我理想中的32还相差甚远。识别正方体时还是比较准确的,但是对于圆筒就有差距了,虽然已经做了差异化处理,但是还是不够准确;另外一点就是距离时间的映射模型还有待提升。我想,实现这个算法最大的收获便是学习了
Cython
的使用吧。这也让我觉得,Python+C
的技能储备应该是比较好的,这也会是我之后的技能发展方向。