最近查看 Bugly,发现存在一个多版本遗留、量级达几十万的棘手 bug:
java.lang.RuntimeException
createContext Failed: EGL_BAD_CONFIG
android.opengl.GLSurfaceView$EglHelper.throwEglException(GLSurfaceView.java:1245)
android.opengl.GLSurfaceView$EglHelper.throwEglException(GLSurfaceView.java:1236)
android.opengl.GLSurfaceView$EglHelper.start(GLSurfaceView.java:1086)
android.opengl.GLSurfaceView$GLThread.guardedRun(GLSurfaceView.java:1462)
android.opengl.GLSurfaceView$GLThread.run(GLSurfaceView.java:1299)
报错信息发生在Android framework api 中,因此考虑从源码入手,本案例是基于 android 7.0 。
1.查看源码走向
先来看下GLSurfaceView$EglHelper#start()
:
frameworks/base/opengl/java/android/opengl/GLSurfaceView.java
/**
 * Initializes EGL: obtains the EGL instance and the default display, then asks
 * the owning GLSurfaceView's config chooser and context factory for a config
 * and an EGL context.
 *
 * Calls throwEglException("createContext") when no valid context was created.
 */
public void start() {
    mEgl = (EGL10) EGLContext.getEGL();
    mEglDisplay = mEgl.eglGetDisplay(EGL10.EGL_DEFAULT_DISPLAY);
    //.......
    GLSurfaceView view = mGLSurfaceViewWeakRef.get();
    if (view == null) {
        // The view is gone, so there is nothing to create a context for.
        mEglConfig = null;
        mEglContext = null;
    } else {
        mEglConfig = view.mEGLConfigChooser.chooseConfig(mEgl, mEglDisplay);
        // Create the EGL context through the view's EGLContextFactory.
        mEglContext = view.mEGLContextFactory.createContext(mEgl, mEglDisplay, mEglConfig);
    }
    if (mEglContext == null || mEglContext == EGL10.EGL_NO_CONTEXT) {
        // Key point: a failed context creation is turned into an exception here.
        mEglContext = null;
        throwEglException("createContext");
    }
    mEglSurface = null;
}
结合bugly上的crash 信息,基本上可以确定是创建egl context 失败导致的异常。接下来看下,创建过程。
EGLContextFactory
的实现类(即GLSurfaceView$DefaultContextFactory
)中,看下createContext()
方法如何实现的:
/**
 * Default EGLContextFactory: creates a non-shared context, passing the
 * client-version attribute list only when a version has been configured.
 */
private class DefaultContextFactory implements EGLContextFactory {
    private int EGL_CONTEXT_CLIENT_VERSION = 0x3098;

    public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig config) {
        // Selects the GL ES version; the developer may set mEGLContextClientVersion to 2 or 3.
        int[] attrib_list = {EGL_CONTEXT_CLIENT_VERSION, mEGLContextClientVersion,
                EGL10.EGL_NONE };
        // With no version configured (0), no attribute list is passed at all.
        return egl.eglCreateContext(display, config, EGL10.EGL_NO_CONTEXT,
                mEGLContextClientVersion != 0 ? attrib_list : null);
    }
    //.......
}
接下来看下EGLImpl
中如何创建对应的context:
frameworks/base/opengl/java/com/google/android/gles_jni/EGLImpl.java
/**
 * Creates an EGL context by delegating to the native implementation.
 *
 * @return a new EGLContextImpl wrapping the native handle, or
 *         EGL10.EGL_NO_CONTEXT when the native call returns 0
 */
public EGLContext eglCreateContext(EGLDisplay display, EGLConfig config, EGLContext share_context, int[] attrib_list) {
    long eglContextId = _eglCreateContext(display, config, share_context, attrib_list);
    if (eglContextId == 0) { // Key point: a failed native creation is reported as EGL_NO_CONTEXT.
        return EGL10.EGL_NO_CONTEXT;
    }
    return new EGLContextImpl( eglContextId );
}

/** Native counterpart; a return value of 0 is treated as failure by the caller above. */
private native long _eglCreateContext(EGLDisplay display, EGLConfig config, EGLContext share_context, int[] attrib_list);
接下来看下 JNI 层的代码走向:
frameworks/base/core/jni/com_google_android_gles_jni_EGLImpl.cpp
// JNI entry point behind EGLImpl._eglCreateContext(): validates the Java
// arguments, unwraps them into native EGL handles, and forwards to
// eglCreateContext(). The resulting handle is returned to Java as a jlong.
static jlong jni_eglCreateContext(JNIEnv *_env, jobject _this, jobject display,
        jobject config, jobject share_context, jintArray attrib_list) {
    // Reject null handles or an invalid attribute list before touching EGL.
    if (display == NULL || config == NULL || share_context == NULL
            || !validAttribList(_env, attrib_list)) {
        jniThrowException(_env, "java/lang/IllegalArgumentException", NULL);
        return JNI_FALSE;
    }
    EGLDisplay dpy = getDisplay(_env, display);
    EGLConfig cnf = getConfig(_env, config);
    EGLContext shr = getContext(_env, share_context);
    // The attribute array is borrowed only for the duration of the native call.
    jint* base = beginNativeAttribList(_env, attrib_list);
    EGLContext ctx = eglCreateContext(dpy, cnf, shr, base);
    endNativeAttributeList(_env, attrib_list, base);
    return reinterpret_cast<jlong>(ctx);
}
接下来看下 eglCreateContext()
:
frameworks/native/opengl/libs/EGL/eglApi.cpp
// EGL wrapper-layer context creation: validates the display/connection and the
// optional share context, asks the loaded implementation to create the context,
// then wraps it in an egl_context_t tagged with the GLES version index.
// Returns EGL_NO_CONTEXT on any failure.
EGLContext eglCreateContext(EGLDisplay dpy, EGLConfig config,
        EGLContext share_list, const EGLint *attrib_list)
{
    clearError();
    egl_connection_t* cnx = NULL;
    const egl_display_ptr dp = validate_display_connection(dpy, cnx);
    if (dp) {
        if (share_list != EGL_NO_CONTEXT) {
            if (!ContextRef(dp.get(), share_list).get()) {
                return setError(EGL_BAD_CONTEXT, EGL_NO_CONTEXT);
            }
            // Unwrap the wrapper object to the implementation's own context handle.
            egl_context_t* const c = get_context(share_list);
            share_list = c->context;
        }
        EGLContext context = cnx->egl.eglCreateContext(
                dp->disp.dpy, config, share_list, attrib_list);
        if (context != EGL_NO_CONTEXT) {
            // figure out if it's a GLESv1 or GLESv2
            int version = 0;
            if (attrib_list) {
                // The list is a sequence of (attribute, value) pairs ended by EGL_NONE.
                while (*attrib_list != EGL_NONE) {
                    GLint attr = *attrib_list++;
                    GLint value = *attrib_list++;
                    if (attr == EGL_CONTEXT_CLIENT_VERSION) {
                        if (value == 1) {
                            version = egl_connection_t::GLESv1_INDEX;
                        } else if (value == 2 || value == 3) {
                            version = egl_connection_t::GLESv2_INDEX;
                        }
                    }
                }
            }
            egl_context_t* c = new egl_context_t(dpy, context, config, cnx,
                    version);
            return c;
        }
    }
    // Reached when no valid display/connection is available, or when the
    // implementation itself returned EGL_NO_CONTEXT.
    return EGL_NO_CONTEXT;
}
接下来继续看下:
// Validates the display and hands back the global EGL connection through cnx.
// Returns a null egl_display_ptr when the display is invalid, or when the
// connection has no dso, in which case the error is set to EGL_BAD_CONFIG.
egl_display_ptr validate_display_connection(EGLDisplay dpy,
        egl_connection_t*& cnx) {
    cnx = NULL;
    egl_display_ptr dp = validate_display(dpy);
    if (!dp)
        return dp;
    cnx = &gEGLImpl;
    if (cnx->dso == 0) {
        // Key point: this is where EGL_BAD_CONFIG originates
        // (presumably the EGL driver library was never loaded; confirm against the loader).
        return setError(EGL_BAD_CONFIG, egl_display_ptr(NULL));
    }
    return dp;
}
接下来看下validate_display():
// Looks up the display object for dpy. Sets EGL_BAD_DISPLAY when no such
// display exists and EGL_NOT_INITIALIZED when it exists but is not ready;
// both cases return a null egl_display_ptr.
egl_display_ptr validate_display(EGLDisplay dpy) {
    egl_display_ptr dp = get_display(dpy);
    if (!dp)
        return setError(EGL_BAD_DISPLAY, egl_display_ptr(NULL));
    if (!dp->isReady())
        return setError(EGL_NOT_INITIALIZED, egl_display_ptr(NULL));
    return dp;
}
也就是说,获取不到有效的 display,或者 EGL 连接不可用(cnx->dso == 0,此时错误码正是 EGL_BAD_CONFIG)时,会导致 egl context 创建失败,从而抛出异常。
2.推断定位原因和解决方案
谷歌搜索:相关的报错,有些较老的机型确实存在问题,如下图所示:
参考链接:https://github.com/ofZach/inkSpace/issues/2
有些机型可能不支持创建 OpenGL ES 2 的 context,可以通过以下代码来判断:
/**
 * EGLContextFactory that always requests an OpenGL ES 2.0 context and logs
 * any EGL errors pending before and after the creation call.
 */
private static class ContextFactory implements GLSurfaceView.EGLContextFactory {
    private static int EGL_CONTEXT_CLIENT_VERSION = 0x3098;

    public EGLContext createContext(EGL10 egl, EGLDisplay display, EGLConfig eglConfig) {
        Log.w(TAG, "creating OpenGL ES 2.0 context");
        // Log errors left over from earlier EGL calls so they are not blamed on eglCreateContext.
        checkEglError("Before eglCreateContext", egl);
        int[] attrib_list = {EGL_CONTEXT_CLIENT_VERSION, 2, EGL10.EGL_NONE };
        EGLContext context = egl.eglCreateContext(display, eglConfig, EGL10.EGL_NO_CONTEXT, attrib_list);
        // Any error logged here was raised by the creation call itself.
        checkEglError("After eglCreateContext", egl);
        return context;
    }

    public void destroyContext(EGL10 egl, EGLDisplay display, EGLContext context) {
        egl.eglDestroyContext(display, context);
    }
}
/**
 * Logs every pending EGL error, prefixed with {@code prompt}, until
 * eglGetError() reports EGL_SUCCESS.
 */
private static void checkEglError(String prompt, EGL10 egl) {
    for (int code = egl.eglGetError(); code != EGL10.EGL_SUCCESS; code = egl.eglGetError()) {
        Log.e(TAG, String.format("%s: EGL error: 0x%x", prompt, code));
    }
}
更多信息请阅读ndk-samples/hello-gl2
但比对了bugly上的机型发现,都是一些新机器,现在大部分都支持opengl es 2,因此排除了该可能性。
还有一种可能性是状态不对导致的,因为要触发EglHelper.start()
,必须readyToDraw() 返回true:
frameworks/base/opengl/java/android/opengl/GLSurfaceView.java
/**
 * Reports whether rendering may proceed: not paused, a usable surface with a
 * positive size exists, and either a render was requested or the render mode
 * is continuous.
 */
private boolean readyToDraw() {
    if (mPaused || !mHasSurface || mSurfaceIsBad) {
        return false;
    }
    if (mWidth <= 0 || mHeight <= 0) {
        return false;
    }
    return mRequestRender || mRenderMode == RENDERMODE_CONTINUOUSLY;
}
解读下几个条件:
- mPaused: 是否处于调用onPause()之后的暂停状态;
- mHasSurface 是指surface 创建成功;
- mSurfaceIsBad是指surface 是否已失效;
- mRequestRender 是请求主动Render;
- RENDERMODE_CONTINUOUSLY 是指循环渲染模式.
检查项目中发现,有一处老代码中存在严重的问题,没有调用GLSurfaceView#onPause()
,被注释掉了:
// Project override of onPause(), shown here as the defective code:
// it posts a native-pause message, switches to on-demand rendering and pauses
// the renderer, but never forwards the call to the superclass.
@Override
public void onPause() {
Message msg = Message.obtain();
msg.what = HANDLER_ON_NATIVE_PAUSE;
// Queue the native-pause message ahead of anything already pending.
mHandler.sendMessageAtFrontOfQueue(msg);
//this.activity.startChkRoomTick();
setRenderMode(RENDERMODE_WHEN_DIRTY);
mRenderer.onPause();
// The superclass call below is commented out, so the base onPause() never runs.
//super.onPause();
}
重点:Activity 的onResume()
和onPause()
必须调用GLSurfaceView 的onResume()
和onPause()
。
3.OpenGL 创建context 过程中的其他异常(EGL_BAD_ALLOC和EGL_BAD_DISPLAY)
当然也可以继续看下open gl es 端eglCreateContext的过程(可能抛出的其他异常):
frameworks/native/opengl/libagl/egl.cpp
// libagl (software GL) context creation: validates the display, initialises
// an ogles context, and fills in the embedded egl_context_t.
// Sets EGL_BAD_DISPLAY for an invalid display and EGL_BAD_ALLOC when
// ogles_init() fails. The share list and attribute list are ignored.
EGLContext eglCreateContext(EGLDisplay dpy, EGLConfig config,
        EGLContext /*share_list*/, const EGLint* /*attrib_list*/)
{
    // Check that the EGLDisplay is valid.
    // NOTE(review): the failure value passed here is EGL_NO_SURFACE although the
    // function returns an EGLContext; kept exactly as quoted.
    if (egl_display_t::is_valid(dpy) == EGL_FALSE)
        return setError(EGL_BAD_DISPLAY, EGL_NO_SURFACE);
    ogles_context_t* gl = ogles_init(sizeof(egl_context_t));
    // A null result from ogles_init() is reported as an allocation failure.
    if (!gl) return setError(EGL_BAD_ALLOC, EGL_NO_CONTEXT);
    egl_context_t* c = static_cast<egl_context_t*>(gl->rasterizer.base);
    c->flags = egl_context_t::NEVER_CURRENT;
    c->dpy = dpy;
    c->config = config;
    c->read = 0;
    c->draw = 0;
    return (EGLContext)gl;
}
若是想了解opengl es 的更多信息,可以阅读OpenGL ES升级打怪之 GLSurfaceView源码分析