Service的自动重启问题
版权声明:本文为作者原创,转载必须注明出处。
转载请注明出处:http://www.jianshu.com/p/1c995328c293
今天简单讨论下服务重启的问题和分析一个常见的系统log:
一、Service自动启动服务流程:
每次调用startService(Intent)的时候,都会调用该Service对象的onStartCommand(Intent,int,int)方法,这个方法return 一个int值,return 的值有四种:
START_STICKY:如果service进程被kill掉,保留service的状态为开始状态,但不保留递送的intent对象。
随后系统会尝试重新创建service,由于服务状态为开始状态,所以创建服务后一定会调用onStartCommand(
Intent,int,int)方法。如果在此期间没有任何启动命令被传递到service,那么参数Intent将为null。
START_NOT_STICKY:“非粘性的”。使用这个返回值时,如果在执行完onStartCommand后,服务被异常
kill掉,系统不会自动重启该服务。
START_REDELIVER_INTENT:重传Intent。使用这个返回值时,如果在执行完onStartCommand后,服务被
异常kill掉,系统会自动重启该服务,并将Intent的值传入。
START_STICKY_COMPATIBILITY:START_STICKY的兼容版本,但不保证服务被kill后一定能重启。
最终在framework层,s.onStartCommand返回res
// Handles a start request for the Service registered under data.token: invokes
// onStartCommand() (or onTaskRemoved() when data.taskRemoved is set) and reports
// the resulting code to the activity manager through serviceDoneExecuting().
// The dotted lines below are elisions made in this listing, not code.
1. private void handleServiceArgs(ServiceArgsData data) {
2. Service s = mServices.get(data.token);
3. if (s != null) {
4. try {
5. if (data.args != null) {
6. data.args.setExtrasClassLoader(s.getClassLoader());
7. }
8. int res;
9. if (!data.taskRemoved) {
10. // Invoke the application's onStartCommand() lifecycle callback.
11.
12. // The value the service returns here is how it controls whether it may be
// restarted later; that value is stored in res.
13. res = s.onStartCommand(data.args, data.flags, data.startId);
14. } else {
15. s.onTaskRemoved(data.args);
16. res = Service.START_TASK_REMOVED_COMPLETE;
17. }
18. ...............
19. try {
20. // Report res to the system; what the system does with it produces the restart behavior.
21. ActivityManagerNative.getDefault().serviceDoneExecuting(
22. data.token, 1, data.startId, res);
23. } catch (RemoteException e) {
24. // nothing to do.
25. }
26. ensureJitEnabled();
27. }
28. ..................
29. }
30. }
下面就是这个特性的关键代码,里面的注释已经写得很全了,关键起作用的就是stopIfKilled这个标志。
// Records the outcome of a finished service operation on ServiceRecord r.
// When type == 1 (a service start), res is the onStartCommand() result and decides
// how r.stopIfKilled, r.callStart and the delivered start item for startId are updated.
// An unrecognized res throws IllegalArgumentException. A null r is only logged.
1. void serviceDoneExecutingLocked(ServiceRecord r, int type, int startId, int res) {
2. boolean inDestroying = mDestroyingServices.contains(r);
3. if (r != null) {
4. if (type == 1) {
5. // This is a call from a service start... take care of
6. // book-keeping.
7. r.callStart = true;
8. switch (res) {
9. case Service.START_STICKY_COMPATIBILITY:
10. case Service.START_STICKY: {
11. // We are done with the associated start arguments.
12. r.findDeliveredStart(startId, true);
13. // Don't stop if killed.
14. r.stopIfKilled = false;
15. break;
16. }
17. case Service.START_NOT_STICKY: {
18. // We are done with the associated start arguments.
19. r.findDeliveredStart(startId, true);
20. if (r.getLastStartId() == startId) {
21. // There is no more work, and this service
22. // doesn't want to hang around if killed.
23. r.stopIfKilled = true;
24. }
25. break;
26. }
27. case Service.START_REDELIVER_INTENT: {
28. // We'll keep this item until they explicitly
29. // call stop for it, but keep track of the fact
30. // that it was delivered.
31. ServiceRecord.StartItem si = r.findDeliveredStart(
startId, false);
32. if (si != null) {
33. si.deliveryCount = 0;
34. si.doneExecutingCount++;
35. // Don't stop if killed.
// NOTE(review): the flag is nevertheless set to true here. killServicesLocked()
// only brings a service down when pendingStarts is empty, and
// scheduleServiceRestartLocked() puts delivered items back on pendingStarts,
// so presumably the kept start item is what keeps the service alive - confirm.
36. r.stopIfKilled = true;
37. }
38. break;
39. }
40. case Service.START_TASK_REMOVED_COMPLETE: {
41. // Special processing for onTaskRemoved(). Don't
42. // impact normal onStartCommand() processing.
43. r.findDeliveredStart(startId, true);
44. break;
45. }
46. default:
47. throw new IllegalArgumentException(
48. "Unknown service start result: " + res);
49. }
50. if (res == Service.START_STICKY_COMPATIBILITY) {
51. r.callStart = false;
52. }
53. }
54. final long origId = Binder.clearCallingIdentity();
55. serviceDoneExecutingLocked(r, inDestroying, inDestroying);
56. Binder.restoreCallingIdentity(origId);
57. } else {
58. Slog.w(TAG, "Done executing unknown service from pid "
59. + Binder.getCallingPid());
60. }
61. }
那么这个标志位又是在哪些情况下使得服务可以重启的呢?这种场景入口很多啊,比如系统清理进程等,总之就是APP Died的情况下,入口方法不列举了,最后都会执行到这来:
// Cleans up all service state belonging to the process record app.
// Clears per-service and per-binding state, removes the app's connections, then for
// each remaining service either brings it down or schedules a restart, depending on
// allowRestart, sr.crashCount, the persistent flag and sr.stopIfKilled.
// When allowRestart is false, restarting/pending entries for the process are also dropped.
1. final void killServicesLocked(ProcessRecord app, boolean allowRestart) {
2. // Report disconnected services.
3. if (false) {
4. // XXX we are letting the client link to the service for
5. // death notifications.
6. if (app.services.size() > 0) {
7. Iterator<ServiceRecord> it = app.services.iterator();
8. while (it.hasNext()) {
9. ServiceRecord r = it.next();
10. for (int conni=r.connections.size()-1; conni>=0; conni--) {
11. ArrayList<ConnectionRecord> cl = r.connections.valueAt(conni);
12. for (int i=0; i<cl.size(); i++) {
13. ConnectionRecord c = cl.get(i);
14. if (c.binding.client != app) {
15. try {
16. //c.conn.connected(r.className, null);
17. } catch (Exception e) {
18. // todo: this should be asynchronous!
19. Slog.w(TAG, "Exception thrown disconnected servce "
20. + r.shortName
21. + " from app " + app.processName, e);
22. }
23. }
24. }
25. }
26. }
27. }
28. }
29.
30. // First clear app state from services.
31. for (int i=app.services.size()-1; i>=0; i--) {
32. ServiceRecord sr = app.services.valueAt(i);
33. synchronized (sr.stats.getBatteryStats()) {
34. sr.stats.stopLaunchedLocked();
35. }
36. if (sr.app != null) {
37. sr.app.services.remove(sr);
38. }
39. sr.app = null;
40. sr.isolatedProc = null;
41. sr.executeNesting = 0;
42. sr.forceClearTracker();
43. if (mDestroyingServices.remove(sr)) {
44. if (DEBUG_SERVICE) Slog.v(TAG, "killServices remove destroying " + sr);
45. }
46.
47. final int numClients = sr.bindings.size();
48. for (int bindingi=numClients-1; bindingi>=0; bindingi--) {
49. IntentBindRecord b = sr.bindings.valueAt(bindingi);
50. if (DEBUG_SERVICE) Slog.v(TAG, "Killing binding " + b
51. + ": shouldUnbind=" + b.hasBound);
52. b.binder = null;
53. b.requested = b.received = b.hasBound = false;
54. }
55. }
56.
57. // Clean up any connections this application has to other services.
58. for (int i=app.connections.size()-1; i>=0; i--) {
59. ConnectionRecord r = app.connections.valueAt(i);
60. removeConnectionLocked(r, app, null);
61. }
62. app.connections.clear();
63.
64. ServiceMap smap = getServiceMap(app.userId);
65.
66. // Now do remaining service cleanup.
67. for (int i=app.services.size()-1; i>=0; i--) {
68. ServiceRecord sr = app.services.valueAt(i);
69. // Sanity check: if the service listed for the app is not one
70. // we actually are maintaining, drop it.
71. if (smap.mServicesByName.get(sr.name) != sr) {
72. ServiceRecord cur = smap.mServicesByName.get(sr.name);
73. Slog.wtf(TAG, "Service " + sr + " in process " + app
74. + " not same as in map: " + cur);
75. app.services.removeAt(i);
76. continue;
77. }
78.
79. // Any services running in the application may need to be placed
80. // back in the pending list.
81. // Several cases are distinguished here.
82. // Restart allowed, but the service has crashCount >= 2 and its application is
// not persistent: bring it down and do not restart it.
83. if (allowRestart && sr.crashCount >= 2 && (sr.serviceInfo
.applicationInfo.flags
84. &ApplicationInfo.FLAG_PERSISTENT) == 0) {
85. Slog.w(TAG, "Service crashed " + sr.crashCount
86. + " times, stopping: " + sr);
87. EventLog.writeEvent(EventLogTags.AM_SERVICE_CRASHED_TOO_MUCH,
88. sr.userId, sr.crashCount, sr.shortName, app.pid);
89. bringDownServiceLocked(sr);
90. } else if (!allowRestart) {
91. // Restart not allowed: bring the service down right away.
92. bringDownServiceLocked(sr);
93. } else {
94. // Otherwise schedule a restart; canceled reports whether a start item was dropped.
95. boolean canceled = scheduleServiceRestartLocked(sr, true);
96.
97. // Should the service remain running? Note that in the
98. // extreme case of so many attempts to deliver a command
99. // that it failed we also will stop it here.
100. if (sr.startRequested && (sr.stopIfKilled || canceled)) {
101. if (sr.pendingStarts.size() == 0) {
102. sr.startRequested = false;
103. if (sr.tracker != null) {
104. sr.tracker.setStarted(false, mAm.mProcessStats
.getMemFactorLocked(),
105. SystemClock.uptimeMillis());
106. }
107. if (!sr.hasAutoCreateConnections()) {
108. // Whoops, no reason to restart!
109. bringDownServiceLocked(sr);
110. }
111. }
112. }
113. }
114. }
115.
116. if (!allowRestart) {
117. app.services.clear();
118.
119. // Make sure there are no more restarting services for this process.
120. for (int i=mRestartingServices.size()-1; i>=0; i--) {
121. ServiceRecord r = mRestartingServices.get(i);
122. if (r.processName.equals(app.processName) &&
123. r.serviceInfo.applicationInfo.uid == app.info.uid) {
124. mRestartingServices.remove(i);
125. clearRestartingIfNeededLocked(r);
126. }
127. }
128. for (int i=mPendingServices.size()-1; i>=0; i--) {
129. ServiceRecord r = mPendingServices.get(i);
130. if (r.processName.equals(app.processName) &&
131. r.serviceInfo.applicationInfo.uid == app.info.uid) {
132. mPendingServices.remove(i);
133. }
134. }
135. }
136.
137. // Make sure we have no more records on the stopping list.
138. int i = mDestroyingServices.size();
139. while (i > 0) {
140. i--;
141. ServiceRecord sr = mDestroyingServices.get(i);
142. if (sr.app == app) {
143. sr.forceClearTracker();
144. mDestroyingServices.remove(i);
145. if (DEBUG_SERVICE) Slog.v(TAG, "killServices remove destroying "
+ sr);
146. }
147. }
148.
149. app.executingServices.clear();
150. }
// Schedules a restart of service record r by posting r.restarter on mAm.mHandler.
// For non-persistent applications, delivered start items are re-queued on
// r.pendingStarts (or canceled when allowCancel is set and a count limit is reached)
// and the restart delay is computed with back-off and spacing from other restarts.
// Persistent applications get a zero delay.
// Returns true if at least one start item was canceled; returns false otherwise,
// including when r is not the record currently registered in the service map.
1. private final boolean scheduleServiceRestartLocked(ServiceRecord r,
2. boolean allowCancel) {
3. boolean canceled = false;
4.
5. ServiceMap smap = getServiceMap(r.userId);
6. if (smap.mServicesByName.get(r.name) != r) {
7. ServiceRecord cur = smap.mServicesByName.get(r.name);
8. Slog.wtf(TAG, "Attempting to schedule restart of " + r
9. + " when found in map: " + cur);
10. return false;
11. }
12.
13. final long now = SystemClock.uptimeMillis();
14.
15. if ((r.serviceInfo.applicationInfo.flags
16. &ApplicationInfo.FLAG_PERSISTENT) == 0) {
17. long minDuration = SERVICE_RESTART_DURATION;
18. long resetTime = SERVICE_RESET_RUN_DURATION;
19.
20. // Any delivered but not yet finished starts should be put back
21. // on the pending list.
22. final int N = r.deliveredStarts.size();
23. if (N > 0) {
24. for (int i=N-1; i>=0; i--) {
25. ServiceRecord.StartItem si = r.deliveredStarts.get(i);
26. si.removeUriPermissionsLocked();
27. // Note: if canceled ends up true, the caller may still stop the service.
28. // Also note the upper limits on deliveryCount and doneExecutingCount.
29. if (si.intent == null) {
30. // We'll generate this again if needed.
31. } else if (!allowCancel || (si.deliveryCount <
ServiceRecord.MAX_DELIVERY_COUNT
32. && si.doneExecutingCount <
ServiceRecord.MAX_DONE_EXECUTING_COUNT)) {
33. // Put si back at the front of pendingStarts so its intent is carried into the next start.
34. r.pendingStarts.add(0, si);
35. long dur = SystemClock.uptimeMillis() - si.deliveredTime;
36. dur *= 2;
37. if (minDuration < dur) minDuration = dur;
38. if (resetTime < dur) resetTime = dur;
39. } else {
40. Slog.w(TAG, "Canceling start item " + si.intent +
41. " in service " + r.name);
42. canceled = true;
43. }
44. }
45. r.deliveredStarts.clear();
46. }
47.
48. r.totalRestartCount++;
49. if (r.restartDelay == 0) {
50. r.restartCount++;
51. r.restartDelay = minDuration;
52. } else {
53. // If it has been a "reasonably long time" since the service
54. // was started, then reset our restart duration back to
55. // the beginning, so we don't infinitely increase the duration
56. // on a service that just occasionally gets killed (which is
57. // a normal case, due to process being killed to reclaim memory).
58. if (now > (r.restartTime+resetTime)) {
59. r.restartCount = 1;
60. r.restartDelay = minDuration;
61. } else {
62. r.restartDelay *= SERVICE_RESTART_DURATION_FACTOR;
63. if (r.restartDelay < minDuration) {
64. r.restartDelay = minDuration;
65. }
66. }
67. }
68.
69. r.nextRestartTime = now + r.restartDelay;
70.
71. // Make sure that we don't end up restarting a bunch of services
72. // all at the same time.
73. boolean repeat;
74. do {
75. repeat = false;
76. for (int i=mRestartingServices.size()-1; i>=0; i--) {
77. ServiceRecord r2 = mRestartingServices.get(i);
78. if (r2 != r && r.nextRestartTime
79. >= (r2.nextRestartTime-SERVICE_MIN_RESTART_TIME_BETWEEN)
80. && r.nextRestartTime
81. < (r2.nextRestartTime+SERVICE_MIN_RESTART_TIME_BETWEEN)) {
82. r.nextRestartTime = r2.nextRestartTime +
SERVICE_MIN_RESTART_TIME_BETWEEN;
83. r.restartDelay = r.nextRestartTime - now;
84. repeat = true;
85. break;
86. }
87. }
88. } while (repeat);
89.
90. } else {
91. // Persistent processes are immediately restarted, so there is no
92. // reason to hold of on restarting their services.
93. r.totalRestartCount++;
94. r.restartCount = 0;
95. r.restartDelay = 0;
96. r.nextRestartTime = now;
97. }
98.
99. if (!mRestartingServices.contains(r)) {
100. r.createdFromFg = false;
101. mRestartingServices.add(r);
102. r.makeRestarting(mAm.mProcessStats.getMemFactorLocked(), now);
103. }
104.
105. r.cancelNotification();
106.
107. mAm.mHandler.removeCallbacks(r.restarter);
108. // The key step: post the restart Runnable (r.restarter) to mAm's handler.
109. // Presumably r.restarter is the ServiceRestarter created together with this
// ServiceRecord and holding a reference to it (creation not shown here) - confirm.
110. // When it runs, the service can be restarted directly from that record.
111. mAm.mHandler.postAtTime(r.restarter, r.nextRestartTime);
112. r.nextRestartTime = SystemClock.uptimeMillis() + r.restartDelay;
113. Slog.w(TAG, "Scheduling restart of crashed service "
114. + r.shortName + " in " + r.restartDelay + "ms");
115. EventLog.writeEvent(EventLogTags.AM_SCHEDULE_SERVICE_RESTART,
116. r.userId, r.shortName, r.restartDelay);
117.
118. return canceled;
119. }
// Runnable that restarts the ServiceRecord assigned through setService() by calling
// performServiceRestartLocked() while synchronized on mAm.
1. private class ServiceRestarter implements Runnable {
2. private ServiceRecord mService;
3.
4. void setService(ServiceRecord service) {
5. mService = service;
6. }
7.
8. public void run() {
9. synchronized(mAm) {
10. // From here the restart simply proceeds via performServiceRestartLocked().
11. performServiceRestartLocked(mService);
12. }
13. }
14. }
整个这个过程中,有好几个参数控制着是否需要重启,也定了很多参数的上限等等,这里单独列出来解释一下。
ServiceRecord.crashCount、ServiceRecord.StartItem.deliveryCount、ServiceRecord.StartItem.doneExecutingCount
crashCount顾名思义啊,就是crash的次数,这个在handleAppCrashLocked()中自增的,很明显每crash一次就会自增,没什么好说的
deliveryCount也很好理解,它是属于StartItem的,所以表示的是启动信息:即这一条启动请求(StartItem)被投递给服务、执行onStartCommand方法的次数。注意每次外部startService都会生成新的StartItem,所以它并不是外部startService的总次数。
doneExecutingCount跟deliveryCount还很有关联,类似的也是说的这个服务执行的次数,那么它们有什么区别呢?
还有两个标志位Service.START_FLAG_RETRY、Service.START_FLAG_REDELIVERY要一起看。这个在ActiveServices.sendServiceArgsLocked()中可以看到。意思就是说这个服务是直接重启还是重新发送请求。
它们还是互斥的,这点在serviceDoneExecutingLocked()方法的START_REDELIVER_INTENT分支处理中可以得到结论,总的来说就是说onStartCommand返回START_STICKY是允许重启,而START_REDELIVER_INTENT会重新将上次的intent请求发送出去,服务中会重新接收到这个。
二、一个常见的系统log分析:
很多时候会看到这样的系统log:
10-27 16:31:57.300 2108 3930 [system_server] I ActivityManager: Process xxx (pid xxx) has died
10-27 16:31:57.300 2108 3930 [system_server] D ActivityManager: cleanUpApplicationRecord – xxx
10-27 16:31:57.303 2108 3930 [system_server] W ActivityManager: Scheduling restart of crashed service xxxr/.xxxService in 319547ms
在319547ms后xxx又被xxxService唤醒:
10-27 16:37:16.882 2108 2138 [system_server] I ActivityManager: Start proc (pid):xxx/u10a97 for service xxx/.xxxService caller=xxx
根据log反推流程:
在执行到第9步之后,请参照启动服务的流程图:
通过流程图看出,在app主进程died之后,延迟唤醒该app内对应service,然后再由service拉起app主进程,造成app的自启动。