Linux thread schedule priority
一,背景
最近线上系统压力比较大,load average 八九十是经常的,然后导致某些比较核心的线程收不到网络消息,或者长时间得不到调度的机会,所以这个时候我们就需要把该线程的调度优先级提高。
不过Linux系统提供了一个比较牛的特性,可以调整一个线程的调度策略和调度优先级,它包含多种策略,默认的策略是SCHED_OTHER,对于这种策略所有线程的优先级都是0,采用round_robin的方式进行调度。除了它还有SCHED_BATCH,SCHED_IDLE,SCHED_FIFO和SCHED_RR,而我们今天着重介绍后面两种semi-realtime策略。
- SCHED_OTHER
它是默认的线程分时调度策略,所有的线程的优先级别都是0,线程的调度是通过分时来完成的。简单地说,如果系统使用这种调度策略,程序将无法设置线程的优先级。请注意,这种调度策略也是抢占式的,当高优先级的线程准备运行的时候,当前线程将被抢占并进入等待队列。这种调度策略仅仅决定线程在可运行线程队列中的具有相同优先级的线程的运行次序。
- SCHED_FIFO
它是一种实时的先进先出调度策略,且只能在超级用户下运行。这种调度策略仅仅被使用于优先级大于0的线程。它意味着,使用SCHED_FIFO的可运行线程将一直抢占使用SCHED_OTHER的运行线程。此外SCHED_FIFO是一个非分时的简单调度策略,当一个线程变成可运行状态,它将被追加到对应优先级队列的尾部(POSIX 1003.1)。当所有高优先级的线程终止或者阻塞时,它将被运行。对于相同优先级别的线程,按照简单的先进先运行的规则运行。我们考虑一种很坏的情况,如果有若干相同优先级的线程等待执行,然而最早执行的线程无终止或者阻塞动作,那么其他线程是无法执行的,除非当前线程调用如pthread_yield之类的函数,所以在使用SCHED_FIFO的时候要小心处理相同级别线程的动作。
- SCHED_RR
鉴于SCHED_FIFO调度策略的一些缺点,SCHED_RR对SCHED_FIFO做出了一些增强功能。从实质上看,它还是SCHED_FIFO调度策略。它使用最大运行时间来限制当前线程的运行,当运行时间大于等于最大运行时间的时候,当前线程将被切换并放置于相同优先级队列的最后。这样做的好处是其他具有相同级别的线程能在“自私”线程下执行。
综上所述,我推荐使用SCHED_RR,该策略可以在可控的范围内改变我们想要提高的线程调度策略和优先级。
二,pthread_setschedparam / pthread_getschedparam
int pthread_setschedparam(pthread_t thread, int policy,
const struct sched_param *param);
int pthread_getschedparam(pthread_t thread, int *policy,
struct sched_param *param);
Linux 系统为我们提供了上面的两个api来设置和获取一个已经存在的线程的调度策略和调度优先级,实验1:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
/* Global mutex; declared but not referenced anywhere in this example program. */
pthread_mutex_t mutex;
/*
 * Switch the calling thread to the SCHED_FIFO policy with priority 10.
 *
 * Returns 0 on success, or the error number returned by
 * pthread_setschedparam() on failure (a diagnostic is printed first).
 *
 * NOTE: both messages say "prio: 1" although the priority requested is 10;
 * the text is left unchanged so it keeps matching the article's sample output.
 */
int set_realtime_priority(void)
{
    struct sched_param params = { .sched_priority = 10 };
    int ret;

    printf("\nset realtime priority for thread policy: SCHED_FIFO prio: 1\n" );

    /* pthread functions report failure through their return value, not errno. */
    ret = pthread_setschedparam(pthread_self(), SCHED_FIFO, &params);
    if (ret != 0) {
        printf( "Unsuccessful in setting thread realtime policy: SCHED_FIFO prio: 1\n");
    }
    return ret;
}
/*
 * Child thread entry point: queries its own scheduling policy and priority
 * and prints the priority when the policy is SCHED_FIFO.
 *
 * arg is unused. The thread terminates with exit value NULL (0).
 */
void *thr_fn2(void *arg)
{
    struct sched_param params;
    int policy = 0;
    int ret;

    (void)arg;

    printf("+ start -------------------- child thread -------------------------+\n");
    printf("| child thread getshed: |\n");

    ret = pthread_getschedparam(pthread_self(), &policy, &params);
    if (ret == 0 && policy == SCHED_FIFO) {
        printf("| thread thr_fn2 policy is SCHED_FIFO prior is %d |\n", params.sched_priority);
    }

    printf("+ end --------------------- child thread --------------------------+\n\n");
    sleep(1);
    pthread_exit(NULL);
}
/*
 * Parent thread entry point: calls set_realtime_priority() on itself, then
 * creates a child thread running thr_fn2 with NULL (default) attributes,
 * waits for it and prints the child's exit value.
 *
 * arg is unused. Returns NULL; exits the process with status -1 if the
 * child cannot be created or joined.
 */
void *run(void *arg)
{
    pthread_t tid;
    void *tret = NULL;
    int err;

    (void)arg;

    set_realtime_priority();
    printf("start create child thread .... \n\n");

    /* pthread_* calls return the error number instead of setting errno,
     * so the message is built with strerror(err) rather than perror(). */
    err = pthread_create(&tid, NULL, thr_fn2, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(tid, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }

    printf("thread exit code is %ld\n", (long)tret);
    return NULL;
}
/*
 * Program entry: starts the `run` thread with default attributes and waits
 * for it to finish. Returns 0 on success; exits with status -1 if the thread
 * cannot be created or joined.
 */
int main(void)
{
    pthread_t thread_id;
    void *tret = NULL;
    int err;

    /* pthread_* calls return the error number instead of setting errno. */
    err = pthread_create(&thread_id, NULL, run, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(thread_id, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }
    return 0;
}
gcc thread_setschedxxx.c -o thread_setschedxxx -lpthread
./thread_setschedxxx
set realtime priority for thread policy: SCHED_FIFO prio: 1
start create child thread ....
+ start -------------------- child thread -------------------------+
| child thread getshed: |
| thread thr_fn2 policy is SCHED_FIFO prior is 10 |
+ end --------------------- child thread --------------------------+
thread exit code is 0
三,pthread_attr_setschedpolicy / pthread_attr_setschedparam
int pthread_attr_setschedparam(pthread_attr_t *attr,
const struct sched_param *param);
int pthread_attr_getschedparam(const pthread_attr_t *attr,
struct sched_param *param);
同样系统也为我们提供了另外一套api,这两个接口是先把相关的属性设置到attr对象上面,再把这个对象作为参数传给pthread_create,从而让新建立的线程具备对应的属性。它与上面第二条里面提到的API区别就是:
1. 需要把policy和priority分开设置(不重要)
2.这套API只能设置还没有创建的线程,对于已经创建的线程就只能用pthread_setschedparam来设置了。
实验2:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
/* Global mutex; declared but not referenced anywhere in this example program. */
pthread_mutex_t mutex;
/*
 * Child thread entry point: queries its own scheduling policy and priority
 * and prints the priority when the policy is SCHED_FIFO.
 *
 * arg is unused. The thread terminates with exit value NULL (0).
 */
void *thr_fn2(void *arg)
{
    struct sched_param params;
    int policy = 0;
    int ret;

    (void)arg;

    printf("+ start -------------------- child thread -------------------------+\n");
    printf("| child thread getshed: |\n");

    ret = pthread_getschedparam(pthread_self(), &policy, &params);
    if (ret == 0 && policy == SCHED_FIFO) {
        printf("| thread thr_fn2 policy is SCHED_FIFO prior is %d |\n", params.sched_priority);
    }

    printf("+ end --------------------- child thread --------------------------+\n\n");
    sleep(1);
    pthread_exit(NULL);
}
/*
 * Parent thread entry point: prints its own scheduling policy and priority,
 * then creates a child thread running thr_fn2 with NULL (default)
 * attributes, waits for it and prints the child's exit value.
 *
 * arg is unused. Returns NULL; exits the process with status -1 if a
 * pthread call fails.
 */
void *run(void *arg)
{
    struct sched_param param;
    pthread_t tid;
    void *tret = NULL;
    int policy = 0;
    int err;

    (void)arg;

    /* Check the query so that policy/param are never printed uninitialized.
     * pthread_* calls return the error number instead of setting errno. */
    err = pthread_getschedparam(pthread_self(), &policy, &param);
    if (err != 0) {
        fprintf(stderr, "pthread_getschedparam: %s\n", strerror(err));
        exit(-1);
    }
    printf("\nrun thread policy is %d prior is %d\n\n", policy, param.sched_priority);
    sleep(1);

    printf("start create child thread .... \n\n");
    err = pthread_create(&tid, NULL, thr_fn2, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(tid, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }

    printf("thread exit code is %ld\n", (long)tret);
    return NULL;
}
/*
 * Program entry: builds a thread-attribute object requesting SCHED_FIFO with
 * priority 10 and PTHREAD_EXPLICIT_SCHED, starts the `run` thread with it and
 * waits for the thread to finish.
 *
 * Failures while filling in the attribute object are reported and execution
 * continues. Failure to initialise the attribute object, or to create or join
 * the thread, exits with status -1. Returns 0 otherwise.
 */
int main(void)
{
    struct sched_param param = { .sched_priority = 10 };
    pthread_attr_t tattr;
    pthread_t thread_id;
    void *tret = NULL;
    int err;

    /* pthread_* calls return the error number instead of setting errno. */
    err = pthread_attr_init(&tattr);
    if (err != 0) {
        fprintf(stderr, "pthread_attr_init: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_attr_setschedpolicy(&tattr, SCHED_FIFO);
    if (err != 0) {
        printf("failed to set schedpolicy\n");
    }

    err = pthread_attr_setschedparam(&tattr, &param);
    if (err != 0) {
        printf("failed to set schedparam\n");
    }

    err = pthread_attr_setinheritsched(&tattr, PTHREAD_EXPLICIT_SCHED);
    if (err != 0) {
        printf("failed to set inheritsched\n");
    }

    err = pthread_create(&thread_id, &tattr, run, NULL);
    /* The attribute object is no longer needed once pthread_create returns. */
    pthread_attr_destroy(&tattr);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(thread_id, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }
    return 0;
}
gcc thread_attr_inherit.c -o thread_attr_inherit -lpthread
./thread_attr_inherit
run thread policy is 1 prior is 10
start create child thread ....
+ start -------------------- child thread -------------------------+
| child thread getshed: |
| thread thr_fn2 policy is 1 prior is 10 |
+ end --------------------- child thread --------------------------+
thread exit code is 0
注意:
- 我们在使用pthread_attr_setschedxxx接口的时候必须同时把attr对象的inheritsched属性设置为“PTHREAD_EXPLICIT_SCHED”,否则子线程将继承来自父亲线程的所有属性。
- 一般要用pthread_getschedparam接口来获取调度策略policy和调度优先级param.sched_priority。若使用pthread_attr_getschedxxx获取调度属性的话必须使用创建线程时使用的attr对象,否则获取不到。
四,权限问题
很多人估计跟我一样,看到这么好用的功能,赶紧写个demo程序验证一下,但是悲剧的是如果使用pthread_setschedparam会报错:
pthread_setschedparam: Operation not permitted
使用pthread_attr_setschedxxx则会报错:
pthread_create: Operation not permitted
经过一番搜索和实验,发现有以下解决方案:
解决方案:
网络解决方案:在宿主机上执行“sysctl -w kernel.sched_rt_runtime_us=-1”
确实可以解决问题,但是这个命令改变了全局的sched_rt_runtime_us,这样会有风险,可能导致别的线程拿不到时间片,所以不推荐!
非容器环境
对于宿主机上面的线程,我们可以通过把父亲线程的pid添加到/sys/fs/cgroup/cpu/tasks来解决
/* Build and run a shell command that appends this process's PID to
 * /sys/fs/cgroup/cpu/tasks before the thread is created. */
/* NOTE(review): cmd is declared outside this snippet; sprintf does not bound
 * the write, so cmd must be large enough for the formatted command. */
sprintf(cmd, "echo %d >> /sys/fs/cgroup/cpu/tasks", getpid());
system(cmd);
/* thread, attrp and thread_start are defined outside this snippet. */
s = pthread_create(&thread, attrp, &thread_start, NULL);
容器环境
在宿主机上面设置:
echo 950000 > /sys/fs/cgroup/cpu/system.slice/cpu.rt_runtime_us
echo 200000 > /sys/fs/cgroup/cpu/system.slice/docker-287a7b24d1e0fd73cbb04f19bea13e6e17d7aead59d6e84ff0025f752ea8a01d.scope/cpu.rt_runtime_us
上面这一步可以替换为在容器里面执行
echo 200000 > /sys/fs/cgroup/cpu/cpu.rt_runtime_us //分配200000给容器的cpu.rt_runtime_us
如果设置的时候权限不允许的话,可以把当前的shell添加到tasks里面
echo $$ >> /sys/fs/cgroup/cpu/tasks
五,属性继承的困扰
从上面两个例子中可以看到,子线程会继承父亲线程的线程属性,如果我们不想让其继承父线程的属性,我们可以通过下面的接口来实现:
#include <pthread.h>
int pthread_attr_setinheritsched(pthread_attr_t *attr,
int inheritsched);
inherit 缺省值是 PTHREAD_INHERIT_SCHED 表示新建的线程将继承创建者线程中定义的调度策略。将忽略在 pthread_create() 调用中定义的所有调度属性。如果使用值PTHREAD_EXPLICIT_SCHED,则将使用 pthread_create() 调用中的属性。
实验3:
我们发现虽然调用了pthread_attr_setinheritsched,但是仍然继承了父线程的属性,仔细看man page,发现了一个已知bug:
As at glibc 2.8, if a thread attributes object is initialized using pthread_attr_init(3), then the
scheduling policy of the attributes object is set to SCHED_OTHER and the scheduling
priority is set to 0. However, if the inherit-scheduler attribute is then set to
PTHREAD_EXPLICIT_SCHED, then a thread created using the attribute object wrongly
inherits its scheduling attributes from the creating thread. This bug does not occur
if either the scheduling policy or scheduling priority attribute is explicitly set in
the thread attributes object before calling pthread_create(3)
所以我们必须要显式指定attr的属性:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
/* Global mutex; declared but not referenced anywhere in this example program. */
pthread_mutex_t mutex;
/*
 * Switch the calling thread to the SCHED_FIFO policy with priority 10.
 *
 * Returns 0 on success, or the error number returned by
 * pthread_setschedparam() on failure (a diagnostic is printed first).
 *
 * NOTE: both messages say "prio: 1" although the priority requested is 10;
 * the text is left unchanged so it keeps matching the article's sample output.
 */
int set_realtime_priority(void)
{
    struct sched_param params = { .sched_priority = 10 };
    int ret;

    printf("\nset realtime priority for thread policy: SCHED_FIFO prio: 1\n" );

    /* pthread functions report failure through their return value, not errno. */
    ret = pthread_setschedparam(pthread_self(), SCHED_FIFO, &params);
    if (ret != 0) {
        printf( "Unsuccessful in setting thread realtime policy: SCHED_FIFO prio: 1\n");
    }
    return ret;
}
/*
 * Child thread entry point: queries its own scheduling policy and priority
 * and prints both values when the policy is SCHED_FIFO; nothing is printed
 * between the start and end banners otherwise.
 *
 * arg is unused. The thread terminates with exit value NULL (0).
 */
void *thr_fn2(void *arg)
{
    struct sched_param params;
    int policy = 0;
    int ret;

    (void)arg;

    printf("+ start -------------------- child thread -------------------------+\n");
    printf("| child thread getshed: |\n");

    ret = pthread_getschedparam(pthread_self(), &policy, &params);
    if (ret == 0 && policy == SCHED_FIFO) {
        printf("| thread thr_fn2 policy is %d prior is %d |\n", policy, params.sched_priority);
    }

    printf("+ end --------------------- child thread --------------------------+\n\n");
    sleep(1);
    pthread_exit(NULL);
}
/*
 * Parent thread entry point: prints its own scheduling policy and priority,
 * then creates a child thread running thr_fn2 whose attributes explicitly
 * request SCHED_OTHER, priority 0 and PTHREAD_EXPLICIT_SCHED, waits for it
 * and prints the child's exit value.
 *
 * Failures while filling in the attribute object are reported and execution
 * continues. arg is unused. Returns NULL; exits the process with status -1
 * if querying the scheduler, creating or joining the child fails.
 */
void *run(void *arg)
{
    struct sched_param self_param;
    struct sched_param child_param = { .sched_priority = 0 };
    pthread_attr_t tattr;
    pthread_t tid;
    void *tret = NULL;
    int policy = 0;
    int err;

    (void)arg;

    /* Check the query so that policy/param are never printed uninitialized.
     * pthread_* calls return the error number instead of setting errno. */
    err = pthread_getschedparam(pthread_self(), &policy, &self_param);
    if (err != 0) {
        fprintf(stderr, "pthread_getschedparam: %s\n", strerror(err));
        exit(-1);
    }
    printf("\nrun thread policy is %d prior is %d\n\n", policy, self_param.sched_priority);
    sleep(1);

    printf("start create child thread .... \n\n");
    pthread_attr_init(&tattr);

    /* Policy and priority are set explicitly before PTHREAD_EXPLICIT_SCHED. */
    err = pthread_attr_setschedpolicy(&tattr, SCHED_OTHER);
    if (err != 0) {
        printf("failed to set policy\n");
    }

    err = pthread_attr_setschedparam(&tattr, &child_param);
    if (err != 0) {
        printf("failed to set priority\n");
    }

    err = pthread_attr_setinheritsched(&tattr, PTHREAD_EXPLICIT_SCHED);
    if (err != 0) {
        printf("failed to setinheritsched\n");
    }

    err = pthread_create(&tid, &tattr, thr_fn2, NULL);
    /* The attribute object is no longer needed once pthread_create returns. */
    pthread_attr_destroy(&tattr);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(tid, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }

    printf("thread exit code is %ld\n", (long)tret);
    return NULL;
}
/*
 * Program entry: calls set_realtime_priority() on the main thread, then
 * starts the `run` thread with default attributes and waits for it.
 * Returns 0 on success; exits with status -1 if the thread cannot be created
 * or joined.
 */
int main(void)
{
    pthread_t thread_id;
    void *tret = NULL;
    int err;

    // set realtime policy and priority
    set_realtime_priority();

    /* pthread_* calls return the error number instead of setting errno. */
    err = pthread_create(&thread_id, NULL, run, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        exit(-1);
    }

    err = pthread_join(thread_id, &tret);
    if (err != 0) {
        fprintf(stderr, "pthread_join: %s\n", strerror(err));
        exit(-1);
    }
    return 0;
}
编译执行结果:
set realtime priority for thread policy: SCHED_FIFO prio: 1
run thread policy is 1 prior is 10
start create child thread ....
+ start -------------------- child thread -------------------------+
| child thread getshed: |
+ end --------------------- child thread --------------------------+
thread exit code is 0
chrt小工具
我们可以使用这个小工具来动态设置线程的调度策略和优先级,比如:
chrt -p 1 14790 //设置一个线程的调度策略为SCHED_RR(chrt的默认策略)且优先级为1,在top中PR显示为-2
14790 root -2 0 648936 194452 8424 S 0.0 2.5 0:00.05 ivan_prio
chrt -p 99 14790 //设置一个线程的调度策略为SCHED_RR且优先级为99,在top中PR显示为rt
14790 root rt 0 648936 194452 8424 S 0.0 2.5 0:00.05 ivan_prio
chrt -f -p 1 14790 //设置一个线程的调度策略为SCHED_FIFO且优先级为1,在top中PR显示为-2
14790 root -2 0 648936 194452 8424 S 0.0 2.5 0:00.05 ivan_prio
chrt -o -p 0 14790 //设置一个线程的调度策略为SCHED_OTHER,设置SCHED_OTHER策略时,优先级只能指定‘0’,在top中PR显示为20
14790 root 20 0 648936 194900 8424 S 0.0 2.5 0:00.05 ivan_prio
https://blog.csdn.net/u010317005/article/details/80531985
https://www.cnblogs.com/tmpt/p/3603561.html
参考文献
<1> https://blog.csdn.net/xiaoyeyopulei/article/details/7965840
<2> http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
<3> http://man7.org/linux/man-pages/man3/pthread_attr_setschedparam.3.html
<4> http://man7.org/linux/man-pages/man3/pthread_attr_setinheritsched.3.html
<5> https://github.com/coreos/bugs/issues/410