Question

我有两个简单的微基准测试，试图测量线程切换和进程切换的开销，但出乎意料的是，进程切换的开销比线程切换还低。环境：1.8GHz Core 2 Duo，2GB RAM，Linux 2.6.32-21-generic x86_64（Ubuntu 10.04）。我得到的结果是：

进程切换：~2.1-2.4us
线程切换：~4us

我还尝试使用 numactl --physcpubind=0 和 likwid-pin -c0 运行，但这似乎只会让线程切换变慢到约 5us。有人知道我的测量方法有什么问题，或者这些结果是否合理吗？

代码位于以下 URL，r1667 版本也粘贴在下面：

https://assorted.svn.sourceforge.net/svnroot/assorted/sandbox/trunk/src/c/process_switch_bench.c

// on zs, ~2.1-2.4us/switch

#include <fcntl.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Iteration count maintained by threads(); main() zeroes it before the run
// and afterwards overwrites it with the normalized switches-per-second value.
uint32_t COUNTER;
// Locked and unlocked by threads() on every iteration; main() takes it (and
// keeps it) after the sleep in order to stop the counting thread.
pthread_mutex_t LOCK;
// NOTE(review): not referenced by any code shown in this program.
pthread_mutex_t START;
// Named semaphores opened in main(). threads() waits once on s2 before it
// starts, then on every iteration posts s0 and waits on s1; the forked child
// swaps its s0 and s1 pointers before calling threads().
sem_t *s0, *s1, *s2;

/*
 * Ping-pong worker used by both sides of the benchmark: main() runs it as a
 * detached thread in the parent process and calls it directly in the forked
 * child.
 *
 * Blocks on s2 until main() opens the gate, then loops forever: counts one
 * iteration, posts s0 to wake the peer and sleeps on s1 until the peer posts
 * back.
 *
 * unused: ignored; present only to match the pthread start-routine signature.
 * Never returns (the trailing return only satisfies the signature).
 */
void * threads (
    void * unused
) {
    (void)unused;

    // Wait till we may fire away
    sem_wait(s2);

    for (;;) {
        // Count the iteration while holding LOCK. main() reads and rewrites
        // COUNTER after taking LOCK, so an increment outside the critical
        // section would race with it.
        pthread_mutex_lock(&LOCK);
        COUNTER++;
        pthread_mutex_unlock(&LOCK);
        sem_post(s0);
        sem_wait(s1);
    }
    return 0;
}

/*
 * Current wall-clock time as reported by gettimeofday(), expressed in whole
 * milliseconds. The sub-millisecond part of the reading is truncated.
 */
int64_t timeInMS ()
{
    struct timeval now;
    int64_t seconds_as_ms;
    int64_t micros_as_ms;

    gettimeofday(&now, NULL);
    seconds_as_ms = (int64_t)now.tv_sec * 1000;
    micros_as_ms = (int64_t)now.tv_usec / 1000;
    return seconds_as_ms + micros_as_ms;
}

int main (
    int argc,
    char ** argv
) {
    int64_t start;
    pthread_t t1;

    pthread_mutex_init(&LOCK, NULL);

    COUNTER = 0;
    s0 = sem_open("/s0", O_CREAT, 0022, 0);
    if (s0 == 0) { perror("sem_open"); exit(1); }
    s1 = sem_open("/s1", O_CREAT, 0022, 0);
    if (s1 == 0) { perror("sem_open"); exit(1); }
    s2 = sem_open("/s2", O_CREAT, 0022, 0);
    if (s2 == 0) { perror("sem_open"); exit(1); }

    int x, y, z;
    sem_getvalue(s0, &x);
    sem_getvalue(s1, &y);
    sem_getvalue(s2, &z);
    printf("%d %d %d\n", x, y, z);

    pid_t pid = fork();
    if (pid) {
      pthread_create(&t1, NULL, threads, NULL);
      pthread_detach(t1);
      // Get start time and fire away
      start = timeInMS();
      sem_post(s2);
      sem_post(s2);

      // Wait for about a second
      sleep(1);
      // Stop thread
      pthread_mutex_lock(&LOCK);

      // Find out how much time has really passed. sleep won't guarantee me that
      // I sleep exactly one second, I might sleep longer since even after being
      // woken up, it can take some time before I gain back CPU time. Further
      // some more time might have passed before I obtained the lock!
      int64_t time = timeInMS() - start;
      // Correct the number of thread switches accordingly
      COUNTER = (uint32_t)(((uint64_t)COUNTER * 2 * 1000) / time);
      printf("Number of process switches in about one second was %u\n", COUNTER);
      printf("roughly %f microseconds per switch\n", 1000000.0 / COUNTER);

      // clean up
      kill(pid, 9);
      wait(0);
      sem_close(s0);
      sem_close(s1);
      sem_unlink("/s0");
      sem_unlink("/s1");
      sem_unlink("/s2");
    } else {
      if (1) { sem_t *t = s0; s0 = s1; s1 = t; }
      threads(0); // never return
    }
    return 0;
}

https://assorted.svn.sourceforge.net/svnroot/assorted/sandbox/trunk/src/c/thread_switch_bench.c

// From <http://stackoverflow.com/questions/304752/how-to-estimate-the-thread-context-switching-overhead>

// on zs, ~4-5us/switch; tried making COUNTER updated only by one thread, but no difference

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>

// Iteration count; only the worker that found it at zero after taking LOCK
// increments it. main() later overwrites it with the normalized
// switches-per-second value.
uint32_t COUNTER;
// Mutex paired with CONDITION in the workers; main() takes it (and keeps it)
// after the sleep in order to stop both workers.
pthread_mutex_t LOCK;
// Start gate: held by main() while the workers are created; each worker locks
// and immediately unlocks it before starting.
pthread_mutex_t START;
// Condition variable the two workers use to wake each other.
pthread_cond_t CONDITION;

/*
 * Worker run by each of the two benchmark threads.
 *
 * Passes the START gate, then hands control back and forth with the other
 * worker through CONDITION while holding LOCK. Only the worker that arrives
 * first (the one that still sees COUNTER == 0) counts iterations.
 *
 * unused: ignored.
 * Never returns in practice; the code after the loop is unreachable.
 */
void * threads (
    void * unused
) {
    int is_counting_thread;

    // Block until main() releases the start gate.
    pthread_mutex_lock(&START);
    pthread_mutex_unlock(&START);

    pthread_mutex_lock(&LOCK);
    // A non-zero COUNTER means the other worker got here first and is
    // already sleeping on the condition; it has to be woken up before we go
    // to sleep ourselves, otherwise both would wait forever.
    is_counting_thread = (COUNTER == 0);
    if (!is_counting_thread) {
        pthread_cond_signal(&CONDITION);
    }

    while (1) {
        if (is_counting_thread) {
            COUNTER++;
        }
        pthread_cond_wait(&CONDITION, &LOCK);
        // Wake the peer right away. It cannot actually proceed until this
        // thread goes back to sleep in pthread_cond_wait and releases LOCK.
        pthread_cond_signal(&CONDITION);
    }

    pthread_mutex_unlock(&LOCK);
    return NULL;
}

/*
 * Wall-clock timestamp in milliseconds, read with gettimeofday().
 * Microseconds are truncated to whole milliseconds.
 */
int64_t timeInMS ()
{
    struct timeval tv;
    int64_t ms;

    gettimeofday(&tv, NULL);
    ms = (int64_t)tv.tv_sec;
    ms *= 1000;
    ms += (int64_t)tv.tv_usec / 1000;
    return ms;
}


/*
 * Thread-switch benchmark driver.
 *
 * Starts two detached worker threads behind the START gate, lets them hand
 * control back and forth for about one second, stops them by taking LOCK,
 * and prints the iteration count scaled to switches per second (two switches
 * per counted iteration) together with the time per switch.
 *
 * argc, argv: unused.
 * Returns 0 on success, 1 if a worker thread cannot be created.
 */
int main (
    int argc,
    char ** argv
) {
    int64_t start;
    pthread_t t1;
    pthread_t t2;
    int err;

    (void)argc;
    (void)argv;

    pthread_mutex_init(&LOCK, NULL);
    pthread_mutex_init(&START, NULL);
    pthread_cond_init(&CONDITION, NULL);

    // Hold START so neither worker gets past its start gate yet.
    pthread_mutex_lock(&START);
    COUNTER = 0;
    // Check both creations before detaching: on failure the pthread_t is
    // left indeterminate and must not be passed to pthread_detach().
    err = pthread_create(&t1, NULL, threads, NULL);
    if (err == 0) {
        err = pthread_create(&t2, NULL, threads, NULL);
    }
    if (err != 0) {
        fprintf(stderr, "pthread_create failed (error %d)\n", err);
        return 1;
    }
    pthread_detach(t1);
    pthread_detach(t2);
    // Get start time and fire away
    start = timeInMS();
    pthread_mutex_unlock(&START);
    // Wait for about a second
    sleep(1);
    // Stop both threads
    pthread_mutex_lock(&LOCK);
    // Find out how much time has really passed. sleep won't guarantee that
    // we sleep exactly one second; we might sleep longer since even after
    // being woken up it can take some time before we gain back CPU time, and
    // more time may pass before the lock is obtained.
    int64_t elapsed = timeInMS() - start;
    if (elapsed <= 0) {
        // Keep the normalization below from dividing by zero.
        elapsed = 1;
    }
    // Correct the number of thread switches accordingly
    COUNTER = (uint32_t)(((uint64_t)COUNTER * 2 * 1000) / elapsed);
    printf("Number of thread switches in about one second was %u\n", COUNTER);
    printf("roughly %f microseconds per switch\n", 1000000.0 / COUNTER);
    return 0;
}

Answer 1

很简单：pthread_mutex_lock() 在您的系统上大约需要 2us，而您的线程版本每次循环都要获取两次锁，进程版本只需要获取一次。

Answer 2

历史上，Unix（以及由其衍生的 Linux）的 fork() 相对廉价，因此创建进程不成问题，并发处理过去是（现在也仍然主要是）通过多个进程完成的。

后来出现了其他一些操作系统（这里就不点名了），它们创建进程的开销非常大，因此为这些系统做开发的人不得不发明线程——一种非常“轻量”的进程，也由此引入了一整类全新的并发问题。

UNIX / Linux世界也引入了线程，尽管并不是真的需要。但是对Linux中的线程的支持有些限制 - 一个进程的线程必须共享一个核心，因此在许多情况下Linux上的多进程环境比多线程更快，这可能是您获得结果的原因。

微基准测试显示进程切换比线程切换更快；哪里出了问题？

2 个答案: