多个线程读取同一个文件:使用 pread 还是多个 fd?

时间:2016-12-29 01:03:37

标签: c multithreading performance

我有一个多线程程序,我希望每个线程都从同一个文件中读取,并且读到相同的数据。

我知道 pread(2) 是线程安全的;但是,我关心的是速度,而不是内存占用。

让多个线程使用 pread 从同一个文件描述符读取会更快吗?还是让每个线程各自为同一个文件打开一个独立的文件描述符,并从各自的描述符读取会更快?

我的想法是,如果 pread 是原子的,那么它就会阻止另一个线程同时读取;但是,如果有大量线程,每个线程都有自己的 fd,并且操作系统需要为每次读取提供服务,那么所有上下文切换可能会占用更多时间。

1 个答案:

答案 0 :(得分:0)

出于同样的要求,我为此做了一个测试。根据对SSD和HDD的测试,
pread - 提高读取速度,但如果使用相同的 FD 进行写入,写入会变慢。
read(使用单独的 FD) - 读取操作比 pread 慢。但它不会影响写操作(使用单独的 FD 完成)。

所以最好的选择是,

  • 使用单独的 FD 进行写入(只写模式)
  • 打开单个 FD(只读模式)并使用 pread 在多个线程中使用它

SSD 上的结果(读/写计数 10,000,000)

| - | pread(读写使用同一个 FD) | 使用不同的 FD 进行 read |
| --- | --- | --- |
| 读取 | 25 秒+ | 27 秒+ |
| 写入 | 38 秒+ | 33 秒+ |

用于测试的代码

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <atomic>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <thread>

// Reports the current file offset of `fd` by asking lseek() to move zero
// bytes relative to the current position. The lseek() result is returned
// unchanged, including its error value.
off_t tell(int fd)
{
    const off_t currentOffset = ::lseek(fd, 0, SEEK_CUR);
    return currentOffset;
}

// Returns the signed interval from `begin` to `end` in microseconds.
// The result is negative when `end` is earlier than `begin`.
inline int64_t getMicroSecDiff(const timeval& begin, const timeval& end)
{
    // Do all arithmetic in signed 64-bit. An unsigned 32-bit multiplier would
    // pull the product into unsigned 32-bit math on targets where time_t is
    // 32 bits wide, wrapping for long or negative intervals.
    constexpr int64_t MicroSecPerSec = 1000 * 1000;
    const int64_t secDiff = static_cast<int64_t>(end.tv_sec) - static_cast<int64_t>(begin.tv_sec);
    const int64_t usecDiff = static_cast<int64_t>(end.tv_usec) - static_cast<int64_t>(begin.tv_usec);
    return secDiff * MicroSecPerSec + usecDiff;
}

// Returns the number of microseconds between `begin` and the current
// gettimeofday() reading.
inline int64_t getMicroSecDiffFrom(const timeval& begin)
{
    timeval now;
    gettimeofday(&now, nullptr);

    const int64_t elapsedMicroSec = getMicroSecDiff(begin, now);
    return elapsedMicroSec;
}

using namespace std;

// Descriptors for ./A and ./B opened by main(); -1 until they are opened.
int g_fileFd_A{-1};
int g_fileFd_B{-1};

// Offsets published by the writer after each completed write: the position
// each file was at just before its most recent write. -1 means nothing has
// been published yet, which the readers treat as "not ready".
std::atomic<int64_t> g_lastFileAPos{-1};
std::atomic<int64_t> g_lastFileBPos{-1};

// Set to true by the writer once its loop has finished.
std::atomic<bool> g_writeCompleted{false};

// Number of bytes transferred by every single write and read.
constexpr int WriteSz = 1;

// Number of loop iterations for the writer and for each reader; main() sets
// it from the command line before the threads are started.
uint64_t g_writeCount{0};

// Writer thread body. Performs g_writeCount iterations; each iteration writes
// one WriteSz-byte record to fdA and one to fdB, then publishes the offsets
// those records were written at through g_lastFileAPos / g_lastFileBPos.
// When the loop ends it sets g_writeCompleted and prints the elapsed time.
void writer(int fdA, int fdB)
{
    timeval startTime;
    gettimeofday(&startTime, nullptr);

    size_t iteration = 0;
    while (iteration < g_writeCount)
    {
        // Sample the offsets before writing: once the writes below have
        // completed, these are the positions of the newest bytes on disk.
        const int64_t offsetA = tell(fdA);
        const int64_t offsetB = tell(fdB);

        auto bytesWritten = write(fdA, "A", WriteSz);
        assert(bytesWritten == WriteSz);
        bytesWritten = write(fdB, "b", WriteSz);
        assert(bytesWritten == WriteSz);

        // Publish only after both writes are done so readers never receive an
        // offset that has not been written yet.
        g_lastFileAPos.store(offsetA, std::memory_order_relaxed);
        g_lastFileBPos.store(offsetB, std::memory_order_relaxed);

        ++iteration;
    }
    g_writeCompleted = true;

    const auto elapsedUs = getMicroSecDiffFrom(startTime);
    std::ostringstream report;
    report << "Write time: " << elapsedUs / 1000000 << " sec " << elapsedUs % 1000000 << " us" << endl;
    cout << report.str();

}

void reader(bool duplicateFds)
{
    char bufferA[8];
    char bufferB[8];
    int failedCount = 0;

    int fdA, fdB;
    if (duplicateFds)
    {
        fdA = open("./A", O_RDONLY);
        fdB = open("./B", O_RDONLY);
    }
    else
    {
        fdA = g_fileFd_A;
        fdB = g_fileFd_B;
    }

    timeval begin;
    gettimeofday(&begin, nullptr);
    size_t iteNo = 0;
    //for (; g_writeCompleted.load(memory_order_relaxed) == false; iteNo++)
    for (; iteNo < g_writeCount; iteNo++)
    {
        off_t posA = g_lastFileAPos.load(std::memory_order_relaxed);
        off_t posB = g_lastFileBPos.load(std::memory_order_relaxed);

        if (posA < 0 or posB < 0)
        {
            iteNo--;
            gettimeofday(&begin, nullptr);
            continue;
        }

        int readSzA;
        int readSzB;

        off_t readPosA = rand();
        off_t readPosB = rand();

        if (readPosA > posA)
        {
            readPosA = posA;
        }

        if (readPosB > posB)
        {
            readPosB = posB;
        }
                
        if (duplicateFds)
        {
            lseek(fdA, readPosA, SEEK_SET);
            lseek(fdB, readPosB, SEEK_SET);

            readSzA = read(fdA, bufferA, WriteSz);
            readSzB = read(fdB, bufferB, WriteSz);
        }
        else
        {
            readSzA = pread(fdA, bufferA, WriteSz, readPosA);
            readSzB = pread(fdB, bufferB, WriteSz, readPosB);
        }

        if (readSzA < WriteSz or readSzB < WriteSz)
        {
            failedCount++;

            if (failedCount % 1000 == 0)
            {
                cout << " " << failedCount;
                cout.flush();
            }
        }
    }

    auto diff = getMicroSecDiffFrom(begin);
    std::ostringstream oss;
    oss << "Read time: " << diff / 1000000 << " sec " << diff % 1000000 << " us"  << " ReadCount=" << iteNo << endl;
    oss << "failedCount=" << failedCount << endl;
    cout << oss.str();

    if (duplicateFds)
    {
        close(fdA);
        close(fdB);
    }
}



int main (int argc, char** argv) 
{
    if (argc < 3)
    {
        cout << "usage: " << argv[0] << " duplicateFds" << " write-count" << endl;
        return 0;
    }

    bool duplicateFds = atoi(argv[1]);
    g_writeCount = atoi(argv[2]);
    cout << "duplicateFds=" << duplicateFds << " write-count=" << g_writeCount << endl;

    if (duplicateFds)
    {
        g_fileFd_A = open("./A", O_WRONLY | O_CREAT, S_IREAD | S_IWRITE | S_IRGRP | S_IROTH);
        g_fileFd_B = open("./B", O_WRONLY | O_CREAT, S_IREAD | S_IWRITE | S_IRGRP | S_IROTH);
    }
    else 
    {
        g_fileFd_A = open("./A", O_RDWR | O_CREAT, S_IREAD | S_IWRITE | S_IRGRP | S_IROTH);
        g_fileFd_B = open("./B", O_RDWR | O_CREAT, S_IREAD | S_IWRITE | S_IRGRP | S_IROTH);
    }

    std::thread readerThread1(reader, duplicateFds);
    std::thread readerThread2(reader, duplicateFds);
    std::thread writerThread(writer, g_fileFd_A, g_fileFd_B);

    readerThread1.join();
    readerThread2.join();
    writerThread.join();

    close(g_fileFd_A);
    close(g_fileFd_B);

    return 0;
}