Pthreads and Work Queues in Linux

Date: 2018-02-14 13:28:06

Tags: c linux kernel driver

I am trying to synchronize two operations in a Linux driver I have created. Essentially, I need to use the DMAEngine to run work on two different DMA channels, one to transmit data and the other to receive data. Right now, inside my driver, I can send data over DMA from the transmit channel to the receive channel.

The in-driver test looks like this:

#define TEST_SIZE 1024

struct dma_proxy_channel_interface {
    unsigned char buffer[TEST_SIZE];
    enum proxy_status { PROXY_NO_ERROR = 0, PROXY_BUSY = 1, PROXY_TIMEOUT = 2, PROXY_ERROR = 3 } status;
    unsigned int length;
};

struct dma_proxy_channel {
    struct dma_proxy_channel_interface *interface_p;    /* user to kernel space interface */
    dma_addr_t interface_phys_addr;         

    struct device *proxy_device_p;              /* character device support */
    struct device *dma_device_p;
    dev_t dev_node;
    struct cdev cdev;
    struct class *class_p;

    struct dma_chan *channel_p;             /* dma support */
    struct completion cmp;
    dma_cookie_t cookie;
    dma_addr_t dma_handle;
    u32 direction;                      /* DMA_MEM_TO_DEV or DMA_DEV_TO_MEM */
};

static struct dma_proxy_channel channels[2];


/* Work handler: performs the blocking transmit transfer on the tx channel
 * so that the receive transfer below can be started in parallel.
 */
static void tx_test(struct work_struct *unused)
{
    transfer(&channels[0]);
}
static void test(void)
{
    int i;
    const int test_size = 1024;
    struct work_struct work;

    /* Initialize the transmit buffer with a pattern and then start
     * the separate thread of control to handle the transmit transfer
     * since the functions block waiting for the transfer to complete.
     */
    for (i = 0; i < test_size; i++) {
        channels[0].interface_p->buffer[i] = i;
    }

    channels[0].interface_p->length = test_size;

    INIT_WORK(&work, tx_test);
    schedule_work(&work);

    /* Initialize the receive buffer with zeroes so that we can be sure
     * the transfer worked, then start the receive transfer.
     */
    for (i = 0; i < test_size; i++) {
        channels[1].interface_p->buffer[i] = 0;
    }

    channels[1].interface_p->length = test_size;
    transfer(&channels[1]);

    /* Verify the receive buffer matches the transmit buffer to
     * confirm the transfer was good
     */
    for (i = 0; i < test_size; i++) {
        printk(KERN_INFO "tx: %d\trx: %d\n", channels[0].interface_p->buffer[i], channels[1].interface_p->buffer[i]);

        if (channels[0].interface_p->buffer[i] !=
            channels[1].interface_p->buffer[i])
            printk(KERN_INFO "buffer not equal, index = %d\n", i);
    }
}

The transfer function consists of calls into the DMAEngine that not only carry out the transfer but also perform most of the setup for the channel. Those calls are dma_map_single, dmaengine_prep_slave_single, setting the channel's callback, init_completion, dmaengine_submit, dma_async_issue_pending, wait_for_completion_timeout and dma_async_is_tx_complete.
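Those calls fit together roughly as follows. This is a minimal sketch built only from the field names of struct dma_proxy_channel shown above, not the driver's actual code; the sync_callback helper, the 3-second timeout, and the error handling are illustrative assumptions. It relies on <linux/dmaengine.h>, <linux/dma-mapping.h> and <linux/completion.h>.

/* Sketch only: a transfer routine assembled from the calls named above. */
static void sync_callback(void *completion)
{
    /* Called by the DMA engine when the transfer finishes */
    complete(completion);
}

static void transfer(struct dma_proxy_channel *pchannel_p)
{
    struct dma_proxy_channel_interface *interface_p = pchannel_p->interface_p;
    struct dma_async_tx_descriptor *chan_desc;
    enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
    enum dma_data_direction map_dir =
        (pchannel_p->direction == DMA_MEM_TO_DEV) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
    unsigned long timeout = msecs_to_jiffies(3000);    /* illustrative timeout */
    enum dma_status status;

    /* Make the interface buffer visible to the DMA engine */
    pchannel_p->dma_handle = dma_map_single(pchannel_p->dma_device_p,
                                            interface_p->buffer,
                                            interface_p->length, map_dir);

    /* Prepare the slave transfer and hook up the completion callback */
    chan_desc = dmaengine_prep_slave_single(pchannel_p->channel_p,
                                            pchannel_p->dma_handle,
                                            interface_p->length,
                                            pchannel_p->direction, flags);
    if (!chan_desc) {
        interface_p->status = PROXY_ERROR;
        goto unmap;
    }
    chan_desc->callback = sync_callback;
    chan_desc->callback_param = &pchannel_p->cmp;
    init_completion(&pchannel_p->cmp);

    /* Queue the descriptor and start the engine */
    pchannel_p->cookie = dmaengine_submit(chan_desc);
    dma_async_issue_pending(pchannel_p->channel_p);

    /* Block until the callback fires or the timeout expires */
    timeout = wait_for_completion_timeout(&pchannel_p->cmp, timeout);
    status = dma_async_is_tx_complete(pchannel_p->channel_p,
                                      pchannel_p->cookie, NULL, NULL);

    if (timeout == 0)
        interface_p->status = PROXY_TIMEOUT;
    else if (status != DMA_COMPLETE)
        interface_p->status = PROXY_ERROR;
    else
        interface_p->status = PROXY_NO_ERROR;

unmap:
    dma_unmap_single(pchannel_p->dma_device_p, pchannel_p->dma_handle,
                     interface_p->length, map_dir);
}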

As I understand it, these calls need to run for one channel at nearly the same time as they run for the other, so that both channels are ready together. A work queue inside the driver makes that possible.

Now the problem: when I build a character device driver to do this, each channel gets its own device file. The structure is that mmap is performed on each channel's file so that user space can set and change the buffer and length fields of dma_proxy_channel->interface_p for that channel. An ioctl call is then made, which essentially takes the private data that the character driver set up for mmap and pumps it into the transfer function you saw above. In practice these calls also have to be synchronized. In the example I was given, the author tried to use a pthread to synchronize the ioctl calls.
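For orientation, here is a minimal sketch of how such a per-channel character device can expose interface_p through mmap and feed it into transfer() from ioctl. The handler names, proxy_fops, and the use of dma_mmap_coherent are illustrative assumptions, not the author's confirmed code.

/* Sketch only: one way the per-channel character device could be wired up. */
static int local_open(struct inode *ino, struct file *file)
{
    /* Remember which channel this file refers to for later mmap/ioctl calls */
    file->private_data = container_of(ino->i_cdev, struct dma_proxy_channel, cdev);
    return 0;
}

static int mmap_handler(struct file *file, struct vm_area_struct *vma)
{
    struct dma_proxy_channel *pchannel_p = file->private_data;

    /* Map the shared interface structure into the caller's address space,
     * assuming interface_p was allocated with dma_alloc_coherent.
     */
    return dma_mmap_coherent(pchannel_p->dma_device_p, vma,
                             pchannel_p->interface_p,
                             pchannel_p->interface_phys_addr,
                             vma->vm_end - vma->vm_start);
}

static long ioctl_handler(struct file *file, unsigned int cmd, unsigned long arg)
{
    struct dma_proxy_channel *pchannel_p = file->private_data;

    /* Pump the channel's private data into the blocking transfer function */
    transfer(pchannel_p);
    return 0;
}

static const struct file_operations proxy_fops = {
    .owner          = THIS_MODULE,
    .open           = local_open,
    .mmap           = mmap_handler,
    .unlocked_ioctl = ioctl_handler,
};

Here is his user-space code.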

static struct dma_proxy_channel_interface *tx_proxy_interface_p;
static int tx_proxy_fd;
/* The following function is the transmit thread to allow the transmit and the 
 * receive channels to be operating simultaneously. The ioctl calls are blocking
 * such that a thread is needed.
 */
void *tx_thread(void *arg)
{
    int dummy, i;

    /* Set up the length for the DMA transfer and initialize the transmit
     * buffer to a known pattern.
     */
    tx_proxy_interface_p->length = TEST_SIZE;

    for (i = 0; i < TEST_SIZE; i++)
        tx_proxy_interface_p->buffer[i] = i;

    /* Perform the DMA transfer and check the status after it completes
     * as the call blocks until the transfer is done.
     */
    printf("tx ioctl user space\n");
    ioctl(tx_proxy_fd, 0, &dummy);

    if (tx_proxy_interface_p->status != PROXY_NO_ERROR)
        printf("Proxy tx transfer error\n");

    return NULL;
}

/* The following function uses the dma proxy device driver to perform DMA transfers
 * from user space. This app and the driver are tested with a system containing an
 * AXI DMA without scatter gather and with transmit looped back to receive.
 */
int main(int argc, char *argv[])
{   
    struct dma_proxy_channel_interface *rx_proxy_interface_p;
    int rx_proxy_fd, i;
    int dummy;
    pthread_t tid;

    printf("DMA proxy test\n");

    /* Step 1, open the DMA proxy device for the transmit and receive channels with
     * read/write permissions
     */     

    tx_proxy_fd = open("/dev/dma_proxy_tx", O_RDWR);

    if (tx_proxy_fd < 1) {
        printf("Unable to open DMA proxy device file\n");
        return -1;
    }

    rx_proxy_fd = open("/dev/dma_proxy_rx", O_RDWR);
    if (rx_proxy_fd < 1) {
        printf("Unable to open DMA proxy device file\n");
        return -1;
    }

    /* Step 2, map the transmit and receive channels memory into user space so it's accessible
     */
    tx_proxy_interface_p = (struct dma_proxy_channel_interface *)mmap(NULL, sizeof(struct dma_proxy_channel_interface), 
                                    PROT_READ | PROT_WRITE, MAP_SHARED, tx_proxy_fd, 0);

    printf("tx_proxy_interface_p: %p\n", (void*)tx_proxy_interface_p);

    rx_proxy_interface_p = (struct dma_proxy_channel_interface *)mmap(NULL, sizeof(struct dma_proxy_channel_interface),
                                    PROT_READ | PROT_WRITE, MAP_SHARED, rx_proxy_fd, 0);

    printf("rx_proxy_interface_p: %p\n", (void*)rx_proxy_interface_p);

    if ((rx_proxy_interface_p == MAP_FAILED) || (tx_proxy_interface_p == MAP_FAILED)) {
        printf("Failed to mmap\n");
        return -1;
    }

    /* Create the thread for the transmit processing and then wait a second so the printf output is not 
     * intermingled with the receive processing
     */
    pthread_create(&tid, NULL, tx_thread, NULL);    
    sleep(1);

    /* Initialize the receive buffer so that it can be verified after the transfer is done
     * and setup the size of the transfer for the receive channel
     */ 
    for (i = 0; i < TEST_SIZE; i++)
        rx_proxy_interface_p->buffer[i] = 0;

    rx_proxy_interface_p->length = TEST_SIZE;
    printf("rx_proxy_interface_p->length: %d\n", rx_proxy_interface_p->length); 
    /* Step 3, Perform the DMA transfer and after it finishes check the status 
     */

    printf("rx ioctl user space\n");
    ioctl(rx_proxy_fd, 0, &dummy);

    if (rx_proxy_interface_p->status != PROXY_NO_ERROR)
        printf("Proxy rx transfer error\n");

    /* Verify the data received matches what was sent (tx is looped back to rx)
     */
    for (i = 0; i < TEST_SIZE; i++) {
        printf("tx: %d\trx: %d\n", tx_proxy_interface_p->buffer[i], rx_proxy_interface_p->buffer[i]);
//      if (tx_proxy_interface_p->buffer[i] !=
//          rx_proxy_interface_p->buffer[i])
//              printf("buffer not equal, index = %d\n", i);
    }

    /* Unmap the proxy channel interface memory and close the device files before leaving
     */
    munmap(tx_proxy_interface_p, sizeof(struct dma_proxy_channel_interface));
    munmap(rx_proxy_interface_p, sizeof(struct dma_proxy_channel_interface));

    close(tx_proxy_fd);
    close(rx_proxy_fd);
    return 0;
}

Notice that he used pthread_create and sleep to try to synchronize the ioctl calls. The strange thing is that the data written to rx_proxy_interface_p from user space before the second ioctl call is not reflected in kernel space: when that ioctl call is made, the private data of the character driver associated with that channel has not changed.

My question is: why does a work queue in kernel space appear to synchronize the calls to the transfer function for these channels, while in user space, when threads are used, the mmapped location is not updated in kernel space? And what performance difference should I expect between using a work queue and using the pthread library?

0 Answers

There are no answers yet.