从numpy数组中组装cython memoryview

时间:2012-05-05 19:07:53

标签: cython

我有一组 numpy 数组,它们分别是一个 Python 对象列表中各个对象的属性。为了在 Cython 中用 prange 处理(需要 nogil),我想创建一个第一维为"间接"(indirect)的 memoryview,其余维度直接引用各个 numpy 数组中的数据。例如,假设 objects 是一个对象列表,其中每个对象都有一个 vector 属性。

我想做类似的事情:

# Sketch of the desired usage: `vectors` is only declared here, never
# initialized -- how to initialize it is what the question asks.
cdef double[ ::cython.view.indirect, ::1 ] vectors
# enumerate() supplies the row index; the loop variable is named `obj`
# so that it does not shadow the builtin `object`.
for i, obj in enumerate(objects):
    vectors[ i ] = obj.vector

但是我应该如何初始化 vectors?这究竟是否可行?或者 memoryview 只能作为单个对象的内存视图……如果是这样,那就引出另一个问题:如何动态创建一个由 memoryview 组成的数组?

1 个答案:

答案 0 :(得分:5)

借助下面给出的代码,您可以这样进行赋值:

cimport stackoverflow_contrib

# The right-hand side is parenthesized so that it may legally continue on
# the following line; the loop variable is `obj` to avoid shadowing `object`.
cdef double[::cython.view.indirect, ::1] vectors = (
    stackoverflow_contrib.OnceIndirect([obj.vector for obj in objects]))

其中stackoverflow_contrib.pyx如下:

from libc.stdlib cimport malloc, free
from libc.string cimport strcmp

from cython.view cimport memoryview
from cpython cimport buffer

cdef class OnceIndirect:
    """
    Buffer exporter that stacks several equally-shaped buffers behind one
    extra, indirect leading dimension.

    A table of pointers (one per row) is allocated, each entry pointing at
    the data of the corresponding row buffer. The object then exports that
    table through the buffer protocol with shape (n_rows, *row_shape), where
    dimension 0 has stride sizeof(void*) and suboffset 0. No row data is
    copied by this class itself.
    """
    # Cython memoryviews of every row; holding them keeps the row buffers
    # acquired for as long as this object lives.
    cdef object _objects
    # Table of n_rows pointers to the rows' data.
    cdef void** buf
    # Number of exported dimensions (row ndim + 1).
    cdef int ndim
    cdef int n_rows
    # Size in bytes of the pointer table.
    cdef int buf_len
    # Arrays of length ndim describing the exported buffer.
    cdef Py_ssize_t* shape
    cdef Py_ssize_t* strides
    cdef Py_ssize_t* suboffsets
    cdef Py_ssize_t itemsize
    # Format string copied from the first row, or None if unavailable.
    cdef bytes format
    # Non-zero if any row buffer is read-only.
    cdef int is_readonly

    def __cinit__(self, object rows, want_writable=True, want_format=True, allow_indirect=False):
        """
        Acquire a buffer from every item of rows and build the pointer table.

        Set want_writable to False if you don't want writable data. (This may
        prevent copies.)
        Set want_format to False if your input doesn't support PyBUF_FORMAT (unlikely)
        Set allow_indirect to True if you are ok with the memoryview being indirect
        in dimensions other than the first. (This may prevent copies.)

        Raises IndexError if rows is empty, MemoryError if an allocation
        fails, and AssertionError (from assert_similar) if the rows do not
        all share the same layout.
        """
        cdef int dim
        cdef int i
        cdef memoryview example_obj
        cdef memoryview obj

        demand = buffer.PyBUF_INDIRECT if allow_indirect else buffer.PyBUF_STRIDES
        if want_writable:
            demand |= buffer.PyBUF_WRITABLE
        if want_format:
            demand |= buffer.PyBUF_FORMAT
        self._objects = [memoryview(row, demand) for row in rows]
        self.n_rows = len(self._objects)
        if self.n_rows == 0:
            # Same exception type the unchecked self._objects[0] lookup
            # would produce, but with an explanation.
            raise IndexError("OnceIndirect needs at least one row")

        # The first row serves as the template for the layout of all rows.
        example_obj = self._objects[0]
        self.ndim = 1 + example_obj.view.ndim
        self.itemsize = example_obj.itemsize

        # __cinit__ zero-initializes the C fields, so __dealloc__ can free
        # whatever was allocated even if we raise part-way through.
        self.buf_len = sizeof(void*) * self.n_rows
        self.buf = <void**>malloc(self.buf_len)
        self.shape = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        self.strides = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        self.suboffsets = <Py_ssize_t*>malloc(sizeof(Py_ssize_t) * self.ndim)
        if (self.buf == NULL or self.shape == NULL
                or self.strides == NULL or self.suboffsets == NULL):
            raise MemoryError()

        # Converting a NULL char* to bytes would crash, so guard against it.
        if want_format and example_obj.view.format != NULL:
            self.format = example_obj.view.format
        else:
            self.format = None

        # Dimension 0 walks the pointer table; its suboffset of 0 marks it
        # as indirect (each entry is dereferenced, then offset by 0 bytes).
        self.shape[0] = self.n_rows
        self.strides[0] = sizeof(void*)
        self.suboffsets[0] = 0
        # The remaining dimensions mirror the layout of the template row.
        for dim in range(1, self.ndim):
            self.shape[dim] = example_obj.view.shape[dim - 1]
            self.strides[dim] = example_obj.view.strides[dim - 1]
            if example_obj.view.suboffsets == NULL:
                # A negative suboffset means "no indirection" here.
                self.suboffsets[dim] = -1
            else:
                self.suboffsets[dim] = example_obj.view.suboffsets[dim - 1]

        i = 0
        for obj in self._objects:
            assert_similar(example_obj, obj)
            # Writing through the combined view must be refused if any
            # single row is read-only, not just the first one.
            self.is_readonly |= obj.view.readonly
            self.buf[i] = obj.view.buf
            i += 1

    def __getbuffer__(self, Py_buffer* buff, int flags):
        """
        Export the pointer table as an indirect buffer.

        Raises BufferError if the consumer did not request PyBUF_INDIRECT,
        if it requests a writable buffer but a row is read-only, or if it
        requests a format string that was not captured at construction.
        """
        if (flags & buffer.PyBUF_INDIRECT) != buffer.PyBUF_INDIRECT:
            raise BufferError("don't want to copy data")
        if (flags & buffer.PyBUF_WRITABLE) and self.is_readonly:
            raise BufferError("couldn't provide writable, you should have demanded it earlier")
        if flags & buffer.PyBUF_FORMAT:
            if self.format is None:
                raise BufferError("couldn't provide format, you should have demanded it earlier")
            # Points into self.format, which stays alive as long as self.
            buff.format = self.format
        else:
            buff.format = NULL

        buff.buf = <void*>self.buf
        buff.obj = self
        # NOTE(review): this is the byte size of the pointer table; the
        # buffer protocol documents len as product(shape) * itemsize --
        # confirm whether any consumer relies on that.
        buff.len = self.buf_len
        buff.readonly = self.is_readonly
        buff.ndim = self.ndim
        buff.shape = self.shape
        buff.strides = self.strides
        buff.suboffsets = self.suboffsets
        buff.itemsize = self.itemsize
        buff.internal = NULL

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe after a failed __cinit__.
        free(self.buf)
        free(self.shape)
        free(self.strides)
        free(self.suboffsets)

cdef int assert_similar(memoryview left_, memoryview right_) except -1:
    """
    Check that two memoryviews describe buffers with the same layout.

    Compares ndim, every shape and stride entry, the suboffsets (both
    absent, or equal entry by entry) and the format strings (both absent,
    or equal). Returns 0 on success and raises AssertionError, carrying the
    two differing values, on the first mismatch.

    The checks are raised explicitly instead of using `assert` so that they
    cannot be compiled out: the caller goes on to use raw pointers under the
    assumption that the layouts match.
    """
    cdef Py_buffer left = left_.view
    cdef Py_buffer right = right_.view
    cdef int i
    cdef object left_format
    cdef object right_format

    if left.ndim != right.ndim:
        raise AssertionError((left.ndim, right.ndim))
    for i in range(left.ndim):
        if left.shape[i] != right.shape[i]:
            raise AssertionError((left_.shape, right_.shape))
        if left.strides[i] != right.strides[i]:
            raise AssertionError((left_.strides, right_.strides))

    if left.suboffsets == NULL or right.suboffsets == NULL:
        # Only index the arrays when both exist; one-sided NULL is a mismatch.
        if left.suboffsets != right.suboffsets:
            raise AssertionError((left_.suboffsets, right_.suboffsets))
    else:
        for i in range(left.ndim):
            if left.suboffsets[i] != right.suboffsets[i]:
                raise AssertionError((left_.suboffsets, right_.suboffsets))

    # Convert to Python objects up front: a NULL char* must never reach
    # bytes() or strcmp(), so NULL is represented as None.
    left_format = None if left.format == NULL else bytes(left.format)
    right_format = None if right.format == NULL else bytes(right.format)
    if left_format != right_format:
        raise AssertionError((left_format, right_format))
    return 0

from cython cimport view

cimport numpy as np
import numpy as np

def show_memoryview(x):
    """
    Print the layout of x as a dict.

    x may be any object exposing shape, strides, suboffsets and itemsize
    attributes. The call form of print with a single argument produces the
    same output whether print is a statement or a function.
    """
    print(dict(shape=x.shape, strides=x.strides, suboffsets=x.suboffsets, itemsize=x.itemsize))

def go():
    """
    Demonstrate OnceIndirect on three 2x10 float64 arrays.

    Builds the arrays, wraps them in a OnceIndirect, views that through an
    indirect typed memoryview, writes through the view to show that the
    underlying array changes, and finally nests two indirect views inside
    another OnceIndirect.

    Returns the tuple (rows, big_view, big_view2).
    """
    row0 = np.array(range(20), dtype=np.float64).reshape(2, 10)
    row1 = np.array(range(20, 40), dtype=np.float64).reshape(2, 10)
    row2 = np.array(range(40, 60), dtype=np.float64).reshape(2, 10)
    small_view = memoryview(row0, buffer.PyBUF_STRIDES)
    show_memoryview(small_view)
    rows = [row0, row1, row2]
    big_view = OnceIndirect(rows)
    cdef double[::view.indirect, :, :] big_view2 = big_view
    show_memoryview(big_view2)
    # Print row1 before and after writing through the view: the write is
    # expected to show up in the array itself.
    print(row1)
    big_view2[1, 0, 1] += 200
    print(row1)
    cdef double[:, :] row1_view = big_view2[1]
    assert row1_view[0, 1] >= 200
    cdef double[::view.indirect, :, :] big_view3 = OnceIndirect([row0, row1, row0])
    # Nesting: the rows are themselves indirect, hence allow_indirect=True.
    cdef double[::view.indirect, ::view.indirect, :, :] dub = OnceIndirect([big_view2, big_view3], allow_indirect=True)
    show_memoryview(dub)
    # big_view2 can be indexed and sliced in Cython and Python code
    # note big_view2 is a cython memoryview object not a OnceIndirect object because it was implicitly cast to one
    # rows, big_view, big_view2 all refer to the same data!
    return (rows, big_view, big_view2)