Cython is a programming language that makes writing C extensions for the Python language as easy as Python itself.
It aims to be a superset of the Python language, giving it high-level, object-oriented, functional, and dynamic programming features.
Cython can convert Cython code to C/C++ code, then compile it into a shared library that can be used from Python.
Basic structure
The basic structure of cython files:
# test.pxd
# Declare the functions provided by the C/C++ library.
# Just copy each function declaration from the .hpp or .h file (without the trailing ";").
# The next line is NOT a meaningless comment: it is a distutils directive that
# defines a macro when the generated C/C++ file is compiled.
# distutils: define_macros=NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION
# It has the same effect as "#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION" in the generated C file.
cdef extern from "permutation.hpp":
    # Declarations must be indented so that they belong to the extern block.
    void cmkl_permut(unsigned int* seed_p, float* location_vecs, int* N_SNPs, int N_chros, float* annot_mats, int N_annots, int N_times, float* count_mats, int* RNsCol)
# test.pyx
# Define the API that Python code uses to call the C functions.
from . cimport test  # cimport the declarations in test.pxd
import numpy as np  # import Python's numpy
from libc.stdlib cimport malloc, free  # cimport malloc and free from C


def python_api():
    cdef int a = 0  # define an int variable "a" at the C level
    cdef float b = 1  # define a float variable "b" at the C level
    c = 0  # define an int variable "c" at the Python level
    cdef int d = c  # assign the Python variable's value to a C variable
    cdef int* a_p = &a  # pointer in C
    cdef int[2][2] carr_a  # create a C array
    # Create a C array in pointer style.
    cdef int* carr_b
    carr_b = <int*>malloc(4 * sizeof(int))
    if carr_b == NULL:
        # malloc returns NULL when the allocation fails
        raise MemoryError()
    # Create a memoryview from a numpy array; the array's dtype must match
    # the memoryview's element type (np.intc for a C int).
    cdef int[:] mv_a = np.zeros(...)
    cdef int[:, :] mv_b = carr_a  # create a memoryview from a C array
    cdef int[:] mv_c = <int[:4]>carr_b  # create a memoryview from a pointer-style C array
    # A memoryview can be declared first and bound to data later.
    cdef int[:] mv_d
    mv_d = np.zeros(...)
    # Call the C function.
    test.cmkl_permut(...)
    # Remember to free malloc'ed memory; mv_c must not be used after this point.
    free(carr_b)
Caution: cdef declarations can only appear at the top level of a function body. They can’t be placed inside control structures such as if, for, and so on.
Memoryview
A memoryview is a Python object that can be used to control memory directly. NumPy arrays and pandas DataFrames are backed by memory buffers as well. At the same time, Cython provides an API to work with memoryviews. So a memoryview is a bridge from Python objects such as np.array to C-level memory data.
C array <–> memoryview
from libc.stdlib cimport malloc, free


def func():
    cdef int[2][2] carr_a
    cdef int* carr_b
    carr_b = <int*>malloc(4 * sizeof(int))
    if carr_b == NULL:
        # malloc returns NULL when the allocation fails
        raise MemoryError()
    # To memoryview.
    cdef int[:, :] mv_a = carr_a
    cdef int[:] mv_b = <int[:4]>carr_b
    # No copy is made: the memoryview takes control of the same region of memory.
    # mv_a and carr_a, and likewise mv_b and carr_b, use the same memory to store data.
    # To C pointer: take the address of the first element.
    cdef int* a_p = &mv_a[0, 0]
    cdef int* b_p = &mv_b[0]
    # Release the malloc'ed buffer; mv_b and b_p must not be used afterwards.
    free(carr_b)
np.array <–> memoryview
import numpy as np


def func():
    # np.array -> memoryview: assign the array to a typed memoryview.
    # The array's dtype must match the element type (np.intc for a C int).
    cdef int[:] mv_a = np.zeros(...)
    # memoryview -> np.array: np.asarray wraps the memoryview as a numpy array.
    np_arr = np.asarray(mv_a, dtype=np.intc)
Compile Cython module
...
from setuptools.extension import Extension
from Cython.Build import cythonize
e = Extension(
"fastpermut.c_conf", # Name of the resulting python extension
sources=["src/fastpermut/pyconf.pyx"], # Cython source file
libraries=["permut_utils"], # Name of the C library without the 'lib' prefix and '.so' suffix
library_dirs=[os.path.expanduser("~/hpcPermut/lib"),
os.path.join(os.environ.get('PERMUT_PATH'),"hpcPermut/lib")], # Path to the directory of your .so file for build time
runtime_library_dirs=[os.path.expanduser("~/hpcPermut/lib"),
os.path.join(os.environ.get('PERMUT_PATH'),"hpcPermut/lib")], # # Path to the directory of your .so file for runtime
language='c++',
include_dirs=[os.path.expanduser("~/hpcPermut/include"),
os.path.join(os.environ.get('PERMUT_PATH'),"hpcPermut/include/common")],
# Path to the directory of your header files
)
setup(
...
ext_modules=cythonize([e], language_level=3),
...
)