#ifndef RT_THREADS_INC
#define RT_THREADS_INC 1

#if defined(USEPOSIXTHREADS) && defined(USEUITHREADS)
#error You may only define USEPOSIXTHREADS or USEUITHREADS, but not both
#endif

/* select a default threading model for common platforms */
#if defined(_AIX) || defined(__APPLE__) || defined(_CRAY) || defined(__hpux) || \
    defined(__irix) || defined(__linux) || defined(__osf__) || defined(__PARAGON__)
#if !defined(USEUITHREADS) && !defined(USEPOSIXTHREADS)
#define USEPOSIXTHREADS
#endif
#endif

#if !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS)
/* ... single-threaded fallback definitions elided in this excerpt ... */
#endif

#define CPU_SMTDEPTH_UNKNOWN           0
#define CPU_UNKNOWN           0x00000001

/* x86 CPU capability flags */
#define CPU_HT                0x00000010
#define CPU_HYPERVISOR        0x00000020
#define CPU_SSE2              0x00000100
#define CPU_SSE4_1            0x00000200
#define CPU_F16C              0x00000400
#define CPU_FMA               0x00000800
#define CPU_AVX               0x00001000
#define CPU_AVX2              0x00002000
#define CPU_AVX512F           0x00010000
#define CPU_AVX512CD          0x00020000
#define CPU_AVX512ER          0x00040000
#define CPU_AVX512PF          0x00080000
#define CPU_KNL         (CPU_AVX512F | CPU_AVX512CD | \
                         CPU_AVX512ER | CPU_AVX512PF)

/* ARM64 CPU capability flags */
#define CPU_ARM64_CPUID       0x00000010
#define CPU_ARM64_CRC32       0x00000020
#define CPU_ARM64_FP          0x00000080
#define CPU_ARM64_HPFP        0x00000080
#define CPU_ARM64_AES         0x00000100
#define CPU_ARM64_ATOMICS     0x00000200
#define CPU_ARM64_ASIMD       0x00000400
#define CPU_ARM64_ASIMDDP     0x00000800
#define CPU_ARM64_ASIMDHP     0x00001000
#define CPU_ARM64_ASIMDRDM    0x00002000
#define CPU_ARM64_ASIMDFHM    0x00004000
#define CPU_ARM64_SVE         0x00008000
#define CPU_ARM64_SHA512      0x00010000
#define CPU_ARM64_SHA1        0x00020000
#define CPU_ARM64_SHA2        0x00040000
#define CPU_ARM64_SHA3        0x00080000

typedef struct rt_cpu_caps_struct {
  /* ... members elided in this excerpt ... */
} rt_cpu_caps_t;

#ifdef USEPOSIXTHREADS
/* ... */
typedef struct rwlock_struct {
  pthread_mutex_t lock;          /* monitor lock guarding the rwlock state */
  /* ... */
  pthread_cond_t  rdrs_ok;       /* condition: waiting readers may proceed */
  unsigned int waiting_writers;  /* count of writers waiting for the lock  */
  pthread_cond_t  wrtr_ok;       /* condition: waiting writers may proceed */
} rt_rwlock_t;
#endif

/* Win32/Win64 threads */
#if 0 && (NTDDI_VERSION >= NTDDI_WS08 || _WIN32_WINNT > 0x0600)
#define RTUSEWIN2008CONDVARS 1
#endif
/* ... */
#define RTUSEINTERLOCKEDATOMICOPS 1

#define RT_COND_SIGNAL    0
#define RT_COND_BROADCAST 1

/* ... emulated condition variable struct elided ... */
  CRITICAL_SECTION waiters_lock; /* lock guarding the waiter bookkeeping */
/* ... */

typedef struct rwlock_struct {
  /* ... */
  unsigned int waiting_writers;  /* count of writers waiting for the lock */
  /* ... */
} rt_rwlock_t;

/* platform-specific atomic integer operations */
#if defined(USENETBSDATOMICS)
#include <sys/atomic.h>
#elif defined(USESOLARISATOMICS)
/* ... */
#endif

#if defined(USENETBSDATOMICS)
/* ... */
#elif defined(USESOLARISATOMICS)
/* ... */
#elif defined(USEWIN32ATOMICS)
/* ... */
#endif

typedef struct rt_run_barrier_struct {
  /* ... */
  void * (*fctn)(void *);        /* function to be executed by the workers  */
  /* ... */
  void * (*rslt)(void *);        /* function pointer returned to the caller */
  /* ... */
} rt_run_barrier_t;

/* sleeping barrier synchronization for the thread pool */
void * (*rt_thread_run_barrier(rt_run_barrier_t *barrier,
                               void *fctn(void *),
                               void *parms,
                               void **rsltparms))(void *);

#define RT_TILESTACK_EMPTY -1

#define RT_SCHED_DONE     -1
#define RT_SCHED_CONTINUE  0

typedef struct rt_shared_iterator_struct {
  /* ... */
} rt_shared_iterator_t;

#define RT_THREADPOOL_DEVLIST_CPUSONLY NULL
#define RT_THREADPOOL_DEVID_CPU -1

/* launch threads onto a new function, with associated parms */
int rt_threadpool_launch(rt_threadpool_t *thrpool,
                         void *fctn(void *), void *parms, int blocking);

/* launch up to numprocs threads using the shared iterator as a load balancer */
int rt_threadlaunch(int numprocs, void *clientdata, void *fctn(void *),
                    rt_tasktile_t *tile);

 int padding1[8]
avoid false sharing, cache aliasing 
rt_shared_iterator_t * iter
dynamic work scheduler 
rt_mutex_t mtx
Mutex lock for the structure. 
int * devlist
per-worker CPU/GPU device IDs 
int n_clients
Number of threads to wait for at barrier. 
int rt_rwlock_writelock(rt_rwlock_t *)
set writer lock 
int rt_mutex_unlock(rt_mutex_t *)
unlock a mutex 
int * rt_cpu_affinitylist(int *cpuaffinitycount)
query CPU affinity of the calling process (if allowed by host system) 
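
A small usage sketch for the affinity query. The header name "threads.h", the NULL-on-unsupported return, and the caller freeing the returned list are assumptions of this example:

    #include <stdio.h>
    #include <stdlib.h>
    #include "threads.h"             /* assumed header name for this API */

    void show_affinity(void) {
      int cpucount = 0;
      int *cpulist = rt_cpu_affinitylist(&cpucount);
      if (cpulist == NULL)
        return;                      /* assumption: NULL when unsupported */
      for (int i = 0; i < cpucount; i++)
        printf("bound to CPU %d\n", cpulist[i]);
      free(cpulist);                 /* assumption: caller owns the list */
    }
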
int rt_tilestack_empty(rt_tilestack_t *)
query if the task tile stack is empty or not 
void * clientdata
worker parameters 
int result
Answer to be returned by barrier_wait. 
rt_mutex_t lock
Mutex lock for the structure. 
int rt_threadpool_worker_devscaletile(void *voiddata, int *tilesize)
worker thread calls this to scale max tile size by worker speed as determined by the SM/core count and clock rate 
struct rt_tasktile_struct rt_tasktile_t
Task tile struct for stack, iterator, and scheduler routines; 'start' is inclusive, 'end' is exclusive. 
int rt_mutex_lock(rt_mutex_t *)
lock a mutex 
int rt_tilestack_push(rt_tilestack_t *, const rt_tasktile_t *)
push a task tile onto the stack 
int rt_cpu_capability_flags(rt_cpu_caps_t *cpucaps)
CPU optional instruction set capability flags. 
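
A hedged sketch of testing capability flags: the `flags` member name and the zero-on-success return convention are assumptions, since the struct body is not shown in this excerpt:

    #include <stdio.h>
    #include "threads.h"                         /* assumed header name */

    void show_caps(void) {
      rt_cpu_caps_t cpucaps;
      if (rt_cpu_capability_flags(&cpucaps) == 0) { /* assumed: 0 on success */
        if (cpucaps.flags & CPU_AVX2)               /* 'flags' member assumed */
          printf("AVX2 supported\n");
        if ((cpucaps.flags & CPU_KNL) == CPU_KNL)   /* all four AVX-512 KNL bits */
          printf("KNL AVX-512 subset supported\n");
      }
    }
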
int padding2[8]
Pad to avoid false sharing, cache aliasing. 
int rt_threadpool_sched_dynamic(rt_threadpool_t *thrpool, rt_tasktile_t *tile)
Set dynamic scheduler state to half-open interval defined by tile. 
int rt_mutex_init(rt_mutex_t *)
initialize a mutex 
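
Taken together, the mutex routines documented here follow the usual init/lock/unlock/destroy lifecycle; a minimal sketch (the header name "threads.h" is an assumption of this example):

    #include "threads.h"     /* assumed header name for this API */

    static rt_mutex_t mtx;

    void with_lock(void) {
      rt_mutex_lock(&mtx);
      /* ... critical section ... */
      rt_mutex_unlock(&mtx);
    }

    /* setup/teardown, typically once per program run */
    void setup(void)    { rt_mutex_init(&mtx); }
    void teardown(void) { rt_mutex_destroy(&mtx); }
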
int padding1[8]
Pad to avoid false sharing, cache aliasing. 
struct rt_threadpool_workerdata_struct rt_threadpool_workerdata_t
thread-specific handle data for workers 
int rt_threadpool_destroy(rt_threadpool_t *thrpool)
join all worker threads and free resources 
float devspeed
speed scaling for this device 
int rt_shared_iterator_init(rt_shared_iterator_t *it)
initialize a shared iterator 
void * rsltparms
parms to return to barrier wait callers 
int rt_tilestack_init(rt_tilestack_t *s, int size)
initialize task tile stack (to empty) 
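
A sketch of the tile stack lifecycle, assuming rt_tilestack_pop() reports RT_TILESTACK_EMPTY once the stack has been drained (the initial size of 64 is a placeholder):

    #include "threads.h"         /* assumed header name for this API */

    void drain_failed_tiles(void) {
      rt_tilestack_t stack;
      rt_tasktile_t tile;

      rt_tilestack_init(&stack, 64);   /* initial allocation of 64 tiles */

      tile.start = 0;                  /* half-open interval [0, 16) */
      tile.end   = 16;
      rt_tilestack_push(&stack, &tile);

      while (rt_tilestack_pop(&stack, &tile) != RT_TILESTACK_EMPTY) {
        /* reprocess tasks tile.start .. tile.end-1 */
      }

      rt_tilestack_destroy(&stack);
    }
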
int rt_threadlaunch_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred 
int rt_cond_broadcast(rt_cond_t *)
signal a condition variable, waking all threads 
struct rt_threadlaunch_struct rt_threadlaunch_t
Routines to generate a pool of threads which then grind through a dynamically load balanced work queue. 
int rt_shared_iterator_getfatalerror(rt_shared_iterator_t *it)
master thread calls this to query for fatal errors 
void rt_tilestack_destroy(rt_tilestack_t *)
destroy task tile stack 
int size
current allocated stack size 
rt_threadpool_workerdata_t * workerdata
per-worker data 
int rt_thread_numprocessors(void)
number of processors available, subject to user override 
int rt_thread_setconcurrency(int)
set the concurrency level and scheduling scope for threads 
rt_cond_t wait_cv
Clients wait on condition variable to proceed. 
int rt_cpu_smt_depth(void)
query CPU SMT depth (logical processors per physical core, aka hyperthreading) 
int padding2[8]
Pad to avoid false sharing, cache aliasing. 
int rt_threadpool_getfatalerror(void *thrparms)
master thread calls this to query for fatal errors 
int padding2[8]
avoid false sharing 
rt_tasktile_t * s
stack of task tiles 
int rt_thread_numphysprocessors(void)
number of physical processors available 
int top
index of top stack element 
int rt_shared_iterator_setfatalerror(rt_shared_iterator_t *it)
worker thread calls this to indicate a fatal error 
int rt_atomic_int_add_and_fetch(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning the new value 
int rt_threadpool_worker_getdata(void *voiddata, void **clientdata)
worker thread can call this to get its client data pointer 
int end
ending value (exclusive) 
int n_clients
Number of threads to wait for at barrier. 
int rt_thread_barrier_init_proc_shared(rt_barrier_t *, int n_clients)
When rendering in the CAVE we use a special synchronization mode so that shared memory mutexes and condition variables work correctly across processes. 
int rt_threadpool_tile_failed(void *thrpool, rt_tasktile_t *tile)
worker thread calls this when it fails computing a tile after it has already taken it from the scheduler 
int rt_threadpool_wait(rt_threadpool_t *thrpool)
wait for all worker threads to complete their work 
int rt_thread_join(rt_thread_t, void **)
join (wait for completion of, and merge with) a thread 
int rt_atomic_int_set(rt_atomic_int_t *atomp, int val)
set an atomic int variable 
int rt_threadpool_get_workercount(rt_threadpool_t *thrpool)
query number of worker threads in the pool 
rt_run_barrier_t runbar
master/worker run barrier 
int rt_thread_run_barrier_poll(rt_run_barrier_t *barrier)
non-blocking poll to see if peers are already at the barrier 
struct barrier_struct rt_barrier_t
barrier sync object with padding to prevent false sharing 
int rt_rwlock_readlock(rt_rwlock_t *)
set reader lock 
int rt_thread_create(rt_thread_t *, void *fctn(void *), void *)
create a new child thread 
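
A minimal create/join sketch using the two routines above (the header name "threads.h" and the worker body are assumptions of this example):

    #include "threads.h"     /* assumed header name for this API */

    void * my_worker(void *parms) {
      /* ... thread body ... */
      return parms;          /* returned value is delivered to rt_thread_join */
    }

    void spawn_and_join(void) {
      rt_thread_t tid;
      void *result = NULL;
      rt_thread_create(&tid, my_worker, NULL);
      rt_thread_join(tid, &result);
    }
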
void * (*rt_thread_run_barrier(rt_run_barrier_t *barrier, void *fctn(void *), void *parms, void **rsltparms))(void *)
sleeping barrier synchronization for thread pool 
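
A hedged sketch of how a persistent pool worker might use the run barrier: workers pass a NULL function and sleep until the master's call supplies one, and a NULL return is assumed to signal shutdown. This is an illustration of the idea, not necessarily the pool's exact internal logic:

    static rt_run_barrier_t runbar;   /* shared with the master thread */

    void * pool_workerproc(void *voidparms) {
      void * (*fctn)(void *);
      /* sleep at the barrier until the master posts work */
      while ((fctn = rt_thread_run_barrier(&runbar, NULL, NULL, NULL)) != NULL) {
        (*fctn)(voidparms);           /* run the posted function */
      }
      return NULL;                    /* NULL function pointer => shut down */
    }
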
rt_tilestack_t * errorstack
stack of tiles that failed 
int rt_atomic_int_get(rt_atomic_int_t *atomp)
get an atomic int variable 
int rt_shared_iterator_set(rt_shared_iterator_t *it, rt_tasktile_t *tile)
Set shared iterator state to half-open interval defined by tile. 
int rt_tilestack_pop(rt_tilestack_t *, rt_tasktile_t *)
pop a task tile off of the stack 
int rt_threadlaunch_getdata(void *thrparms, void **clientdata)
worker thread can call this to get its client data pointer 
int rt_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers 
int end
ending task ID (exclusive) 
int rt_shared_iterator_destroy(rt_shared_iterator_t *it)
destroy a shared iterator 
int val
Integer value to be atomically manipulated. 
struct rt_shared_iterator_struct rt_shared_iterator_t
iterator used for dynamic load balancing 
int rt_mutex_spin_lock(rt_mutex_t *)
lock a mutex by spinning only 
int sum
Sum of arguments passed to barrier_wait. 
int fatalerror
cancel processing immediately for all threads 
int threadcount
total number of worker threads 
int rt_mutex_trylock(rt_mutex_t *)
try to lock a mutex 
rt_mutex_t lock
Mutex lock for the structure. 
int start
starting task ID (inclusive) 
void rt_thread_barrier_destroy(rt_barrier_t *barrier)
destroy counting barrier primitive 
int rt_mutex_destroy(rt_mutex_t *)
destroy a mutex 
void * parms
fctn parms for this worker 
struct rt_tilestack_struct rt_tilestack_t
stack of work tiles, for error handling 
int rt_rwlock_init(rt_rwlock_t *)
initialize a reader/writer lock 
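
A short reader/writer sketch using the rwlock routines documented here (the header name "threads.h" is an assumption of this example):

    #include "threads.h"          /* assumed header name for this API */

    static rt_rwlock_t rwl;       /* initialized once with rt_rwlock_init(&rwl) */

    void reader(void) {
      rt_rwlock_readlock(&rwl);   /* many readers may hold this concurrently */
      /* ... read shared state ... */
      rt_rwlock_unlock(&rwl);
    }

    void writer(void) {
      rt_rwlock_writelock(&rwl);  /* writers get exclusive access */
      /* ... modify shared state ... */
      rt_rwlock_unlock(&rwl);
    }
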
void rt_thread_run_barrier_destroy(rt_run_barrier_t *barrier)
destroy thread pool barrier 
int n_waiting
Number of currently waiting threads. 
int rt_threadlaunch(int numprocs, void *clientdata, void *fctn(void *), rt_tasktile_t *tile)
launch up to numprocs threads using shared iterator as a load balancer 
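
A sketch of the whole rt_threadlaunch() pattern: the master describes the work range as a tile, and each worker pulls chunks from the shared iterator until RT_SCHED_DONE. The chunk size of 16, the thread count of 8, and the worker body are placeholders:

    #include "threads.h"          /* assumed header name for this API */

    void * process_items(void *voidparms) {
      rt_tasktile_t tile;
      void *clientdata = NULL;
      int id = 0, count = 0;

      rt_threadlaunch_getdata(voidparms, &clientdata);
      rt_threadlaunch_getid(voidparms, &id, &count);

      /* pull chunks of up to 16 work items until the iterator is exhausted */
      while (rt_threadlaunch_next_tile(voidparms, 16, &tile) != RT_SCHED_DONE) {
        for (int i = tile.start; i < tile.end; i++) {
          /* process work item i using clientdata */
        }
      }
      return NULL;
    }

    int launch_all(void *mydata, int nitems) {
      rt_tasktile_t tile;
      tile.start = 0;             /* half-open interval [0, nitems) */
      tile.end   = nitems;
      return rt_threadlaunch(8, mydata, process_items, &tile);
    }
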
int padding1[8]
Pad to avoid false sharing, cache aliasing. 
rt_shared_iterator_t iter
dynamic work scheduler 
int rt_threadpool_worker_setdevspeed(void *voiddata, float speed)
Worker thread calls this to set relative speed of this device as determined by the SM/core count and clock rate. 
int rt_threadpool_worker_getdevid(void *voiddata, int *devid)
worker thread can call this to get its CPU/GPU device ID 
int padding1[8]
Pad to avoid false sharing, cache aliasing. 
int rt_threadpool_next_tile(void *thrpool, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit by iterating the shared iterator; returns -1 if no iterations are left or a fatal error has occurred 
void * thrpool
void ptr to thread pool struct 
int growthrate
stack growth chunk size 
rt_tilestack_t errorstack
stack of tiles that failed 
int rt_tilestack_compact(rt_tilestack_t *)
shrink memory buffers associated with task tile stack if possible 
int rt_tilestack_popall(rt_tilestack_t *)
pop all of the task tiles off of the stack 
int n_waiting
Number of currently waiting threads. 
int rt_threadpool_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred 
int padding2[8]
avoid false sharing, cache aliasing 
struct rt_run_barrier_struct rt_run_barrier_t
run-barrier sync object with padding to prevent false sharing 
int rt_cond_wait(rt_cond_t *, rt_mutex_t *)
wait on a condition variable 
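
The condition variable routines follow the usual predicate-loop pattern; a minimal sketch, assuming rt_mutex_init() and rt_cond_init() were called during setup:

    static rt_mutex_t mtx;     /* initialized with rt_mutex_init(&mtx) */
    static rt_cond_t  cond;    /* initialized with rt_cond_init(&cond) */
    static int ready = 0;

    void wait_for_ready(void) {
      rt_mutex_lock(&mtx);
      while (!ready)                  /* loop guards against spurious wakeups */
        rt_cond_wait(&cond, &mtx);    /* releases mtx while waiting */
      rt_mutex_unlock(&mtx);
    }

    void set_ready(void) {
      rt_mutex_lock(&mtx);
      ready = 1;
      rt_cond_signal(&cond);          /* wake at least one waiter */
      rt_mutex_unlock(&mtx);
    }
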
int rt_threadpool_worker_getdevspeed(void *voiddata, float *speed)
Worker thread calls this to get relative speed of this device as determined by the SM/core count and clock rate. 
rt_threadpool_t * rt_threadpool_create(int workercount, int *devlist)
create a thread pool with a specified number of worker threads 
struct atomic_int_struct rt_atomic_int_t
atomic int structure with padding to prevent false sharing 
rt_mutex_t lock
Mutex lock for the structure. 
int rt_atomic_int_init(rt_atomic_int_t *atomp, int val)
initialize an atomic int variable 
int start
starting value (inclusive) 
int workercount
number of worker threads 
int rt_threadpool_launch(rt_threadpool_t *thrpool, void *fctn(void *), void *parms, int blocking)
launch threads onto a new function, with associated parms 
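
A sketch of a full pool lifecycle: CPU-only workers, a dynamically scheduled range of 1024 items, and a non-blocking launch followed by an explicit wait. Passing each worker's own parameter block as the first argument of rt_threadpool_next_tile(), and the pool size and chunk size, are assumptions of this sketch:

    #include "threads.h"          /* assumed header name for this API */

    void * pool_task(void *voiddata) {
      rt_tasktile_t tile;
      while (rt_threadpool_next_tile(voiddata, 8, &tile) != RT_SCHED_DONE) {
        /* process items tile.start .. tile.end-1 */
      }
      return NULL;
    }

    int run_pool(void) {
      rt_threadpool_t *pool =
        rt_threadpool_create(4, RT_THREADPOOL_DEVLIST_CPUSONLY);

      rt_tasktile_t tile;
      tile.start = 0;                              /* schedule [0, 1024) */
      tile.end   = 1024;
      rt_threadpool_sched_dynamic(pool, &tile);

      rt_threadpool_launch(pool, pool_task, NULL, 0); /* non-blocking launch */
      rt_threadpool_wait(pool);                       /* wait for completion */

      return rt_threadpool_destroy(pool);
    }
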
int padding2[8]
Pad to avoid false sharing, cache aliasing. 
int rt_cond_destroy(rt_cond_t *)
destroy a condition variable 
rt_shared_iterator_t * iter
dynamic scheduler iterator 
int rt_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers 
rt_cond_t wait_cv
Clients wait on condition variable to proceed. 
int phase
Flag to separate waiters from fast workers. 
int rt_atomic_int_fetch_and_add(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning the original value 
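
The add_and_fetch/fetch_and_add pair differ only in which value they return; a short sketch:

    #include <stdio.h>
    #include "threads.h"          /* assumed header name for this API */

    void count_things(void) {
      rt_atomic_int_t counter;
      rt_atomic_int_init(&counter, 0);

      int newval = rt_atomic_int_add_and_fetch(&counter, 1); /* returns 1 (new value) */
      int oldval = rt_atomic_int_fetch_and_add(&counter, 1); /* returns 1 (old value) */
      int cur    = rt_atomic_int_get(&counter);              /* counter is now 2 */
      printf("%d %d %d\n", newval, oldval, cur);

      rt_atomic_int_destroy(&counter);
    }
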
struct rt_cpu_caps_struct rt_cpu_caps_t
CPU capability flags structure, filled in by rt_cpu_capability_flags() 
void * parms
parms for fctn pointer 
int rt_rwlock_unlock(rt_rwlock_t *)
unlock reader/writer lock 
int rt_cond_init(rt_cond_t *)
initialize a condition variable 
int rt_thread_barrier(rt_barrier_t *barrier, int increment)
synchronize on counting barrier primitive 
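
A sketch of the counting barrier; that rt_thread_barrier() returns the sum of the increments contributed by the waiters is an inference from the 'sum' and 'result' members documented here:

    static rt_barrier_t *bar; /* created once: bar = rt_thread_barrier_init(nthreads) */

    void * barrier_worker(void *parms) {
      /* ... phase 1 work ... */
      int sum = rt_thread_barrier(bar, 1);  /* blocks until all nthreads arrive */
      /* ... phase 2 work; every thread is assumed to see the same 'sum' ... */
      (void) sum;
      return NULL;
    }
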
int rt_threadlaunch_next_tile(void *voidparms, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit by iterating the shared iterator; returns -1 if no iterations are left or a fatal error has occurred 
rt_thread_t * threads
worker threads 
int threadid
ID of worker thread. 
int rt_thread_run_barrier_init(rt_run_barrier_t *barrier, int n_clients)
initialize thread pool barrier 
int threadid
worker thread's id 
int padding1[8]
avoid false sharing 
int devid
worker CPU/GPU device ID 
struct rt_threadpool_struct rt_threadpool_t
persistent thread pool 
int rt_atomic_int_destroy(rt_atomic_int_t *atomp)
destroy an atomic int variable 
int threadcount
number of workers 
int phase
Flag to separate waiters from fast workers. 
int rt_shared_iterator_next_tile(rt_shared_iterator_t *it, int reqsize, rt_tasktile_t *tile)
iterate the shared iterator with a requested tile size; returns the tile received, and a return code of -1 if no iterations are left or a fatal error has occurred during processing, canceling all worker threads. 
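
A sketch of driving the shared iterator directly: the master publishes a half-open interval, and each worker repeatedly requests tiles until RT_SCHED_DONE comes back. The range of 4096 items and the request size of 32 are placeholders:

    static rt_shared_iterator_t it;

    void master_setup(void) {
      rt_tasktile_t work;
      rt_shared_iterator_init(&it);
      work.start = 0;                    /* publish the interval [0, 4096) */
      work.end   = 4096;
      rt_shared_iterator_set(&it, &work);
    }

    void * iter_worker(void *parms) {
      rt_tasktile_t tile;
      while (rt_shared_iterator_next_tile(&it, 32, &tile) != RT_SCHED_DONE) {
        /* process items tile.start .. tile.end-1 */
      }
      return NULL;
    }
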
int rt_thread_set_self_cpuaffinity(int cpu)
set the CPU affinity of the current thread (if allowed by host system) 
rt_barrier_t * rt_thread_barrier_init(int n_clients)
initialize counting barrier primitive 
int rt_cond_signal(rt_cond_t *)
signal a condition variable, waking at least one thread