source: trunk/libs/libalmosmkh/almosmkh.h @ 641

Last change on this file since 641 was 641, checked in by alain, 3 years ago
  • Fix several bugs.
  • Introduce the "stat" command in KSH.

This almos-mkh version successfully executed the FFT application
(65536 complex points) on the TSAR architecture from 1 to 64 cores.

File size: 29.6 KB
Line 
1/*
2 * almosmkh.h - User level ALMOS-MKH specific library definition.
3 *
4 * Author     Alain Greiner (2016,2017,2018,2019)
5 *
6 * Copyright (c) UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#ifndef _LIBALMOSMKH_H_
25#define _LIBALMOSMKH_H_
26
27/***************************************************************************************
28 * This file defines a user level, ALMOS-MKH specific library, containing:
29 * - non standard system calls.
30 * - debug functions.
31 * - remote malloc extensions.
32 **************************************************************************************/
33
34#include <pthread.h>
35#include <shared_almos.h>
36
37/****************** Non standard (ALMOS-MKH specific) system calls ********************/
38
39
40/***************************************************************************************
41 * This syscall gives the process identified by the <pid> argument the exclusive
42 * ownership of its TXT terminal.
43 ***************************************************************************************
44 * @ pid        : [in] process identifier.
45 * @ returns 0 if success / returns -1 if process not found.
46 **************************************************************************************/
47int fg( unsigned int pid );
48
49/***************************************************************************************
50 * This syscall stores in the buffer identified by the <owner> argument a non zero
51 * value when the process identified by the <pid> argument is currently the exclusive
52 * owner of its TXT terminal.
53 ***************************************************************************************
54 * @ pid        : [in]  process identifier.
55 * @ owner      : [out] pointer on buffer to store the ownership flag (non zero if owner).
56 * @ returns 0 if success / returns -1 if process not found.
57 **************************************************************************************/
58int is_fg( unsigned int pid,
59           unsigned int * owner );
60
61/***************************************************************************************
62 * This syscall returns the hardware platform parameters in the three argument buffers.
63 ***************************************************************************************
64 * @ x_size   : [out] number of clusters in a row.
65 * @ y_size   : [out] number of clusters in a column.
66 * @ ncores   : [out] number of cores per cluster.
67 * @ return always 0.
68 **************************************************************************************/
69int get_config( unsigned int * x_size,
70                unsigned int * y_size,
71                unsigned int * ncores );
72
73/***************************************************************************************
74 * This syscall writes in the <cxy> and <lid> buffers the cluster identifier
75 * and the local index of the calling core.
76 ***************************************************************************************
77 * @ cxy      : [out] cluster identifier.
78 * @ lid      : [out] core local index in cluster.
79 * @ return always 0.
80 **************************************************************************************/
81int get_core_id( unsigned int * cxy,
82                 unsigned int * lid );
83
84/***************************************************************************************
85 * This syscall writes in the <ncores> buffer the number of cores in cluster <cxy>.
86 ***************************************************************************************
87 * @ cxy      : [in]  target cluster identifier.
88 * @ ncores   : [out] number of cores in target cluster.
89 * @ return always 0.
90 **************************************************************************************/
91int get_nb_cores( unsigned int   cxy,
92                  unsigned int * ncores );
93
94/***************************************************************************************
95 * This syscall uses the DQDT to search, in a macro-cluster specified by the
96 * <cxy_base> and <level> arguments, the core with the lowest load.
97 * It writes in the <cxy> and <lid> buffers the selected core cluster identifier
98 * and the local core index.
99 ***************************************************************************************
100 * @ cxy_base : [in]  any cluster identifier in macro-cluster.
101 * @ level    : [in]  macro-cluster level in [1,2,3,4,5].
102 * @ cxy      : [out] selected core cluster identifier.
103 * @ lid      : [out] selected core local index.
104 * @ return 0 if success / 1 if no core in macro-cluster / -1 if illegal arguments.
105 **************************************************************************************/
106int get_best_core( unsigned int   cxy_base,
107                   unsigned int   level,
108                   unsigned int * cxy,
109                   unsigned int * lid );
110
111/***************************************************************************************
112 * This function writes in the <cycle> buffer the value contained in the calling core
113 * cycles counter, taking into account a possible overflow on 32 bits architectures.
114 ***************************************************************************************
115 * @ cycle    : [out] current cycle value.
116 * @ return always 0.
117 **************************************************************************************/
118int get_cycle( unsigned long long * cycle );
119
120/***************************************************************************************
121 * This syscall allows the calling thread to specify the target cluster for
122 * a subsequent fork(). It must be called for each fork().
123 ***************************************************************************************
124 * @ cxy      : [in] target cluster identifier.
125 * @ return 0 if success / return -1 if illegal cxy argument.
126 **************************************************************************************/
127int place_fork( unsigned int cxy );
128
129/***************************************************************************************
130 * This syscall implements the operations related to User Thread Local Storage.
131 ***************************************************************************************
132 * @ operation  : [in] UTLS operation type as defined in "shared_syscalls.h" file.
133 * @ value      : [in] argument value for the UTLS_SET operation.
134 * @ return value for the UTLS_GET and UTLS_GET_ERRNO / return -1 if failure.
135 **************************************************************************************/
136int utls( unsigned int operation,
137          unsigned int value );
138
139/***************************************************************************************
140 * This syscall returns an unsigned 32 bits integer from the standard "stdin" stream.
141 * Both decimal numbers and hexadecimal numbers (prefixed by 0x) are supported.
142 ***************************************************************************************
143 * @ returns the integer value if success / returns -1 if failure (the return type being unsigned, the caller sees -1 as the largest unsigned value).
144 **************************************************************************************/
145unsigned int get_uint32( void );
146
147
148/***************** Non standard (ALMOS-MKH specific) debug functions ******************/
149
150/***************************************************************************************
151 * This syscall copies in the user structure defined by the <info> argument the values
152 * registered in the calling thread "thread_info_t" kernel structure.
153 ***************************************************************************************
154 * @ info    : [out] pointer on thread_info_t structure in user space.
155 * @ return 0 if success / return -1 if illegal argument.
156 **************************************************************************************/
157int get_thread_info( thread_info_t * info );
158
159/***************************************************************************************
160 * This debug syscall displays on the kernel terminal TXT0 the thread / process / core
161 * identifiers, the current cycle, plus a user defined message as specified by
162 * the <string> argument.
163 ***************************************************************************************
164 * @ string    : [in] user defined message.
165 **************************************************************************************/
166void display_string( char * string );
167
168/***************************************************************************************
169 * This debug function displays on the kernel terminal TXT0
170 * the state of the VMM for the process <pid> in cluster <cxy>.
171 * It can be called by any thread running in any cluster.
172 ***************************************************************************************
173 * @ cxy      : [in] target cluster identifier.
174 * @ pid      : [in] process identifier.
175 * @ mapping  : [in] detailed mapping if non zero.
176 * @ return 0 if success / return -1 if illegal argument.
177 **************************************************************************************/
178int display_vmm( unsigned int cxy,
179                 unsigned int pid,
180                 unsigned int mapping );
181
182/***************************************************************************************
183 * This debug syscall displays on the kernel terminal TXT0 the state of the scheduler
184 * of the core identified by the <cxy> and <lid> arguments.
185 * It can be called by any thread running in any cluster.
186 ***************************************************************************************
187 * @ cxy      : [in] target cluster identifier.
188 * @ lid      : [in] target core local index.
189 * @ return 0 if success / return -1 if illegal arguments.
190 **************************************************************************************/
191int display_sched( unsigned int  cxy,
192                   unsigned int  lid );
193
194/***************************************************************************************
195 * This debug syscall displays on the kernel terminal TXT0
196 * the list of processes registered in the cluster identified by the <cxy> argument.
197 * Only the owned processes are displayed when the <owned> argument is non zero.
198 * It can be called by any thread running in any cluster.
199 ***************************************************************************************
200 * @ cxy      : [in] target cluster identifier.
201 * @ owned    : [in] only owned processes if non zero.
202 * @ return 0 if success / return -1 if illegal argument.
203 **************************************************************************************/
204int display_cluster_processes( unsigned int  cxy,
205                               unsigned int  owned );
206
207/***************************************************************************************
208 * This debug syscall displays on the kernel terminal TXT0
209 * the list of processes attached to a given TXT channel.
210 * It can be called by any thread running in any cluster.
211 ***************************************************************************************
212 * @ txt_id   : [in] TXT terminal index.
213 * @ return 0 if success / return -1 if illegal argument.
214 **************************************************************************************/
215int display_txt_processes( unsigned int txt_id );
216
217/***************************************************************************************
218 * This debug syscall displays on the kernel terminal TXT0
219 * the set of busylocks held by the thread identified by the <pid> and <trdid> arguments.
220 * It can be called by any thread running in any cluster.
221 ***************************************************************************************
222 * @ pid      : [in] process identifier.
223 * @ trdid    : [in] thread identifier.
224 * @ return 0 if success / return -1 if illegal arguments.
225 **************************************************************************************/
226int display_busylocks( unsigned int pid,
227                       unsigned int trdid );
228
229/***************************************************************************************
230 * This debug syscall displays on the kernel terminal TXT0
231 * the list of channel devices (chdev) available in the architecture.
232 * It can be called by any thread running in any cluster.
233 ***************************************************************************************
234 * @ return always 0.
235 **************************************************************************************/
236int display_chdev( void );
237
238/***************************************************************************************
239 * This debug syscall displays on the kernel terminal TXT0
240 * the list of channel devices or pseudo-files registered in the VFS cache.
241 * It can be called by any thread running in any cluster.
242 ***************************************************************************************
243 * @ return always 0.
244 **************************************************************************************/
245int display_vfs( void );
246
247/***************************************************************************************
248 * This debug syscall displays on the kernel terminal TXT0 the current state of the DQDT.
249 * It can be called by any thread running in any cluster.
250 ***************************************************************************************
251 * @ return always 0.
252 **************************************************************************************/
253int display_dqdt( void );
254
255/***************************************************************************************
256 * This debug syscall displays on the kernel terminal TXT0 the content of a given
257 * page of a given VFS mapper.
258 * It can be called by any thread running in any cluster.
259 ***************************************************************************************
260 * @ path      : [in] pathname identifying the file/directory in VFS.
261 * @ page_id   : [in] page index in file.
262 * @ nbytes    : [in] number of bytes to display.
263 * @ return 0 if success / return -1 if file or page not found.
264 **************************************************************************************/
265int display_mapper( char        * path,
266                    unsigned int  page_id,
267                    unsigned int  nbytes);
268
269/***************************************************************************************
270 * This debug syscall displays on the kernel terminal TXT0
271 * the state of the barrier used by the process identified by the <pid> argument.
272 * It can be called by any thread running in any cluster.
273 ***************************************************************************************
274 * @ pid      : [in] process identifier.
275 * @ return 0 if success / return -1 if illegal argument.
276 **************************************************************************************/
277int display_barrier( unsigned int pid );
278
279/***************************************************************************************
280 * This debug syscall displays on the kernel terminal TXT0 the content of one given
281 * page of the FAT mapper.
282 * It can be called by any thread running in any cluster.
283 ***************************************************************************************
284 * @ page_id    : [in] page index in the FAT mapper.
285 * @ nb_entries : [in] number of bytes to display (NOTE(review): the name suggests FAT entries, not bytes - confirm the unit).
286 * @ return 0 if success / return -1 if page not found.
287 **************************************************************************************/
288int display_fat( unsigned int page_id,
289                 unsigned int nb_entries );
290
291/***************************************************************************************
292 * This debug syscall is used to activate / deactivate the context switches trace
293 * for a core identified by the <cxy> and <lid> arguments.
294 * It can be called by any thread running in any cluster.
295 ***************************************************************************************
296 * @ active     : [in] activate trace if non zero / deactivate if zero.
297 * @ cxy        : [in] cluster identifier.
298 * @ lid        : [in] core local index.
299 * @ returns 0 if success / returns -1 if illegal arguments.
300 **************************************************************************************/
301int trace( unsigned int active,
302           unsigned int cxy, 
303           unsigned int lid );
304
305/***************************************************************************************
306 * This syscall implements a user-level interactive debugger that can be
307 * introduced in any user application to display various kernel distributed structures.
308 **************************************************************************************/
309void idbg( void );
310
311
312/****************** Non standard (ALMOS-MKH specific) malloc operations  ***************/
313
314/////////////////////////////////////////////////////////////////////////////////////////
315// General principles:
316// - In user space the HEAP zone spreads between the ELF zone and the STACK zone,
317//   as defined in the kernel_config.h file.
318// - The malloc library uses the mmap() syscall to create - on demand -
319//   one vseg in a given cluster. The size of this vseg is defined below
320//   by the MALLOC_LOCAL_STORE_SIZE parameter.
321// - For a standard malloc(), the target cluster is the cluster containing
322//   the core running the client thread.
323// - For a remote_malloc(), the target cluster is explicitly defined
324//   by the argument.
325// - In each cluster, the available storage in virtual space is handled by a
326//   local allocator using the buddy algorithm.
327//
328// TODO : In this first implementation one single - fixed size - vseg
329//        is allocated on demand in each cluster.
330//        We should introduce the possibility to dynamically allocate
331//        several vsegs in each cluster, using several mmap when required.
332/////////////////////////////////////////////////////////////////////////////////////////
333// Free blocks organisation in each cluster :
334// - All free blocks have a size that is a power of 2, larger or equal
335//   to MALLOC_MIN_BLOCK_SIZE (typically 64 bytes).
336// - All free blocks are aligned.
337// - They are pre-classed in an array of linked lists, where all blocks in a
338//   given list have the same size.
339// - The NEXT pointer implementing those linked lists is written
340//   in the first bytes of the block itself, using the unsigned int type.
341// - The pointers on the first free block for each size are stored in an
342//   array of pointers free[32] in the storage(x,y) descriptor.
343/////////////////////////////////////////////////////////////////////////////////////////
344// Allocation policy:
345// - The block size required by the user can be any value, but the allocated
346//   block size can be larger than the requested size:
347// - The allocator computes actual_size, that is the smallest power of 2
348//   value larger or equal to the requested size AND larger or equal to
349//   MALLOC_MIN_BLOCK_SIZE.
350// - It pops the linked list of free blocks corresponding to actual_size,
351//   and returns the block B if the list[actual_size] is not empty.
352// - If the list[actual_size] is empty, it pops the list[actual_size * 2].
353//   If a block B is found, it breaks this block in 2 B/2 blocks, returns
354//   the first B/2 block and pushes the other B/2 block into list[actual_size].
355// - If the list[actual_size * 2] is empty, it pops the list[actual_size * 4].
356//   If a block B is found, it breaks this block in 3 blocks B/4, B/4 and B/2,
357//   returns the first B/4 block, pushes the other blocks B/4 and B/2 into
358//   the proper lists. etc...
359// - If no block satisfying the request is available it returns a failure
360//   (NULL pointer).
361// - This allocation policy has the nice following property:
362//   If the vseg is aligned (the vseg base is a multiple of the
363//   vseg size), all allocated blocks are aligned on the actual_size.
364/////////////////////////////////////////////////////////////////////////////////////////
365// Free policy:
366// - Each allocated block is registered in an alloc[] array of unsigned char.
367// - This registration is required by the free() operation, because the size
368//   of the allocated block must be obtained from the base address of the block. 
369// - The number of entries in this array is equal to the max number
370//   of allocated block : MALLOC_LOCAL_STORE_SIZE / MALLOC_MIN_BLOCK_SIZE.
371// - For each allocated block, the value registered in the alloc[] array
372//   is log2( size_of_allocated_block ).
373// - The index in this array is computed from the allocated block base address:
374//      index = (block_base - vseg_base) / MALLOC_MIN_BLOCK_SIZE
375// - The alloc[] array is stored at the end of heap segment. This consumes
376//   (1 / MALLOC_MIN_BLOCK_SIZE) of the total storage capacity.
377/////////////////////////////////////////////////////////////////////////////////////////
378
379
380#define MALLOC_INITIALIZED         0xBABEF00D   // magic number stored in <initialized> field when initialised
381#define MALLOC_MIN_BLOCK_SIZE      0x40         // 64 bytes : smallest block size / granularity of the alloc[] array
382#define MALLOC_LOCAL_STORE_SIZE    0x800000     // 8 Mbytes : size of the vseg created in one cluster
383#define MALLOC_MAX_CLUSTERS        0x100        // 256 clusters
384
385/////////////////////////////////////////////////////////////////////////////////////////
386//     store(x,y) descriptor (one per cluster) : state of the local heap allocator
387/////////////////////////////////////////////////////////////////////////////////////////
388
389typedef struct malloc_store_s
390{
391    pthread_mutex_t mutex;           // lock protecting exclusive access to local heap
392    unsigned int    initialized;     // initialised when value == MALLOC_INITIALIZED
393    unsigned int    cxy;             // cluster identifier
394    unsigned int    store_base;      // store base address (held in an unsigned int, not a pointer)
395    unsigned int    store_size;      // store size (bytes)
396    unsigned int    alloc_base;      // alloc[] array base address
397    unsigned int    alloc_size;      // alloc[] array size (bytes)
398    unsigned int    free[32];        // address of first free block, one entry per block size (presumably indexed by log2(size) - TODO confirm)
399} 
400malloc_store_t;
401
402/*****************************************************************************************
403 * This function allocates <size> bytes of memory in user space, and returns a pointer
404 * to the allocated buffer. The physical memory is allocated from the store located in
405 * the cluster identified by the <cxy> argument.
406 *****************************************************************************************
407 * @ size    : [in] number of requested bytes.
408 * @ cxy     : [in] target cluster identifier.
409 * @ returns a pointer on the allocated buffer if success / returns NULL if failure
410 ****************************************************************************************/
411void * remote_malloc( unsigned int size, 
412                      unsigned int cxy );
413
414/*****************************************************************************************
415 * This function releases the memory buffer identified by the <ptr> argument,
416 * to the store identified by the <cxy> argument.
417 * If the <ptr> argument is illegal, it displays an error message and does nothing else.
418 *****************************************************************************************
419 * @ ptr   : [in] pointer on the released buffer.
420 * @ cxy   : [in] target cluster identifier.
421 ****************************************************************************************/
422void remote_free( void        * ptr,
423                  unsigned int  cxy );
424
425/*****************************************************************************************
426 * This function releases the memory buffer identified by the <ptr> argument,
427 * to the store located in cluster identified by the <cxy> argument, and allocates
428 * a new buffer containing <size> bytes from this store.
429 * The content of the old buffer is copied to the new buffer, up to <size> bytes.
430 * If the <ptr> argument is illegal, it displays an error message and does nothing else.
431 *****************************************************************************************
432 * @ ptr     : [in] pointer on the released buffer.
433 * @ size    : [in] new buffer requested size (bytes).
434 * @ cxy     : [in] target cluster identifier.
435 * @ return a pointer on allocated buffer if success / return NULL if failure
436 ****************************************************************************************/
437void * remote_realloc( void        * ptr,
438                       unsigned int  size,
439                       unsigned int  cxy );
440
441/*****************************************************************************************
442 * This function allocates enough space for <count> objects that are <size> bytes
443 * of memory each from the store located in cluster identified by the <cxy> argument.
444 * The allocated memory is filled with bytes of value zero.
445 *****************************************************************************************
446 * @ count   : [in] number of requested objects.
447 * @ size    : [in] number of bytes per object.
448 * @ cxy     : [in] target cluster identifier.
449 * @ returns a pointer on allocated buffer if success / returns NULL if failure
450 ****************************************************************************************/
451void * remote_calloc( unsigned int count,
452                      unsigned int size,
453                      unsigned int cxy );
454
455/********* Non standard (ALMOS-MKH specific) pthread_parallel_create() syscall  *********/
456
457//////////////////////////////////////////////////////////////////////////////////////////
458// This system call can be used to parallelize the creation and the termination
459// of a parallel multi-threaded application. It removes the loop in the main thread that
460// creates the N working threads (N  sequential pthread_create() ). It also removes the
461// loop that waits completion of these N working threads (N sequential pthread_join() ).
462// It creates one "work" thread (in detached mode) per core in the target architecture.
463// Each "work" thread is identified by the [cxy][lid] indexes (cluster / local core).
464// The pthread_parallel_create() function returns only when all "work" threads completed
465// (successfully or not).
466//
467// To use this system call, the application code must define the following structures:
468// - To define the arguments to pass to the <work> function the application must allocate
469//   and initialize a first 2D array, indexed by [cxy] and [lid] indexes, where each slot
470//   contains an application specific structure, and another 2D array, indexed by the same
471//   indexes, containing pointers on these structures. This array of pointers is one
472//   argument of the pthread_parallel_create() function.
473// - To detect the completion of the <work> threads, the application must allocate a 1D
474//   array, indexed by the cluster index [cxy], where each slot contains a pthread_barrier
475//   descriptor. This barrier is initialised by the pthread_parallel_create() function,
476//   in all clusters containing at least one work thread. This array of barriers is another
477//   argument of the pthread_parallel_create() function.
478//
479// Implementation note:
480// To parallelize the "work" threads creation and termination, the pthread_parallel_create()
481// function creates a distributed quad-tree (DQT) of "build" threads covering all cores
482// required to execute the parallel application.
483// Depending on the hardware topology, this DQT can be truncated, (i.e. some
484// parent nodes can have fewer than 4 children), if (x_size != y_size), or if one size
485// is not a power of 2. Each "build" thread is identified by two indexes [cxy][level].
486// Each "build" thread makes the following tasks:
487// 1) It calls the pthread_create() function to create up to 4 children threads, that
488//    are "work" threads when (level == 0), or "build" threads, when (level > 0).
489// 2) It initializes the barrier (global variable), used to block/unblock
490//    the parent thread until children completion.
491// 3) It calls the pthread_barrier_wait( self ) to wait until all children threads
492//    completed (successfully or not).
493// 4) It calls the pthread_barrier_wait( parent ) to unblock the parent thread.
494//////////////////////////////////////////////////////////////////////////////////////////
495
496/*****************************************************************************************
497 * This blocking function creates N working threads that execute the code defined
498 * by the <work_func> and <work_args_array> arguments.
499 * The number N of created threads is entirely defined by the <root_level> argument.
500 * This value defines an abstract quad-tree, with a square base : level in [0,1,2,3,4],
501 * side in [1,2,4,8,16], nclusters in [1,4,16,64,256]. This base is called macro-cluster.
502 * A working thread is created on all cores contained in the specified macro-cluster.
503 * The actual number of physical clusters containing cores can be smaller than the number
504 * of clusters covered by the quad tree. The actual number of cores in a cluster can be
505 * less than the max value.
506 *
507 * In the current implementation, all threads execute the same <work_func> function,
508 * on different arguments, that are specified as a 2D array of pointers <work_args_array>.
509 * This can be modified in a future version, where the <work_func> argument can become
510 * a 2D array of pointers, to have one specific function for each thread.
511 *****************************************************************************************
512 * @ root_level            : [in]  DQT root level in [0,1,2,3,4].
513 * @ work_func             : [in]  pointer on start function.
514 * @ work_args_array       : [in]  pointer on a 2D array of pointers.
515 * @ parent_barriers_array : [in]  pointer on a 1D array of barriers.
516 * @ return 0 if success / return -1 if failure.
517 ****************************************************************************************/
518int pthread_parallel_create( unsigned int   root_level,
519                             void         * work_func,
520                             void         * work_args_array,
521                             void         * parent_barriers_array );
522
523#endif /* _LIBALMOSMKH_H_ */
524
Note: See TracBrowser for help on using the repository browser.