source: trunk/kernel/mm/vmm.h @ 635

Last change on this file since 635 was 635, checked in by alain, 15 months ago

This version is a major evolution: The physical memory allocators,
defined in the kmem.c, ppm.c, and kcm.c files have been modified
to support remote accesses. The RPCs that were previously used
to allocate physical memory in a remote cluster have been removed.
This has been done to cure a dead-lock in case of concurrent page-faults.

This version 2.2 has been tested on a (4 clusters / 2 cores per cluster)
TSAR architecture, for both the "sort" and the "fft" applications.

File size: 27.8 KB
Line 
1/*
2 * vmm.h - virtual memory management related operations
3 *
4 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
5 *           Mohamed Lamine Karaoui (2015)
6 *           Alain Greiner (2016,2017,2018,2019)
7 *
8 * Copyright (c) UPMC Sorbonne Universites
9 *
10 * This file is part of ALMOS-MKH.
11 *
12 * ALMOS-MKH is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; version 2.0 of the License.
15 *
16 * ALMOS-MKH is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#ifndef _VMM_H_
27#define _VMM_H_
28
29#include <hal_kernel_types.h>
30#include <bits.h>
31#include <list.h>
32#include <queuelock.h>
33#include <hal_gpt.h>
34#include <vseg.h>
35#include <page.h>
36
37/****  Forward declarations  ****/
38
39struct process_s;
40struct vseg_s;
41
/*********************************************************************************************
 * This structure defines the STACK allocator used by the VMM to dynamically handle
 * vseg allocation or release requests for a user thread.
 * This allocator handles a fixed size array of fixed size slots in the STACK zone of user
 * space. The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE,
 * and CONFIG_VMM_STACK_BASE parameters.
 * Each slot can contain one user stack vseg. The first 4 Kbytes page in the slot is not
 * mapped, in order to detect stack overflow.
 * In this implementation, the slot index is defined by the user thread LTID.
 * All allocated stacks are registered in a bitmap defining the STACK zone state:
 * - The allocator checks that the requested slot has not already been allocated, and sets
 *   the corresponding bit in the bitmap.
 * - The de-allocator resets the corresponding bit in the bitmap.
 ********************************************************************************************/

typedef struct stack_mgr_s
{
    busylock_t     lock;               /*! lock protecting STACK allocator                  */
    vpn_t          vpn_base;           /*! first page of STACK zone                         */
    bitmap_t       bitmap;             /*! bit vector of allocated stacks                   */
}
stack_mgr_t;
64
/*********************************************************************************************
 * This structure defines the MMAP allocator used by the VMM to dynamically handle
 * MMAP vsegs requested or released by a user process.
 * This allocator should be only used in the reference cluster.
 * - allocation policy : all allocated vsegs occupy an integer number of pages that is
 *   a power of 2, and are aligned on a page boundary. The requested number of pages is
 *   rounded if required. The first_free_vpn variable defines completely the MMAP zone state.
 *   It is never decremented, as the released vsegs are simply registered in a zombi_list.
 *   The relevant zombi_list is checked first for each allocation request.
 * - release policy : a released MMAP vseg is registered in an array of zombi_lists.
 *   This array is indexed by ln(number of pages), and each entry contains the root of
 *   a local list of zombi vsegs that have the same size. The physical memory allocated
 *   for a zombi vseg descriptor is not released, to keep the "list" field usable.
 *   The physical memory allocated for MMAP vseg descriptors is actually released
 *   when the VMM is destroyed.
 ********************************************************************************************/

typedef struct mmap_mgr_s
{
    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
    vpn_t          first_free_vpn;     /*! first free page in MMAP zone                     */
    xlist_entry_t  zombi_list[32];     /*! array of roots of released vsegs lists           */
}
mmap_mgr_t;
91
/*********************************************************************************************
 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
 * This local VMM implements four main services:
 * 1) It contains the local copy of the vseg list (VSL), only complete in reference.
 * 2) It contains the local copy of the generic page table (GPT), only complete in reference.
 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
 *********************************************************************************************
 * Implementation notes:
 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
 *    structures, stored in the reference cluster.
 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
 *    a remote_rwlock, because it can be accessed by a thread running in a remote cluster.
 *    An example is the vmm_fork_copy() function.
 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
 *    false page-fault (page is mapped in the reference GPT, but the PTE copy is missing
 *    in the local GPT). As each PTE can be protected by a specific GPT_LOCKED attribute
 *    for exclusive access, it is NOT protected by a global lock.
 ********************************************************************************************/

typedef struct vmm_s
{
    remote_rwlock_t  vsl_lock;            /*! lock protecting the local VSL                 */
    xlist_entry_t    vsegs_root;          /*! Virtual Segment List (complete in reference)  */
    uint32_t         vsegs_nr;            /*! total number of local vsegs                   */

    gpt_t            gpt;                 /*! Generic Page Table (complete in reference)    */

    stack_mgr_t      stack_mgr;           /*! embedded STACK vsegs allocator                */
    mmap_mgr_t       mmap_mgr;            /*! embedded MMAP vsegs allocator                 */

    uint32_t         false_pgfault_nr;    /*! false page fault counter (for all threads)    */
    uint32_t         local_pgfault_nr;    /*! local page fault counter (for all threads)    */
    uint32_t         global_pgfault_nr;   /*! global page fault counter (for all threads)   */
    uint32_t         false_pgfault_cost;  /*! cumulated cost (for all threads)              */
    uint32_t         local_pgfault_cost;  /*! cumulated cost (for all threads)              */
    uint32_t         global_pgfault_cost; /*! cumulated cost (for all threads)              */

    vpn_t            args_vpn_base;       /*! args vseg first page                          */
    vpn_t            envs_vpn_base;       /*! envs vseg first page                          */
    vpn_t            code_vpn_base;       /*! code vseg first page                          */
    vpn_t            data_vpn_base;       /*! data vseg first page                          */
    vpn_t            heap_vpn_base;       /*! heap zone first page                          */

    intptr_t         entry_point;         /*! main thread entry point                       */
}
vmm_t;
139
/*********************************************************************************************
 * This function makes only a partial initialisation of the VMM attached to a user
 * process: it initializes the STACK and MMAP allocators, and the VSL lock.
 * - The GPT has been previously created, with the hal_gpt_create() function.
 * - The "kernel" vsegs are previously registered, by the hal_vmm_kernel_update() function.
 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
 * - The "stack" vsegs are dynamically registered by the thread_user_create() function.
 * - The "file", "anon", "remote" vsegs are dynamically registered by the mmap() syscall.
 *********************************************************************************************
 * @ process   : pointer on process descriptor
 * @ return 0 if success / return -1 if failure.
 ********************************************************************************************/
error_t vmm_user_init( struct process_s * process );
153
/*********************************************************************************************
 * This function re-initialises the VMM attached to a user process to prepare a new
 * call to the vmm_user_init() function after an exec() syscall.
 * It removes from the VMM of the process identified by the <process> argument all
 * non kernel vsegs (i.e. all user vsegs), by calling the vmm_remove_vseg() function.
 * - the vsegs are removed from the VSL.
 * - the corresponding GPT entries are removed from the GPT.
 * - the physical pages are released to the relevant kmem when they are not shared.
 * The VSL and the GPT are not modified for the kernel vsegs.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 ********************************************************************************************/
void vmm_user_reset( struct process_s * process );
167
/*********************************************************************************************
 * This function is called by the process_make_fork() function. It partially copies
 * the content of a remote parent process VMM to the local child process VMM:
 * - The KERNEL vsegs required by the architecture must have been previously
 *   created in the child VMM, using the hal_vmm_kernel_update() function.
 * - The DATA, ANON, REMOTE vsegs registered in the parent VSL are registered in the
 *   child VSL. All valid PTEs in the parent GPT are copied to the child GPT.
 *   The WRITABLE and COW flags are not modified, as it will be done later for those
 *   shared pages by the vmm_set_cow() function.
 * - The CODE vsegs registered in the parent VSL are registered in the child VSL, but the
 *   GPT entries are not copied in the child GPT, and will be dynamically updated from
 *   the .elf file when a page fault is reported.
 * - The FILE vsegs registered in the parent VSL are registered in the child VSL, and all
 *   valid GPT entries in the parent GPT are copied to the child GPT. The COW flag is not set.
 * - No STACK vseg is copied from parent VMM to child VMM: the child stack vseg is copied
 *   later from the cluster containing the user thread requesting the fork().
 *********************************************************************************************
 * @ child_process     : local pointer on local child process descriptor.
 * @ parent_process_xp : extended pointer on remote parent process descriptor.
 * @ return 0 if success / return -1 if failure.
 ********************************************************************************************/
error_t vmm_fork_copy( struct process_s * child_process,
                       xptr_t             parent_process_xp );
191
/*********************************************************************************************
 * This function is called by the process_make_fork() function to update the COW attribute
 * in the parent process vsegs. It sets the COW flag, and resets the WRITABLE flag of
 * all GPT entries of the DATA, MMAP, and REMOTE vsegs of the <process> argument.
 * It must be called by a thread running in the reference cluster, that contains the complete
 * VSL and GPT (use the rpc_vmm_set_cow_client() when the calling client thread is remote).
 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
 * using the list of copies stored in the owner process, and using remote_write accesses to
 * update the remote GPTs. It atomically increments the pending_fork counter, in all involved
 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
 *********************************************************************************************
 * @ process   : local pointer on local reference process descriptor.
 ********************************************************************************************/
void vmm_set_cow( struct process_s * process );
206
/*********************************************************************************************
 * This function modifies one GPT entry identified by the <process> and <vpn> arguments
 * in all clusters containing a process copy. It is used to maintain coherence in GPT
 * copies, using remote_write accesses.
 * It must be called by a thread running in the process owner cluster.
 * Use the RPC_VMM_GLOBAL_UPDATE_PTE if required.
 * It cannot fail, as only mapped PTE2 in GPT copies are updated.
 *********************************************************************************************
 * @ process   : local pointer on local process descriptor.
 * @ vpn       : PTE index.
 * @ attr      : PTE / attributes.
 * @ ppn       : PTE / physical page index.
 ********************************************************************************************/
void vmm_global_update_pte( struct process_s * process,
                            vpn_t              vpn,
                            uint32_t           attr,
                            ppn_t              ppn );
224
/*********************************************************************************************
 * This function deletes, in the local cluster, all vsegs registered in the VSL
 * of the process identified by the <process> argument. For each vseg:
 * - it unmaps all vseg PTEs from the GPT (releases the physical pages when required).
 * - it removes the vseg from the local VSL.
 * - it releases the memory allocated to the local vseg descriptors.
 * - it releases the memory allocated to the GPT itself.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 ********************************************************************************************/
void vmm_destroy( struct process_s * process );
236
/*********************************************************************************************
 * This function scans the list of vsegs registered in the VMM of a given process descriptor
 * to check if a given virtual region (defined by a base and size) overlaps an existing vseg.
 *********************************************************************************************
 * @ process  : pointer on process descriptor.
 * @ base     : region virtual base address.
 * @ size     : region size (bytes).
 * @ returns NULL if no conflict / return conflicting vseg pointer if conflict.
 ********************************************************************************************/
vseg_t * vmm_check_conflict( struct process_s * process,
                             vpn_t              base,
                             vpn_t              size );
249
/*********************************************************************************************
 * This function allocates memory for a vseg descriptor, initialises it, and registers it
 * in the VSL of the local process descriptor, that must be the reference process.
 * - For the FILE, ANON, & REMOTE types, it does not use the <base> and <size> arguments,
 *   but uses the specific MMAP virtual memory allocator.
 * - For the STACK type, it does not use the <size> argument, but uses the <base> argument
 *   (the user thread LTID) and the specific STACK virtual memory allocator.
 * It checks collision with pre-existing vsegs.
 * To comply with the "on-demand" paging policy, this function does NOT modify the GPT,
 * and does not allocate physical memory for vseg data.
 * It should be called by a local thread (could be a RPC thread if the client thread is not
 * running in the reference cluster).
 *********************************************************************************************
 * @ process     : pointer on local processor descriptor.
 * @ type        : vseg type.
 * @ base        : vseg base address (or user thread ltid for a user stack vseg).
 * @ size        : vseg size (bytes).
 * @ file_offset : offset in file for CODE, DATA, FILE types.
 * @ file_size   : can be smaller than "size" for DATA type.
 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
 * @ cxy         : physical mapping cluster (for non distributed vsegs).
 * @ returns pointer on vseg if success / returns NULL if no memory, or conflict.
 ********************************************************************************************/
vseg_t * vmm_create_vseg( struct process_s * process,
                          vseg_type_t        type,
                          intptr_t           base,
                          uint32_t           size,
                          uint32_t           file_offset,
                          uint32_t           file_size,
                          xptr_t             mapper_xp,
                          cxy_t              cxy );
281
/*********************************************************************************************
 * This function removes from the VMM of a process descriptor identified by the <process>
 * argument the vseg identified by the <vseg> argument. It can be used for any type of vseg.
 * As it uses local pointers, it must be called by a local thread.
 * It is called by the vmm_user_reset(), vmm_delete_vseg() and vmm_destroy() functions.
 * It makes a kernel panic if the process is not registered in the local cluster,
 * or if the vseg is not registered in the process VSL.
 * For all vseg types, the vseg is detached from the local VSL, and all associated PTEs are
 * unmapped from the local GPT. Other actions depend on the vseg type:
 * - Regarding the vseg descriptor release:
 *   . for ANON and REMOTE, the vseg is not released, but registered in local zombi_list.
 *   . for STACK the vseg is released to the local stack allocator.
 *   . for all other types, the vseg is released to the local kmem.
 * - Regarding the physical pages release:
 *   . for KERNEL and FILE, the pages are not released to kmem.
 *   . for CODE and STACK, the pages are released to local kmem when they are not COW.
 *   . for DATA, ANON and REMOTE, the pages are released to relevant kmem only when
 *     the local cluster is the reference cluster.
 * The lock protecting the VSL must be taken by the caller.
 *********************************************************************************************
 * @ process  : local pointer on process.
 * @ vseg     : local pointer on vseg.
 ********************************************************************************************/
void vmm_remove_vseg( struct process_s * process,
                      struct vseg_s    * vseg );
307
/*********************************************************************************************
 * This function calls the vmm_remove_vseg() function to remove from the VMM of a local
 * process descriptor, identified by the <pid> argument, the vseg identified by the <vaddr>
 * virtual address in user space.
 * Use the RPC_VMM_DELETE_VSEG to remove a vseg from a remote process descriptor.
 *********************************************************************************************
 * @ pid      : process identifier.
 * @ vaddr    : virtual address in user space.
 ********************************************************************************************/
void vmm_delete_vseg( pid_t    pid,
                      intptr_t vaddr );
319
/*********************************************************************************************
 * This function removes a given region (defined by a base address and a size) from
 * the VMM of a given process descriptor. This can modify the number of vsegs:
 * (a) if the region is not entirely mapped in an existing vseg, it's an error.
 * (b) if the region has same base and size as an existing vseg, the vseg is removed.
 * (c) if the removed region cuts the vseg in two parts, it is modified.
 * (d) if the removed region cuts the vseg in three parts, it is modified, and a new
 *     vseg is created with same type.
 * FIXME [AG] this function should be called by a thread running in the reference cluster,
 *       and the VMM should be updated in all process descriptors copies.
 *********************************************************************************************
 * @ process   : pointer on process descriptor
 * @ base      : vseg base address
 * @ size      : vseg size (bytes)
 ********************************************************************************************/
error_t vmm_resize_vseg( struct process_s * process,
                         intptr_t           base,
                         intptr_t           size );
338
/*********************************************************************************************
 * This low-level function scans the local VSL in <vmm> to find the unique vseg containing
 * a given virtual address <vaddr>.
 * It is called by the vmm_get_vseg(), vmm_get_pte(), and vmm_resize_vseg() functions.
 *********************************************************************************************
 * @ vmm     : pointer on the process VMM.
 * @ vaddr   : virtual address.
 * @ return vseg pointer if success / return NULL if not found.
 ********************************************************************************************/
struct vseg_s * vmm_vseg_from_vaddr( vmm_t    * vmm,
                                     intptr_t   vaddr );
350
/*********************************************************************************************
 * This function checks that a given virtual address is contained in a registered vseg.
 * It can be called by any thread running in any cluster:
 * - if the vseg is registered in the local process VMM, it returns the local vseg pointer.
 * - if the vseg is missing in local VMM, it uses a RPC to get it from the reference cluster,
 *   registers it in local VMM and returns the local vseg pointer, if success.
 * - it returns a user error if the vseg is missing in the reference VMM, or if there is
 *   not enough memory for a new vseg descriptor in the calling thread cluster.
 *********************************************************************************************
 * @ process   : [in] pointer on process descriptor
 * @ vaddr     : [in] virtual address
 * @ vseg      : [out] local pointer on local vseg
 * @ returns 0 if success / returns -1 if user error (out of segment).
 ********************************************************************************************/
error_t vmm_get_vseg( struct process_s  * process,
                      intptr_t            vaddr,
                      vseg_t           ** vseg );
368
/*********************************************************************************************
 * This function is called by the generic exception handler in case of page-fault event,
 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
 * It checks the missing VPN and returns a user error if it is not in a registered vseg.
 * For a legal VPN, there are actually 3 cases:
 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, non
 *    replicated in all clusters), it allocates a new physical page, computes the attributes,
 *    depending on vseg type, and updates directly the local GPT.
 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
 *    the local GPT is simply updated from the reference GPT.
 * 3) if the missing VPN is public, and unmapped in the ref GPT, it is a true page fault.
 *    The calling thread allocates a new physical page, computes the attributes, depending
 *    on vseg type, and updates directly (without RPC) the local GPT and the reference GPT.
 *    Other GPT copies will be updated on demand.
 * Concurrent accesses to the GPT(s) are handled, by locking the target PTE before accessing
 * the local and/or reference GPT(s).
 *********************************************************************************************
 * @ process  : local pointer on local process.
 * @ vpn      : VPN of the missing PTE.
 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
 ********************************************************************************************/
error_t vmm_handle_page_fault( struct process_s * process,
                               vpn_t              vpn );
393
/*********************************************************************************************
 * This function is called by the generic exception handler in case of WRITE violation event,
 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
 * It returns a kernel panic if VPN is not in a registered vseg or is not mapped.
 * For a legal mapped vseg there are two cases:
 * 1) If the missing VPN belongs to a private vseg (STACK), it accesses only the local GPT.
 *    It accesses the forks counter in the current physical page descriptor.
 *    If there is a pending fork, it allocates a new physical page from the cluster defined
 *    by the vseg type, copies the old physical page content to the new physical page,
 *    and decrements the pending_fork counter in the old physical page descriptor.
 *    Finally, it resets the COW flag and sets the WRITE flag in the local GPT.
 * 2) If the missing VPN is public, it accesses only the reference GPT.
 *    It accesses the forks counter in the current physical page descriptor.
 *    If there is a pending fork, it allocates a new physical page from the cluster defined
 *    by the vseg type, copies the old physical page content to the new physical page,
 *    and decrements the pending_fork counter in the old physical page descriptor.
 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
 *    the WRITE flag in all the GPT copies, using a RPC if the reference cluster is remote.
 * In both cases, concurrent accesses to the GPT are handled by locking the target PTE
 * before accessing the GPT.
 *********************************************************************************************
 * @ process   : pointer on local process descriptor copy.
 * @ vpn       : VPN of the faulting PTE.
 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
 ********************************************************************************************/
error_t vmm_handle_cow( struct process_s * process,
                        vpn_t              vpn );
421
/*********************************************************************************************
 * This function is called by the vmm_get_pte() function when a page is unmapped.
 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
 * - For the FILE type, it returns directly the physical page from the file mapper.
 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
 *   and initializes this page from the .elf file mapper.
 * - For all other types, it allocates a new physical page from the cluster defined
 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
 *   but the new page is not initialized.
 *********************************************************************************************
 * @ vseg   : local pointer on vseg containing the missing page.
 * @ vpn    : Virtual Page Number identifying the missing page.
 * @ ppn    : [out] returned Physical Page Number.
 * return 0 if success / return EINVAL or ENOMEM if error.
 ********************************************************************************************/
error_t vmm_get_one_ppn( vseg_t * vseg,
                         vpn_t    vpn,
                         ppn_t  * ppn );
442
443
444#endif /* _VMM_H_ */
Note: See TracBrowser for help on using the repository browser.