source: trunk/kernel/mm/vmm.h @ 625

Last change on this file since 625 was 625, checked in by alain, 2 years ago

Fix a bug in the vmm_remove_vseg() function: the physical pages
associated with a user DATA vseg were released to the kernel when
the target process descriptor was in the reference cluster.
This physical page release should be done only when the page
forks counter value is zero.
All other modifications are cosmetic.

File size: 27.5 KB
Line 
1/*
2 * vmm.h - virtual memory management related operations
3 *
4 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
5 *           Mohamed Lamine Karaoui (2015)
6 *           Alain Greiner (2016,2017,2018,2019)
7 *
8 * Copyright (c) UPMC Sorbonne Universites
9 *
10 * This file is part of ALMOS-MKH.
11 *
12 * ALMOS-MKH is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; version 2.0 of the License.
15 *
16 * ALMOS-MKH is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#ifndef _VMM_H_
27#define _VMM_H_
28
29#include <hal_kernel_types.h>
30#include <bits.h>
31#include <list.h>
32#include <queuelock.h>
33#include <hal_gpt.h>
34#include <vseg.h>
35#include <page.h>
36
37/****  Forward declarations  ****/
38
39struct process_s;
40struct vseg_s;
41
42/*********************************************************************************************
43 * This structure defines the STACK allocator used by the VMM to dynamically handle
44 * vseg allocation or release requests for a user thread.
45 * This allocator handles a fixed size array of fixed size slots in STACK zone of user space.
46 * The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE, and
47 * CONFIG_VMM_STACK_BASE parameters.
48 * Each slot can contain one user stack vseg. The first 4 Kbytes page in the slot is left
49 * unmapped, to detect stack overflows.
50 * In this implementation, the slot index is defined by the user thread LTID.
51 * All allocated stacks are registered in a bitmap defining the STACK zone state:
52 * - The allocator checks that the requested slot has not been already allocated, and sets
53 *   the corresponding bit in the bitmap.
54 * - The de-allocator function resets the corresponding bit in the bitmap.
55 ********************************************************************************************/
56
57typedef struct stack_mgr_s
58{
59    busylock_t     lock;               /*! busylock protecting the STACK allocator state    */
60    vpn_t          vpn_base;           /*! VPN of the first page of the STACK zone          */
61    bitmap_t       bitmap;             /*! one bit per slot, set when slot is allocated     */
62}
63stack_mgr_t;
64
65/*********************************************************************************************
66 * This structure defines the MMAP allocator used by the VMM to dynamically handle
67 * MMAP vsegs requested or released by a user process.
68 * This allocator should be only used in the reference cluster.
69 * - allocation policy : all allocated vsegs occupy an integer number of pages that is
70 *   a power of 2, and are aligned on a page boundary. The requested number of pages is
71 *   rounded if required. The first_free_vpn variable defines completely the MMAP zone state.
72 *   It is never decremented, as the released vsegs are simply registered in a zombi_list.
73 *   The relevant zombi_list is checked first for each allocation request.
74 * - release policy : a released MMAP vseg is registered in an array of zombi_lists.
75 *   This array is indexed by log2(number of pages), and each entry contains the root of
76 *   a local list of zombi vsegs that have the same size. The physical memory allocated
77 *   for a zombi vseg descriptor is not released, to use the "list" field.
78 *   This physical memory allocated for MMAP vseg descriptors is actually released
79 *   when the VMM is destroyed.
80 ********************************************************************************************/
81
82typedef struct mmap_mgr_s
83{
84    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
85    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
86    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
87    vpn_t          first_free_vpn;     /*! first free page in MMAP zone (never decremented) */
88    xlist_entry_t  zombi_list[32];     /*! roots of zombi lists, indexed by log2(nb pages)  */
89}
90mmap_mgr_t;
91
92/*********************************************************************************************
93 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
94 * This local VMM implements four main services:
95 * 1) It contains the local copy of the vseg list (VSL), only complete in the reference cluster.
96 * 2) It contains the local copy of the generic page table (GPT), only complete in reference.
97 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
98 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
99 *********************************************************************************************
100 * Implementation notes:
101 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
102 *    structures, stored in the reference cluster.
103 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
104 *    a remote_rwlock, because it can be accessed by a thread running in a remote cluster.
105 *    An example is the vmm_fork_copy() function.
106 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
107 *    a false page-fault (page is mapped in the reference GPT, but the PTE copy is missing
108 *    in the local GPT). It is also protected by a remote_rwlock.
109 ********************************************************************************************/
110
111typedef struct vmm_s
112{
113        remote_rwlock_t  vsl_lock;           /*! lock protecting the local VSL                  */
114        xlist_entry_t    vsegs_root;         /*! Virtual Segment List (complete in reference)   */
115        uint32_t         vsegs_nr;           /*! total number of local vsegs                    */
116
117    remote_rwlock_t  gpt_lock;           /*! lock protecting the local GPT                  */
118    gpt_t            gpt;                /*! Generic Page Table (complete in reference)     */
119
120    stack_mgr_t      stack_mgr;          /*! embedded STACK vsegs allocator                 */
121    mmap_mgr_t       mmap_mgr;           /*! embedded MMAP vsegs allocator                  */
122
123        uint32_t         pgfault_nr;         /*! page fault counter (instrumentation)           */
124
125    vpn_t            args_vpn_base;      /*! args vseg first page                           */
126    vpn_t            envs_vpn_base;      /*! envs vseg first page                           */
127        vpn_t            code_vpn_base;      /*! code vseg first page                           */
128        vpn_t            data_vpn_base;      /*! data vseg first page                           */
129    vpn_t            heap_vpn_base;      /*! heap zone first page                           */
130
131        intptr_t         entry_point;        /*! main thread entry point                        */
132}
133vmm_t;
134
135/*********************************************************************************************
136 * This function makes a partial initialisation of the VMM attached to a user process.
137 * The GPT must have been previously created, with the hal_gpt_create() function.
138 * - It registers "args", "envs" vsegs in the VSL.
139 * - It initializes the STACK and MMAP allocators.
140 * Note:
141 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
142 * - The "stack" vsegs are dynamically registered by the thread_user_create() function.
143 * - The "file", "anon", "remote" vsegs are dynamically registered by the mmap() syscall.
144 *********************************************************************************************
145 * @ process   : pointer on process descriptor
146 * @ return 0 if success / return -1 if failure.
147 ********************************************************************************************/
148error_t vmm_user_init( struct process_s * process );
149
150/*********************************************************************************************
151 * This function re-initialises the VMM attached to an user process to prepare a new
152 * call to the vmm_user_init() function after an exec() syscall.
153 * It removes from the VMM of the process identified by the <process> argument all
154 * non kernel vsegs (i.e. all user vsegs), by calling the vmm_remove_vseg() function.
155 * - the vsegs are removed from the VSL.
156 * - the corresponding GPT entries are removed from the GPT.
157 * - the physical pages are released to the relevant kmem when they are not shared.
158 * The VSL and the GPT are not modified for the kernel vsegs.
159 *********************************************************************************************
160 * @ process   : pointer on process descriptor.
161 ********************************************************************************************/
162void vmm_user_reset( struct process_s * process );
163
164/*********************************************************************************************
165 * This function is called by the process_make_fork() function. It partially copies
166 * the content of a remote parent process VMM to the local child process VMM:
167 * - All DATA, ANON, REMOTE vsegs registered in the parent VSL are registered in the
168 *   child VSL. All valid PTEs in parent GPT are copied to the child GPT, but the
169 *   WRITABLE flag is reset and the COW flag is set.
170 * - All CODE vsegs registered in the parent VSL are registered in the child VSL, but the
171 *   GPT entries are not copied in the child GPT, and will be dynamically updated from
172 *   the .elf file when a page fault is reported.
173 * - All FILE vsegs registered in the parent VSL are registered in the child VSL, and all
174 *   valid GPT entries in parent GPT are copied to the child GPT. The COW flag is not set.
175 * - No STACK vseg is copied from  parent VMM to child VMM, because the child stack vseg
176 *   must be copied later from the cluster containing the user thread requesting the fork().
177 * - The KERNEL vsegs required by the target architecture are re-created in the child
178 *   VMM, from the local kernel process VMM, using the hal_vmm_kernel_update() function.
179 *********************************************************************************************
180 * @ child_process     : local pointer on local child process descriptor.
181 * @ parent_process_xp : extended pointer on remote parent process descriptor.
182 * @ return 0 if success / return -1 if failure.
183 ********************************************************************************************/
184error_t vmm_fork_copy( struct process_s * child_process,
185                       xptr_t             parent_process_xp );
186
187/*********************************************************************************************
188 * This function is called by the process_make_fork() function executing the fork syscall.
189 * It sets the COW flag, and resets the WRITABLE flag of all GPT entries of the DATA, MMAP,
190 * and REMOTE vsegs of a process identified by the <process> argument.
191 * It must be called by a thread running in the reference cluster, that contains the complete
192 * VSL and GPT (use the rpc_vmm_set_cow_client() when the calling thread client is remote).
193 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
194 * using the list of copies stored in the owner process, and using remote_write accesses to
195 * update the remote GPTs. It atomically increments the pending_fork counter, in all involved
196 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
197 *********************************************************************************************
198 * @ process   : local pointer on local reference process descriptor.
199 ********************************************************************************************/
200void vmm_set_cow( struct process_s * process );
201
202/*********************************************************************************************
203 * This function modifies a GPT entry identified by the <process> and <vpn> arguments
204 * in all clusters containing a process copy.
205 * It must be called by a thread running in the reference cluster.
206 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
207 * using the list of copies stored in the owner process, and using remote_write accesses to
208 * update the remote GPTs. It cannot fail, as only mapped entries in GPT copies are updated.
209 *********************************************************************************************
210 * @ process   : local pointer on local process descriptor.
211 * @ vpn       : PTE index.
212 * @ attr      : PTE / attributes.
213 * @ ppn       : PTE / physical page index.
214 ********************************************************************************************/
215void vmm_global_update_pte( struct process_s * process,
216                            vpn_t              vpn,
217                            uint32_t           attr,
218                            ppn_t              ppn );
219
220/*********************************************************************************************
221 * This function deletes, in the local cluster, all vsegs registered in the VSL
222 * of the process identified by the <process> argument. For each vseg:
223 * - it unmaps all vseg PTEs from the GPT (release the physical pages when required).
224 * - it removes the vseg from the local VSL.
225 * - it releases the memory allocated to the local vseg descriptors.
226 * - it releases the memory allocated to the GPT itself.
227 *********************************************************************************************
228 * @ process   : pointer on process descriptor.
229 ********************************************************************************************/
230void vmm_destroy( struct process_s * process );
231
232/*********************************************************************************************
233 * This function scans the list of vsegs registered in the VMM of a given process descriptor
234 * to check if a given virtual region (defined by a base and size) overlaps an existing vseg.
235 *********************************************************************************************
236 * @ process  : pointer on process descriptor.
237 * @ base     : region virtual base address.
238 * @ size     : region size (bytes) - NOTE(review): base and size are declared vpn_t, confirm unit.
239 * @ returns NULL if no conflict / returns conflicting vseg pointer if conflict.
240 ********************************************************************************************/
241vseg_t * vmm_check_conflict( struct process_s * process,
242                             vpn_t              base,
243                             vpn_t              size );
244
245/*********************************************************************************************
246 * This function allocates memory for a vseg descriptor, initialises it, and registers it
247 * in the VSL of the local process descriptor, that must be the reference process.
248 * - For the FILE, ANON, & REMOTE types, it does not use the <base> and <size> arguments,
249 *   but uses the specific MMAP virtual memory allocator.
250 * - For the STACK type, it does not use the <size> argument, and the <base> argument
251 *   defines the user thread LTID used by the specific STACK virtual memory allocator.
252 * It checks collision with all pre-existing vsegs.
253 * To comply with the "on-demand" paging policy, this function does NOT modify the GPT,
254 * and does not allocate physical memory for vseg data.
255 * It should be called by a local thread (could be a RPC thread if the client thread is not
256 * running in the reference cluster).
257 *********************************************************************************************
258 * @ process     : pointer on local process descriptor.
259 * @ type        : vseg type.
260 * @ base        : vseg base address (or user thread ltid for a user stack vseg).
261 * @ size        : vseg size (bytes).
262 * @ file_offset : offset in file for CODE, DATA, FILE types.
263 * @ file_size   : can be smaller than "size" for DATA type.
264 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
265 * @ cxy         : physical mapping cluster (for non distributed vsegs).
266 * @ returns pointer on vseg if success / returns NULL if no memory, or conflict.
267 ********************************************************************************************/
268vseg_t * vmm_create_vseg( struct process_s * process,
269                          vseg_type_t        type,
270                          intptr_t           base,
271                              uint32_t           size,
272                          uint32_t           file_offset,
273                          uint32_t           file_size,
274                          xptr_t             mapper_xp,
275                          cxy_t              cxy );
276
277/*********************************************************************************************
278 * This function removes from the VMM of a process descriptor identified by the <process>
279 * argument the vseg identified by the <vseg> argument. It can be used for any type of vseg.
280 * As it uses local pointers, it must be called by a local thread.
281 * It is called by the vmm_user_reset(), vmm_delete_vseg() and vmm_destroy() functions.
282 * It triggers a kernel panic if the process is not registered in the local cluster,
283 * or if the vseg is not registered in the process VSL.
284 * For all vseg types, the vseg is detached from local VSL, and all associated PTEs are
285 * unmapped from local GPT. Other actions depend on the vseg type:
286 * - Regarding the vseg descriptor release:
287 *   . for ANON and REMOTE, the vseg is not released, but registered in local zombi_list.
288 *   . for STACK the vseg is released to the local stack allocator.
289 *   . for all other types, the vseg is released to the local kmem.
290 * - Regarding the physical pages release:
291 *   . for KERNEL and FILE, the pages are not released to kmem.
292 *   . for CODE and STACK, the pages are released to local kmem when they are not COW.
293 *   . for DATA, ANON and REMOTE, the pages are released to relevant kmem only when
294 *     the local cluster is the reference cluster.
295 * The lock protecting the VSL must be taken by the caller.
296 *********************************************************************************************
297 * @ process  : local pointer on process.
298 * @ vseg     : local pointer on vseg.
299 ********************************************************************************************/
300void vmm_remove_vseg( struct process_s * process,
301                      struct vseg_s    * vseg );
302
303/*********************************************************************************************
304 * This function calls the vmm_remove_vseg() function to remove from the VMM of a local
305 * process descriptor, identified by the <pid> argument, the vseg identified by the <vaddr>
306 * virtual address in user space.
307 * Use the RPC_VMM_DELETE_VSEG to remove a vseg from a remote process descriptor.
308 *********************************************************************************************
309 * @ pid      : process identifier.
310 * @ vaddr    : virtual address in user space.
311 ********************************************************************************************/
312void vmm_delete_vseg( pid_t    pid,
313                      intptr_t vaddr );
314
315/*********************************************************************************************
316 * This function removes a given region (defined by a base address and a size) from
317 * the VMM of a given process descriptor. This can modify the number of vsegs:
318 * (a) if the region is not entirely mapped in an existing vseg, it's an error.
319 * (b) if the region has same base and size as an existing vseg, the vseg is removed.
320 * (c) if the removed region cuts the vseg in two parts, it is modified.
321 * (d) if the removed region cuts the vseg in three parts, it is modified, and a new
322 *     vseg is created with the same type.
323 * FIXME [AG] this function should be called by a thread running in the reference cluster,
324 *       and the VMM should be updated in all process descriptors copies.
325 *********************************************************************************************
326 * @ process   : pointer on process descriptor
327 * @ base      : vseg base address
328 * @ size      : vseg size (bytes)
329 ********************************************************************************************/
330error_t vmm_resize_vseg( struct process_s * process,
331                         intptr_t           base,
332                         intptr_t           size );
333
334/*********************************************************************************************
335 * This low-level function scans the local VSL in <vmm> to find the unique vseg containing
336 * a given virtual address <vaddr>.
337 * It is called by the vmm_get_vseg(), vmm_get_pte(), and vmm_resize_vseg() functions.
338 *********************************************************************************************
339 * @ vmm     : pointer on the process VMM.
340 * @ vaddr   : virtual address.
341 * @ return vseg pointer if success / return NULL if not found.
342 ********************************************************************************************/
343struct vseg_s * vmm_vseg_from_vaddr( vmm_t    * vmm,
344                                     intptr_t   vaddr );
345
346/*********************************************************************************************
347 * This function checks that a given virtual address is contained in a registered vseg.
348 * It can be called by any thread running in any cluster:
349 * - if the vseg is registered in the local process VMM, it returns the local vseg pointer.
350 * - if the vseg is missing in local VMM, it uses a RPC to get it from the reference cluster,
351 *   register it in local VMM and returns the local vseg pointer, if success.
352 * - it returns an user error if the vseg is missing in the reference VMM, or if there is
353 *   not enough memory for a new vseg descriptor in the calling thread cluster.
354 *********************************************************************************************
355 * @ process   : [in] pointer on process descriptor
356 * @ vaddr     : [in] virtual address
357 * @ vseg      : [out] local pointer on local vseg
358 * @ returns 0 if success / returns -1 if user error (out of segment).
359 ********************************************************************************************/
360error_t vmm_get_vseg( struct process_s  * process,
361                      intptr_t            vaddr,
362                      vseg_t           ** vseg );           
363
364/*********************************************************************************************
365 * This function is called by the generic exception handler in case of page-fault event,
366 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
367 * It checks the missing VPN and returns a user error if it is not in a registered vseg.
368 * For a legal VPN, there are actually 3 cases:
369 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, non
370 *    replicated in all clusters), it allocates a new physical page, computes the attributes,
371 *    depending on vseg type, and updates directly the local GPT.
372 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
373 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
374 *    the local GPT is simply updated from the reference GPT.
375 * 3) if the missing VPN is public, and unmapped in the reference GPT, it's a true page fault.
376 *    The calling thread allocates a new physical page, computes the attributes, depending
377 *    on vseg type, and updates directly (without RPC) the local GPT and the reference GPT.
378 *    Other GPT copies will be updated on demand.
379 * Concurrent accesses to the GPT are handled, thanks to the
380 * remote_rwlock protecting each GPT copy.
381 *********************************************************************************************
382 * @ process  : local pointer on local process.
383 * @ vpn      : VPN of the missing PTE.
384 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
385 ********************************************************************************************/
386error_t vmm_handle_page_fault( struct process_s * process,
387                               vpn_t              vpn );
388
389/*********************************************************************************************
390 * This function is called by the generic exception handler in case of WRITE violation event,
391 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
392 * It returns a kernel panic if VPN is not in a registered vseg or is not mapped.
393 * For a legal mapped vseg there are two cases:
394 * 1) If the faulting VPN belongs to a private vseg (STACK or CODE segment types, non
395 *    replicated in all clusters), it accesses the local GPT to get the current PPN and ATTR.
396 *    It accesses the forks counter in the current physical page descriptor.
397 *    If there is a pending fork, it allocates a new physical page from the cluster defined
398 *    by the vseg type, copies the old physical page content to the new physical page,
399 *    and decrements the pending_fork counter in old physical page descriptor.
400 *    Finally, it resets the COW flag and sets the WRITE flag in local GPT.
401 * 2) If the faulting VPN is public, it accesses the reference GPT to get the current PPN and
402 *    ATTR. It accesses the forks counter in the current physical page descriptor.
403 *    If there is a pending fork, it allocates a new physical page from the cluster defined
404 *    by the vseg type, copies the old physical page content to the new physical page,
405 *    and decrements the pending_fork counter in old physical page descriptor.
406 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
407 *    the WRITE flag in all the GPT copies, using a RPC if the reference cluster is remote.
408 * In both cases, concurrent accesses to the GPT are protected by the remote_rwlock
409 * attached to the GPT copy in VMM.
410 *********************************************************************************************
411 * @ process   : pointer on local process descriptor copy.
412 * @ vpn       : VPN of the faulting PTE.
413 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
414 ********************************************************************************************/
415error_t vmm_handle_cow( struct process_s * process,
416                        vpn_t              vpn );
417
418/*********************************************************************************************
419 * This function is called by the vmm_get_pte() function when a page is unmapped.
420 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
421 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
422 * - For the FILE type, it returns directly the physical page from the file mapper.
423 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
424 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
425 *   and initializes this page from the .elf file mapper.
426 * - For all other types, it allocates a new physical page from the cluster defined
427 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
428 *   but the new page is not initialized.
429 *********************************************************************************************
430 * @ vseg   : local pointer on vseg containing the missing page.
431 * @ vpn    : Virtual Page Number identifying the missing page.
432 * @ ppn    : [out] returned Physical Page Number.
433 * @ return 0 if success / return EINVAL or ENOMEM if error.
434 ********************************************************************************************/
435error_t vmm_get_one_ppn( vseg_t * vseg,
436                         vpn_t    vpn,
437                         ppn_t  * ppn );
438
439
440#endif /* _VMM_H_ */
Note: See TracBrowser for help on using the repository browser.