source: trunk/kernel/mm/vmm.h @ 623

Last change on this file since 623 was 623, checked in by alain, 5 years ago

Introduce three new types of vsegs (KCODE,KDATA,KDEV)
to map the kernel vsegs in the process VSL and GPT.
This is now used by both the TSAR and the I86 architectures.

File size: 27.6 KB
RevLine 
[1]1/*
2 * vmm.h - virtual memory management related operations
3 *
4 * Authors   Ghassan Almaless (2008,2009,2010,2011, 2012)
5 *           Mohamed Lamine Karaoui (2015)
[623]6 *           Alain Greiner (2016,2017,2018,2019)
[18]7 *
[1]8 * Copyright (c) UPMC Sorbonne Universites
9 *
10 * This file is part of ALMOS-MKH.
11 *
12 * ALMOS-MKH is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; version 2.0 of the License.
15 *
16 * ALMOS-MKH is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
23 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#ifndef _VMM_H_
27#define _VMM_H_
28
[457]29#include <hal_kernel_types.h>
[1]30#include <bits.h>
31#include <list.h>
[567]32#include <queuelock.h>
[1]33#include <hal_gpt.h>
34#include <vseg.h>
35#include <page.h>
36
37/****  Forward declarations  ****/
38
39struct process_s;
[611]40struct vseg_s;
[1]41
42/*********************************************************************************************
[407]43 * This structure defines the STACK allocator used by the VMM to dynamically handle
[611]44 * vseg allocation or release requests for a user thread.
45 * This allocator handles a fixed size array of fixed size slots in STACK zone of user space.
[1]46 * The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE, and
[407]47 * CONFIG_VMM_STACK_BASE parameters.
[611]48 * Each slot can contain one user stack vseg. The first 4 Kbytes page in the slot is not
49 * mapped, in order to detect stack overflow.
[1]50 * The slot index can be computed from the slot base address, and conversely.
51 * All allocation / release operations are registered in the <bitmap> field, that completely
[611]52 * defines the STACK zone status.
[1]53 ********************************************************************************************/
54
55typedef struct stack_mgr_s
56{
[567]57    busylock_t     lock;               /*! lock protecting STACK allocator                  */
[1]58    vpn_t          vpn_base;           /*! first page of STACK zone                         */
59    bitmap_t       bitmap;             /*! bit vector of allocated stacks                   */
60}
61stack_mgr_t;
62
63/*********************************************************************************************
[407]64 * This structure defines the MMAP allocator used by the VMM to dynamically handle 
[1]65 * MMAP vsegs requested or released by a user process.
[18]66 * This allocator should be only used in the reference cluster.
67 * - allocation policy : all allocated vsegs occupy an integer number of pages that is
[1]68 *   a power of 2, and are aligned on a page boundary. The requested number of pages is
[18]69 *   rounded if required. The first_free_vpn variable defines completely the MMAP zone state.
[1]70 *   It is never decremented, as the released vsegs are simply registered in a zombi_list.
[18]71 *   The relevant zombi_list is checked first for each allocation request.
[1]72 * - release policy : a released MMAP vseg is registered in an array of zombi_lists.
73 *   This array is indexed by log2(number of pages), and each entry contains the root of
74 *   a local list of zombi vsegs that have the same size. The physical memory allocated
75 *   for a zombi vseg descriptor is not released, because its "list" field is still used.
76 *   The physical memory allocated for MMAP vseg descriptors is actually released
77 *   when the VMM is destroyed.
78 ********************************************************************************************/
79
80typedef struct mmap_mgr_s
81{
[567]82    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
[1]83    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
84    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
85    vpn_t          first_free_vpn;     /*! first free page in MMAP zone                     */
86    list_entry_t   zombi_list[32];     /*! roots of released vsegs lists (one per size)     */
87}
88mmap_mgr_t;
89
90/*********************************************************************************************
91 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
[585]92 * This local VMM implements four main services:
[567]93 * 1) It contains the local copy of vseg list (VSL), only complete in reference.
94 * 2) It contains the local copy of the generic page table (GPT), only complete in reference.
[408]95 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
96 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
97 *********************************************************************************************
98 * Implementation notes:
[585]99 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
100 *    structures, stored in the reference cluster.
101 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
[408]102 *    a remote_rwlock, because it can be accessed by a thread running in a remote cluster.
103 *    An example is the vmm_fork_copy() function.
[585]104 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
105 *    false page-fault (page is mapped in the reference GPT, but the PTE copy is missing
106 *    in the local GPT). It is also protected by a remote_rwlock.
[1]107 ********************************************************************************************/
108
109typedef struct vmm_s
110{
[567]111        remote_rwlock_t  vsegs_lock;         /*! lock protecting the local VSL                  */
[585]112        xlist_entry_t    vsegs_root;         /*! Virtual Segment List (complete in reference)   */
[408]113        uint32_t         vsegs_nr;           /*! total number of local vsegs                    */
[1]114
[585]115    remote_rwlock_t  gpt_lock;           /*! lock protecting the local GPT                  */
[408]116    gpt_t            gpt;                /*! Generic Page Table (complete in reference)     */
[1]117
[408]118    stack_mgr_t      stack_mgr;          /*! embedded STACK vsegs allocator                 */
119    mmap_mgr_t       mmap_mgr;           /*! embedded MMAP vsegs allocator                  */
[1]120
[408]121        uint32_t         pgfault_nr;         /*! page fault counter (instrumentation)           */
[1]122
[408]123    vpn_t            kent_vpn_base;      /*! kentry vseg first page                         */
124    vpn_t            args_vpn_base;      /*! args vseg first page                           */
125    vpn_t            envs_vpn_base;      /*! envs zone first page                           */
126    vpn_t            heap_vpn_base;      /*! heap zone first page                           */
127        vpn_t            code_vpn_base;      /*! code zone first page                           */
128        vpn_t            data_vpn_base;      /*! data zone first page                           */
[1]129
[408]130        intptr_t         entry_point;        /*! main thread entry point                        */
[1]131}
132vmm_t;
133
134/*********************************************************************************************
[406]135 * This function initialises the virtual memory manager attached to a user process.
[407]136 * - It initializes the STACK and MMAP allocators.
137 * - It registers the "kentry", "args", "envs" vsegs in the VSL.
[409]138 * - It initializes the generic page table, calling the HAL specific hal_gpt_init() function.
139 * - For TSAR it maps all pages for the "kentry" vseg, that must be identity mapping.
[614]140 *********************************************************************************************
141 * Implementation notes:
[407]142 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
143 * - The "stack" vsegs are dynamically created by the thread_user_create() function.
[409]144 * - The "file", "anon", "remote" vsegs are dynamically created by the mmap() syscall.
[1]145 *********************************************************************************************
146 * @ process   : pointer on process descriptor
[415]147 * @ return 0 if success / return -1 if failure.
[1]148 ********************************************************************************************/
[415]149error_t vmm_init( struct process_s * process );
[1]150
151/*********************************************************************************************
[407]152 * This function displays on TXT0 the list of registered vsegs for a given <process>.
[429]153 * It must be executed by a thread running in reference cluster.
154 * If the <mapping> argument is true, it displays for each vseg all mapped PTEs in GPT.
[23]155 *********************************************************************************************
[407]156 * @ process   : pointer on process descriptor.
157 * @ mapping   : detailed mapping if true.
158 ********************************************************************************************/
159void vmm_display( struct process_s * process,
160                  bool_t             mapping );
161
[610]162/*********************************************************************************************
[433]163 * This function is called by the process_make_fork() function. It partially copies
[408]164 * the content of a remote parent process VMM to the local child process VMM:
165 * - all DATA, MMAP, REMOTE vsegs registered in the parent VSL are registered in the child
166 *   VSL, and all valid GPT entries in parent GPT are copied to the child GPT.
167 *   The WRITABLE flag is reset and the COW flag is set in child GPT.
168 * - all CODE vsegs registered in the parent VSL are registered in the child VSL, but the
169 *   GPT entries are not copied in the child GPT, that will be dynamically updated from
170 *   the .elf file when a page fault is reported.
171 * - all FILE vsegs registered in the parent VSL are registered in the child VSL, and all
172 *   valid GPT entries in parent GPT are copied to the child GPT. The COW flag is not set.
173 * - no STACK vseg is copied from parent VMM to child VMM, because the child STACK vseg
[469]174 *   must be copied later from the cluster containing the user thread requesting the fork().
[407]175 *********************************************************************************************
[408]176 * @ child_process     : local pointer on local child process descriptor.
177 * @ parent_process_xp : extended pointer on remote parent process descriptor.
[415]178 * @ return 0 if success / return -1 if failure.
[23]179 ********************************************************************************************/
[408]180error_t vmm_fork_copy( struct process_s * child_process,
181                       xptr_t             parent_process_xp );
[23]182
183/*********************************************************************************************
[433]184 * This function is called by the process_make_fork() function executing the fork syscall.
[408]185 * It sets the COW flag, and resets the WRITABLE flag of all GPT entries of the DATA, MMAP,
186 * and REMOTE vsegs of a process identified by the <process> argument.
187 * It must be called by a thread running in the reference cluster, that contains the complete
[433]188 * VSL and GPT (use the rpc_vmm_set_cow_client() when the calling thread client is remote).
[408]189 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
190 * using the list of copies stored in the owner process, and using remote_write accesses to
[433]191 * update the remote GPTs. It atomically increments the pending_fork counter, in all involved
192 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
[1]193 *********************************************************************************************
[408]194 * @ process   : local pointer on local reference process descriptor.
195 ********************************************************************************************/
196void vmm_set_cow( struct process_s * process );
197
198/*********************************************************************************************
[585]199 * This global function modifies a GPT entry identified by the <process> and <vpn>
[433]200 * arguments in all clusters containing a process copy.
201 * It must be called by a thread running in the reference cluster.
[408]202 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
203 * using the list of copies stored in the owner process, and using remote_write accesses to
204 * update the remote GPTs. It cannot fail, as only mapped entries in GPT copies are updated.
205 *********************************************************************************************
206 * @ process   : local pointer on local process descriptor.
207 * @ vpn       : PTE index.
208 * @ attr      : PTE attributes (value written in the GPT copies).
209 * @ ppn       : PTE physical page index (value written in the GPT copies).
210 ********************************************************************************************/
[433]211void vmm_global_update_pte( struct process_s * process,
212                            vpn_t              vpn,
213                            uint32_t           attr,
214                            ppn_t              ppn );
[408]215
216/*********************************************************************************************
[433]217 * This function deletes, in the local cluster, all vsegs registered in the VSL
218 * of the process identified by the <process> argument. For each vseg:
219 * - it unmaps all vseg PTEs from the GPT (releasing the physical pages when required).
220 * - it removes the vseg from the local VSL.
221 * - it releases the memory allocated to the local vseg descriptor.
[611]222 * Finally, it releases the memory allocated to the GPT itself.
[408]223 *********************************************************************************************
[23]224 * @ process   : pointer on process descriptor.
[1]225 ********************************************************************************************/
226void vmm_destroy( struct process_s * process );
227
228/*********************************************************************************************
[18]229 * This function scans the list of vsegs registered in the VMM of a given process descriptor
[1]230 * to check if a given virtual region (defined by a base and size) overlaps an existing vseg.
231 * NOTE(review): both <base> and <size> are declared as vpn_t, which suggests they are
231 *   expressed in pages (first page / number of pages) rather than as a byte address
231 *   and a byte size, as stated below - confirm against the implementation.
231 *********************************************************************************************
232 * @ process  : pointer on process descriptor.
233 * @ base     : region virtual base address.
234 * @ size     : region size (bytes).
235 * @ returns NULL if no conflict / returns conflicting vseg pointer if conflict.
236 ********************************************************************************************/
237vseg_t * vmm_check_conflict( struct process_s * process,
238                             vpn_t              base,
239                             vpn_t              size );
240
241/*********************************************************************************************
[18]242 * This function allocates memory for a vseg descriptor, initialises it, and registers it
[595]243 * in the VMM of the local process descriptor, that must be the reference process.
[407]244 * For the "stack", "file", "anon", & "remote" types, it does not use the <base> argument,
245 * but uses the STACK and MMAP virtual memory allocators.
246 * It checks collision with all pre-existing vsegs.
247 * To comply with the "on-demand" paging policy, this function does NOT modify the page table,
248 * and does not allocate physical memory for vseg data.
249 * It should be called by a local thread (could be a RPC thread if the client thread is not
250 * running in the reference cluster).
[1]251 *********************************************************************************************
[407]252 * @ process     : pointer on local process descriptor.
253 * @ type        : vseg type.
254 * @ base        : vseg base address (not used for dynamically allocated vsegs).
255 * @ size        : vseg size (bytes).
256 * @ file_offset : offset in file for CODE, DATA, FILE types.
257 * @ file_size   : can be smaller than "size" for DATA type.
258 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
259 * @ cxy         : physical mapping cluster (for non distributed vsegs).
260 * @ returns pointer on vseg if success / returns NULL if no memory, or conflict.
[1]261 ********************************************************************************************/
262vseg_t * vmm_create_vseg( struct process_s * process,
[407]263                          vseg_type_t        type,
[18]264                          intptr_t           base,
[407]265                              uint32_t           size,
266                          uint32_t           file_offset,
267                          uint32_t           file_size,
268                          xptr_t             mapper_xp,
269                          cxy_t              cxy );
[1]270
271/*********************************************************************************************
[611]272 * This function removes from the local VMM of a process descriptor identified by the <pid>
273 * argument a local vseg identified by its base address <vaddr> in user space.
274 * It can be used for any type of vseg, but must be called by a local thread.
275 * Use the RPC_VMM_DELETE_VSEG if the client thread is not local.
276 * It does nothing if the process is not registered in the local cluster.
277 * It does nothing if the vseg is not registered in the local process VSL.
278 * - It removes from the local GPT all registered PTEs. If it is executed in the reference
279 *   cluster, it releases the referenced physical pages, to the relevant kmem allocator,
280 *   depending on vseg type and the pending forks counter.
281 * - It removes the vseg from the local VSL, and releases the vseg descriptor if not MMAP.
[1]282 *********************************************************************************************
[611]283 * @ pid      : process identifier.
284 * @ vaddr    : vseg base address in user space.
[1]285 ********************************************************************************************/
[611]286void vmm_delete_vseg( pid_t    pid,
287                      intptr_t vaddr );
[1]288
289/*********************************************************************************************
[611]290 * This function inserts a new <vseg> descriptor in the VSL identified by the <vmm> argument,
291 * and updates the vmm field in the vseg descriptor.
292 * It takes the lock protecting VSL.
293 *********************************************************************************************
294 * @ vmm       : local pointer on local VMM.
295 * @ vseg      : local pointer on local vseg descriptor.
296 ********************************************************************************************/
297void vmm_attach_vseg_to_vsl( vmm_t  * vmm,
298                             vseg_t * vseg );
299
300/*********************************************************************************************
301 * This function removes a vseg identified by the <vseg> argument from the local VSL
302 * identified by the <vmm> argument and releases the memory allocated to vseg descriptor,
303 * for all vseg types, BUT the MMAP type (i.e. ANON or REMOTE).
303 * NOTE(review): the vmm_s description lists (FILE/ANON/REMOTE) as the vsegs handled by the
303 *   mmap manager, while only ANON and REMOTE are listed here - confirm which is correct.
304 * - If the vseg has not the STACK or MMAP type, it is simply removed from the VSL,
305 *   and vseg descriptor is released.
306 * - If the vseg has the STACK type, it is removed from VSL, vseg descriptor is released,
307 *   and the stack slot is returned to the local VMM_STACK allocator.
308 * - If the vseg has the MMAP type, it is removed from VSL and is registered in zombi_list
309 *   of the VMM_MMAP allocator for future reuse. The vseg descriptor is NOT released.
310 *********************************************************************************************
311 * @ vmm       : local pointer on local VMM.
312 * @ vseg      : local pointer on local vseg to be removed.
313 ********************************************************************************************/
314void vmm_detach_vseg_from_vsl( vmm_t  * vmm,
315                               vseg_t * vseg );
316
317/*********************************************************************************************
[18]318 * This function removes a given region (defined by a base address and a size) from
[407]319 * the VMM of a given process descriptor. This can modify the number of vsegs:
[1]320 * (a) if the region is not entirely mapped in an existing vseg, it's an error.
321 * (b) if the region has same base and size as an existing vseg, the vseg is removed.
[406]322 * (c) if the removed region cuts the vseg in two parts, the vseg is modified.
323 * (d) if the removed region cuts the vseg in three parts, the vseg is modified, and a new
324 *     vseg is created with same type.
[610]325 * FIXME [AG] this function should be called by a thread running in the reference cluster,
326 *       and the VMM should be updated in all process descriptors copies.
[1]327 *********************************************************************************************
328 * @ process   : pointer on process descriptor
329 * @ base      : base address of the region to be removed
330 * @ size      : size of the region to be removed (bytes)
330 * @ returns an error code in case (a). NOTE(review): the success / failure values are not
330 *   documented here - presumably 0 if success; confirm against the implementation.
331 ********************************************************************************************/
332error_t vmm_resize_vseg( struct process_s * process,
333                         intptr_t           base,
334                         intptr_t           size );
335
336/*********************************************************************************************
[611]337 * This low-level function scans the local VSL in <vmm> to find the unique vseg containing
338 * a given virtual address <vaddr>.
339 * It is called by the vmm_get_vseg(), vmm_get_pte(), and vmm_resize_vseg() functions.
340 *********************************************************************************************
341 * @ vmm     : pointer on the process VMM.
342 * @ vaddr   : virtual address.
343 * @ return vseg pointer if success / return NULL if not found.
344 ********************************************************************************************/
345struct vseg_s * vmm_vseg_from_vaddr( vmm_t    * vmm,
346                                     intptr_t   vaddr );
347
348/*********************************************************************************************
[388]349 * This function checks that a given virtual address is contained in a registered vseg.
[399]350 * It can be called by any thread running in any cluster:
351 * - if the vseg is registered in the local process VMM, it returns the local vseg pointer.
[388]352 * - if the vseg is missing in local VMM, it uses a RPC to get it from the reference cluster,
353 *   registers it in local VMM and returns the local vseg pointer, if success.
[406]354 * - it returns a user error if the vseg is missing in the reference VMM, or if there is
[611]355 *   not enough memory for a new vseg descriptor in the calling thread cluster.
[1]356 *********************************************************************************************
[388]357 * @ process   : [in] pointer on process descriptor
358 * @ vaddr     : [in] virtual address
[440]359 * @ vseg      : [out] local pointer on local vseg
360 * @ returns 0 if success / returns -1 if user error (out of segment).
[611]361 ********************************************************************************************/
[388]362error_t vmm_get_vseg( struct process_s  * process,
363                      intptr_t            vaddr,
[394]364                      vseg_t           ** vseg );           
[1]365
366/*********************************************************************************************
[585]367 * This function is called by the generic exception handler in case of page-fault event,
[610]368 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
[585]369 * It checks the missing VPN and returns a user error if it is not in a registered vseg.
370 * For a legal VPN, there are actually 3 cases:
371 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, non
372 *    replicated in all clusters), it allocates a new physical page, computes the attributes,
373 *    depending on vseg type, and updates directly the local GPT.
374 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
375 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
376 *    the local GPT is simply updated from the reference GPT.
377 * 3) if the missing VPN is public, and unmapped in the reference GPT, it's a true page fault.
378 *    The calling thread allocates a new physical page, computes the attributes, depending
379 *    on vseg type, and updates directly (without RPC) the local GPT and the reference GPT.
380 *    Other GPT copies will be updated on demand.
[610]381 * Concurrent accesses to the GPT are handled, thanks to the
[585]382 * remote_rwlock protecting each GPT copy.
[1]383 *********************************************************************************************
[610]384 * @ process  : local pointer on local process.
385 * @ vpn      : VPN of the missing PTE.
[585]386 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
[1]387 ********************************************************************************************/
388error_t vmm_handle_page_fault( struct process_s * process,
[585]389                               vpn_t              vpn );
[1]390
391/*********************************************************************************************
[610]392 * This function is called by the generic exception handler in case of WRITE violation event,
393 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
[585]394 * It returns a kernel panic if VPN is not in a registered vseg or is not mapped.
395 * For a legal mapped vseg there are two cases:
396 * 1) If the faulting VPN belongs to a private vseg (STACK or CODE segment types, non
397 *    replicated in all clusters), it accesses the local GPT to get the current PPN and ATTR.
398 *    It accesses the forks counter in the current physical page descriptor.
399 *    If there is a pending fork, it allocates a new physical page from the cluster defined
400 *    by the vseg type, copies the old physical page content to the new physical page,
401 *    and decrements the pending_fork counter in old physical page descriptor.
402 *    Finally, it resets the COW flag and sets the WRITE flag in local GPT.
403 * 2) If the faulting VPN is public, it accesses the reference GPT to get the current PPN and
404 *    ATTR. It accesses the forks counter in the current physical page descriptor.
405 *    If there is a pending fork, it allocates a new physical page from the cluster defined
406 *    by the vseg type, copies the old physical page content to the new physical page,
407 *    and decrements the pending_fork counter in old physical page descriptor.
408 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
409 *    the WRITE flag in all the GPT copies, using a RPC if the reference cluster is remote.
[610]410 * In both cases, concurrent accesses to the GPT are protected by the remote_rwlock
411 * attached to the GPT copy in VMM.
[407]412 *********************************************************************************************
[585]413 * @ process   : pointer on local process descriptor copy.
414 * @ vpn       : VPN of the faulting PTE.
415 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis
[1]416 ********************************************************************************************/
[585]417error_t vmm_handle_cow( struct process_s * process,
418                        vpn_t              vpn );
[1]419
420/*********************************************************************************************
[401]421 * This function is called by the vmm_get_pte() function when a page is unmapped.
[313]422 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
423 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
[406]424 * - For the FILE type, it returns directly the physical page from the file mapper.
[433]425 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
[406]426 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
427 *   and initializes this page from the .elf file mapper.
428 * - For all other types, it allocates a new physical page from the cluster defined
429 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
430 *   but the new page is not initialized.
[313]431 *********************************************************************************************
432 * @ vseg   : local pointer on vseg containing the missing page.
433 * @ vpn    : Virtual Page Number identifying the missing page.
434 * @ ppn    : [out] returned Physical Page Number.
[401]435 * @ return 0 if success / return EINVAL or ENOMEM if error.
[313]436 ********************************************************************************************/
437error_t vmm_get_one_ppn( vseg_t * vseg,
438                         vpn_t    vpn,
439                         ppn_t  * ppn );
440
[1]441
442#endif /* _VMM_H_ */
Note: See TracBrowser for help on using the repository browser.