source: trunk/kernel/mm/vmm.h @ 624

Last change on this file since 624 was 624, checked in by alain, 5 years ago

Fix several bugs to use the instruction MMU in kernel mode
in replacement of the instruction address extension register,
and remove the "kentry" segment.

This version is running on the "tsar_generic_iob" platform.

One interesting bug: the cp0_ebase defining the kernel entry point
(for interrupts, exceptions and syscalls) must be initialized
early in kernel_init(), because the VFS initialisation done by
kernel_init() uses RPCs, and RPCs use Inter-Processor Interrupts.

File size: 27.5 KB
/*
 * vmm.h - virtual memory management related operations
 *
 * Authors   Ghassan Almaless (2008,2009,2010,2011,2012)
 *           Mohamed Lamine Karaoui (2015)
 *           Alain Greiner (2016,2017,2018,2019)
 *
 * Copyright (c) UPMC Sorbonne Universites
 *
 * This file is part of ALMOS-MKH.
 *
 * ALMOS-MKH is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2.0 of the License.
 *
 * ALMOS-MKH is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _VMM_H_
#define _VMM_H_

#include <hal_kernel_types.h>
#include <bits.h>
#include <list.h>
#include <queuelock.h>
#include <hal_gpt.h>
#include <vseg.h>
#include <page.h>

/****  Forward declarations  ****/

struct process_s;
struct vseg_s;

/*********************************************************************************************
 * This structure defines the STACK allocator used by the VMM to dynamically handle
 * vseg allocation or release requests for a user thread.
 * This allocator handles a fixed size array of fixed size slots in the STACK zone of user
 * space. The stack size and the number of slots are defined by the CONFIG_VMM_STACK_SIZE
 * and CONFIG_VMM_STACK_BASE parameters.
 * Each slot can contain one user stack vseg. The first 4 Kbytes page in each slot is not
 * mapped, to detect stack overflow.
 * The slot index can be computed from the slot base address, and conversely.
 * All allocation / release operations are registered in the stack_bitmap, which completely
 * defines the STACK zone status.
 ********************************************************************************************/

typedef struct stack_mgr_s
{
    busylock_t     lock;               /*! lock protecting STACK allocator                  */
    vpn_t          vpn_base;           /*! first page of STACK zone                         */
    bitmap_t       bitmap;             /*! bit vector of allocated stacks                   */
}
stack_mgr_t;

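/*********************************************************************************************
 * Illustration (editor's sketch, not part of the original API): how a stack slot index can
 * be converted to/from a slot base VPN, and conversely, as described above. It assumes
 * that CONFIG_VMM_STACK_BASE and CONFIG_VMM_STACK_SIZE are both expressed in pages; this
 * is an assumption made for the example, not a statement about the actual parameters.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static inline vpn_t stack_slot_to_vpn( uint32_t index )
{
    /* each slot occupies CONFIG_VMM_STACK_SIZE pages in the STACK zone */
    return CONFIG_VMM_STACK_BASE + (index * CONFIG_VMM_STACK_SIZE);
}

static inline uint32_t stack_vpn_to_slot( vpn_t vpn_base )
{
    /* reverse computation : recover the slot index from the slot base VPN */
    return (vpn_base - CONFIG_VMM_STACK_BASE) / CONFIG_VMM_STACK_SIZE;
}
#endif
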
/*********************************************************************************************
 * This structure defines the MMAP allocator used by the VMM to dynamically handle
 * MMAP vsegs requested or released by a user process.
 * This allocator should only be used in the reference cluster.
 * - allocation policy : all allocated vsegs occupy an integer number of pages that is a
 *   power of 2, and are aligned on a page boundary. The requested number of pages is
 *   rounded up to a power of 2 if required. The first_free_vpn variable completely defines
 *   the MMAP zone state. It is never decremented, as the released vsegs are simply
 *   registered in a zombi_list. The relevant zombi_list is checked first for each
 *   allocation request.
 * - release policy : a released MMAP vseg is registered in an array of zombi_lists.
 *   This array is indexed by log2(number of pages), and each entry contains the root of
 *   a local list of zombi vsegs that have the same size. The physical memory allocated
 *   for a zombi vseg descriptor is not released, because its "list" field is used to link
 *   it in the zombi_list. The physical memory allocated for these MMAP vseg descriptors
 *   is actually released when the VMM is destroyed.
 ********************************************************************************************/

typedef struct mmap_mgr_s
{
    busylock_t     lock;               /*! lock protecting MMAP allocator                   */
    vpn_t          vpn_base;           /*! first page of MMAP zone                          */
    vpn_t          vpn_size;           /*! number of pages in MMAP zone                     */
    vpn_t          first_free_vpn;     /*! first free page in MMAP zone                     */
    list_entry_t   zombi_list[32];     /*! array of roots of released vsegs lists           */
}
mmap_mgr_t;

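/*********************************************************************************************
 * Illustration (editor's sketch, not part of the original API): how an allocation request
 * of <npages> pages can be rounded to a power of 2, and how the corresponding zombi_list
 * index can be computed. The helper names are hypothetical; only the "power of 2" and
 * "indexed by log2(number of pages)" rules come from the comment above.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static inline vpn_t mmap_round_npages( vpn_t npages )
{
    vpn_t rounded = 1;
    while( rounded < npages ) rounded <<= 1;      /* smallest power of 2 >= npages */
    return rounded;
}

static inline uint32_t mmap_zombi_index( vpn_t rounded_npages )
{
    uint32_t index = 0;
    while( (((vpn_t)1) << index) < rounded_npages ) index++;   /* index == log2(rounded) */
    return index;
}
#endif
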
/*********************************************************************************************
 * This structure defines the Virtual Memory Manager for a given process in a given cluster.
 * This local VMM implements four main services:
 * 1) It contains the local copy of the vseg list (VSL), only complete in the reference
 *    cluster.
 * 2) It contains the local copy of the generic page table (GPT), only complete in the
 *    reference cluster.
 * 3) The stack manager dynamically allocates virtual memory space for the STACK vsegs.
 * 4) The mmap manager dynamically allocates virtual memory for the (FILE/ANON/REMOTE) vsegs.
 *********************************************************************************************
 * Implementation notes:
 * 1. In most clusters, the VSL and GPT are only partial copies of the reference VSL and GPT
 *    structures, stored in the reference cluster.
 * 2. The VSL contains only local vsegs, but it is implemented as an xlist, and protected by
 *    a remote_rwlock, because it can be accessed by a thread running in a remote cluster.
 *    An example is the vmm_fork_copy() function.
 * 3. The GPT in the reference cluster can be directly accessed by remote threads to handle
 *    false page-faults (page mapped in the reference GPT, but PTE copy missing
 *    in the local GPT). It is also protected by a remote_rwlock.
 ********************************************************************************************/

typedef struct vmm_s
{
    remote_rwlock_t  vsegs_lock;         /*! lock protecting the local VSL                  */
    xlist_entry_t    vsegs_root;         /*! Virtual Segment List (complete in reference)   */
    uint32_t         vsegs_nr;           /*! total number of local vsegs                    */

    remote_rwlock_t  gpt_lock;           /*! lock protecting the local GPT                  */
    gpt_t            gpt;                /*! Generic Page Table (complete in reference)     */

    stack_mgr_t      stack_mgr;          /*! embedded STACK vsegs allocator                 */
    mmap_mgr_t       mmap_mgr;           /*! embedded MMAP vsegs allocator                  */

    uint32_t         pgfault_nr;         /*! page fault counter (instrumentation)           */

    vpn_t            args_vpn_base;      /*! args vseg first page                           */
    vpn_t            envs_vpn_base;      /*! envs vseg first page                           */
    vpn_t            code_vpn_base;      /*! code vseg first page                           */
    vpn_t            data_vpn_base;      /*! data vseg first page                           */
    vpn_t            heap_vpn_base;      /*! heap zone first page                           */

    intptr_t         entry_point;        /*! main thread entry point                        */
}
vmm_t;

/*********************************************************************************************
 * This function initialises the virtual memory manager attached to a user process.
 * - It initializes the STACK and MMAP allocators.
 * - It registers the "kentry", "args", "envs" vsegs in the VSL.
 * - It initializes the generic page table, calling the HAL specific hal_gpt_init() function.
 * - For TSAR, it maps all pages of the "kentry" vseg, which must be identity mapped.
 *********************************************************************************************
 * Implementation notes:
 * - The "code" and "data" vsegs are registered by the elf_load_process() function.
 * - The "stack" vsegs are dynamically created by the thread_user_create() function.
 * - The "file", "anon", "remote" vsegs are dynamically created by the mmap() syscall.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 * @ return 0 if success / return -1 if failure.
 ********************************************************************************************/
error_t vmm_init( struct process_s * process );

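/*********************************************************************************************
 * Usage sketch (editor's illustration, not taken from the actual process creation code):
 * a caller owning a freshly allocated process descriptor initializes its VMM and checks
 * the return value before going further.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static error_t example_process_vmm_setup( struct process_s * process )
{
    if( vmm_init( process ) )      /* returns -1 on failure */
    {
        return -1;                 /* caller must release the process descriptor */
    }
    return 0;
}
#endif
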
/*********************************************************************************************
 * This function displays on TXT0 the list of registered vsegs for a given <process>.
 * It must be executed by a thread running in the reference cluster.
 * If the <mapping> argument is true, it displays for each vseg all mapped PTEs in the GPT.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 * @ mapping   : detailed mapping if true.
 ********************************************************************************************/
void hal_vmm_display( struct process_s * process,
                      bool_t             mapping );

/*********************************************************************************************
 * This function is called by the process_make_fork() function. It partially copies
 * the content of a remote parent process VMM to the local child process VMM:
 * - all DATA, MMAP, REMOTE vsegs registered in the parent VSL are registered in the child
 *   VSL, and all valid GPT entries in the parent GPT are copied to the child GPT.
 *   The WRITABLE flag is reset and the COW flag is set in the child GPT.
 * - all CODE vsegs registered in the parent VSL are registered in the child VSL, but the
 *   GPT entries are not copied to the child GPT; they will be dynamically updated from
 *   the .elf file when a page fault is reported.
 * - all FILE vsegs registered in the parent VSL are registered in the child VSL, and all
 *   valid GPT entries in the parent GPT are copied to the child GPT. The COW flag is not set.
 * - no STACK vseg is copied from the parent VMM to the child VMM, because the child STACK
 *   vseg must be copied later from the cluster containing the user thread requesting the
 *   fork().
 *********************************************************************************************
 * @ child_process     : local pointer on local child process descriptor.
 * @ parent_process_xp : extended pointer on remote parent process descriptor.
 * @ return 0 if success / return -1 if failure.
 ********************************************************************************************/
error_t vmm_fork_copy( struct process_s * child_process,
                       xptr_t             parent_process_xp );

/*********************************************************************************************
 * This function is called by the process_make_fork() function executing the fork syscall.
 * It sets the COW flag and resets the WRITABLE flag in all GPT entries of the DATA, MMAP,
 * and REMOTE vsegs of a process identified by the <process> argument.
 * It must be called by a thread running in the reference cluster, which contains the
 * complete VSL and GPT (use rpc_vmm_set_cow_client() when the calling thread is remote).
 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
 * using the list of copies stored in the owner process, and using remote_write accesses to
 * update the remote GPTs. It atomically increments the pending_fork counter in all involved
 * physical page descriptors. It cannot fail, as only mapped entries in GPTs are updated.
 *********************************************************************************************
 * @ process   : local pointer on local reference process descriptor.
 ********************************************************************************************/
void vmm_set_cow( struct process_s * process );

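/*********************************************************************************************
 * Usage sketch (editor's illustration of the fork path described above, not the actual
 * process_make_fork() code): the child VMM is first populated from the remote parent VMM
 * with vmm_fork_copy(), then the copy-on-write flags are set on the parent side with
 * vmm_set_cow(). The <parent_local> pointer is assumed to be a local pointer on the parent
 * descriptor in its reference cluster.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static error_t example_fork_vmm( struct process_s * child,
                                 xptr_t             parent_xp,
                                 struct process_s * parent_local )
{
    /* copy DATA / MMAP / REMOTE / CODE / FILE vsegs from parent to child */
    if( vmm_fork_copy( child , parent_xp ) ) return -1;

    /* set the COW flag in the parent GPT (and its copies) for the relevant vsegs */
    vmm_set_cow( parent_local );

    return 0;
}
#endif
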
/*********************************************************************************************
 * This global function modifies a GPT entry identified by the <process> and <vpn>
 * arguments in all clusters containing a process copy.
 * It must be called by a thread running in the reference cluster.
 * It updates all copies of the process in all clusters, to maintain coherence in GPT copies,
 * using the list of copies stored in the owner process, and using remote_write accesses to
 * update the remote GPTs. It cannot fail, as only mapped entries in GPT copies are updated.
 *********************************************************************************************
 * @ process   : local pointer on local process descriptor.
 * @ vpn       : PTE index.
 * @ attr      : PTE / attributes.
 * @ ppn       : PTE / physical page index.
 ********************************************************************************************/
void vmm_global_update_pte( struct process_s * process,
                            vpn_t              vpn,
                            uint32_t           attr,
                            ppn_t              ppn );

/*********************************************************************************************
 * This function deletes, in the local cluster, all vsegs registered in the VSL
 * of the process identified by the <process> argument. For each vseg:
 * - it unmaps all vseg PTEs from the GPT (releasing the physical pages when required),
 * - it removes the vseg from the local VSL,
 * - it releases the memory allocated to the local vseg descriptors,
 * - it releases the memory allocated to the GPT itself.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 ********************************************************************************************/
void vmm_destroy( struct process_s * process );

/*********************************************************************************************
 * This function scans the list of vsegs registered in the VMM of a given process descriptor
 * to check if a given virtual region (defined by a base and a size) overlaps an existing
 * vseg.
 *********************************************************************************************
 * @ process  : pointer on process descriptor.
 * @ base     : region virtual base address.
 * @ size     : region size (bytes).
 * @ returns NULL if no conflict / returns conflicting vseg pointer if conflict.
 ********************************************************************************************/
vseg_t * vmm_check_conflict( struct process_s * process,
                             vpn_t              base,
                             vpn_t              size );

/*********************************************************************************************
 * This function allocates memory for a vseg descriptor, initialises it, and registers it
 * in the VMM of the local process descriptor, which must be the reference process.
 * For the "stack", "file", "anon", and "remote" types, it does not use the <base> argument,
 * but uses the STACK and MMAP virtual memory allocators.
 * It checks collision with all pre-existing vsegs.
 * To comply with the "on-demand" paging policy, this function does NOT modify the page
 * table, and does not allocate physical memory for vseg data.
 * It should be called by a local thread (possibly an RPC thread if the client thread is not
 * running in the reference cluster).
 *********************************************************************************************
 * @ process     : pointer on local process descriptor.
 * @ type        : vseg type.
 * @ base        : vseg base address (not used for dynamically allocated vsegs).
 * @ size        : vseg size (bytes).
 * @ file_offset : offset in file for CODE, DATA, FILE types.
 * @ file_size   : can be smaller than "size" for DATA type.
 * @ mapper_xp   : extended pointer on mapper for CODE, DATA, FILE types.
 * @ cxy         : physical mapping cluster (for non distributed vsegs).
 * @ returns pointer on vseg if success / returns NULL if no memory or conflict.
 ********************************************************************************************/
vseg_t * vmm_create_vseg( struct process_s * process,
                          vseg_type_t        type,
                          intptr_t           base,
                          uint32_t           size,
                          uint32_t           file_offset,
                          uint32_t           file_size,
                          xptr_t             mapper_xp,
                          cxy_t              cxy );

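/*********************************************************************************************
 * Usage sketch (editor's illustration): creating an anonymous MMAP vseg in the reference
 * cluster, as a mmap()-like service could do. The VSEG_TYPE_ANON enumerator, the XPTR_NULL
 * value, and the local_cxy identifier are assumed to be the ones defined by vseg.h and the
 * HAL headers; the <base> argument is ignored for this type, since the MMAP allocator
 * chooses the virtual address.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static vseg_t * example_create_anon_vseg( struct process_s * ref_process,
                                          uint32_t           size )
{
    return vmm_create_vseg( ref_process,
                            VSEG_TYPE_ANON,     /* dynamically placed by the MMAP allocator */
                            0,                  /* base : not used for this type            */
                            size,               /* size in bytes                            */
                            0, 0,               /* file_offset / file_size : not used       */
                            XPTR_NULL,          /* no mapper for an anonymous vseg          */
                            local_cxy );        /* physical mapping cluster                 */
}
#endif
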
/*********************************************************************************************
 * This function removes from the local VMM of a process identified by the <pid>
 * argument a local vseg identified by its base address <vaddr> in user space.
 * It can be used for any type of vseg, but must be called by a local thread.
 * Use the RPC_VMM_DELETE_VSEG if the client thread is not local.
 * It does nothing if the process is not registered in the local cluster.
 * It does nothing if the vseg is not registered in the local process VSL.
 * - It removes from the local GPT all registered PTEs. If it is executed in the reference
 *   cluster, it releases the referenced physical pages to the relevant kmem allocator,
 *   depending on the vseg type and the pending forks counter.
 * - It removes the vseg from the local VSL, and releases the vseg descriptor if not MMAP.
 *********************************************************************************************
 * @ pid      : process identifier.
 * @ vaddr    : vseg base address in user space.
 ********************************************************************************************/
void vmm_delete_vseg( pid_t    pid,
                      intptr_t vaddr );

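/*********************************************************************************************
 * Usage sketch (editor's illustration): a munmap()-like service running in the local
 * cluster removes the vseg containing a user virtual address. The process->pid access
 * assumes the usual "pid" field of the process descriptor.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static void example_unmap( struct process_s * process,
                           intptr_t           vaddr )
{
    /* must be executed by a local thread : use RPC_VMM_DELETE_VSEG otherwise */
    vmm_delete_vseg( process->pid , vaddr );
}
#endif
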
/*********************************************************************************************
 * This function inserts a new <vseg> descriptor in the VSL identified by the <vmm> argument,
 * and updates the vmm field in the vseg descriptor.
 * It takes the lock protecting the VSL.
 *********************************************************************************************
 * @ vmm       : local pointer on local VMM.
 * @ vseg      : local pointer on local vseg descriptor.
 ********************************************************************************************/
void vmm_attach_vseg_to_vsl( vmm_t  * vmm,
                             vseg_t * vseg );

/*********************************************************************************************
 * This function removes a vseg identified by the <vseg> argument from the local VSL
 * identified by the <vmm> argument, and releases the memory allocated to the vseg
 * descriptor, for all vseg types BUT the MMAP types (i.e. ANON or REMOTE).
 * - If the vseg has neither the STACK nor the MMAP type, it is simply removed from the VSL,
 *   and the vseg descriptor is released.
 * - If the vseg has the STACK type, it is removed from the VSL, the vseg descriptor is
 *   released, and the stack slot is returned to the local VMM_STACK allocator.
 * - If the vseg has the MMAP type, it is removed from the VSL and registered in the
 *   zombi_list of the VMM_MMAP allocator for future reuse. The vseg descriptor is NOT
 *   released.
 *********************************************************************************************
 * @ vmm       : local pointer on local VMM.
 * @ vseg      : local pointer on local vseg to be removed.
 ********************************************************************************************/
void vmm_detach_vseg_from_vsl( vmm_t  * vmm,
                               vseg_t * vseg );

/*********************************************************************************************
 * This function removes a given region (defined by a base address and a size) from
 * the VMM of a given process descriptor. This can modify the number of vsegs:
 * (a) if the region is not entirely mapped in an existing vseg, it is an error.
 * (b) if the region has the same base and size as an existing vseg, the vseg is removed.
 * (c) if the removed region cuts an existing vseg in two parts, the vseg is resized.
 * (d) if the removed region cuts an existing vseg in three parts, the vseg is resized,
 *     and a new vseg with the same type is created.
 * FIXME [AG] this function should be called by a thread running in the reference cluster,
 *       and the VMM should be updated in all process descriptor copies.
 *********************************************************************************************
 * @ process   : pointer on process descriptor.
 * @ base      : vseg base address.
 * @ size      : vseg size (bytes).
 ********************************************************************************************/
error_t vmm_resize_vseg( struct process_s * process,
                         intptr_t           base,
                         intptr_t           size );

/*********************************************************************************************
 * This low-level function scans the local VSL in <vmm> to find the unique vseg containing
 * a given virtual address <vaddr>.
 * It is called by the vmm_get_vseg(), vmm_get_pte(), and vmm_resize_vseg() functions.
 *********************************************************************************************
 * @ vmm     : pointer on the process VMM.
 * @ vaddr   : virtual address.
 * @ return vseg pointer if success / return NULL if not found.
 ********************************************************************************************/
struct vseg_s * vmm_vseg_from_vaddr( vmm_t    * vmm,
                                     intptr_t   vaddr );

/*********************************************************************************************
 * This function checks that a given virtual address is contained in a registered vseg.
 * It can be called by any thread running in any cluster:
 * - if the vseg is registered in the local process VMM, it returns the local vseg pointer.
 * - if the vseg is missing in the local VMM, it uses an RPC to get it from the reference
 *   cluster, registers it in the local VMM, and returns the local vseg pointer on success.
 * - it returns a user error if the vseg is missing in the reference VMM, or if there is
 *   not enough memory for a new vseg descriptor in the calling thread cluster.
 *********************************************************************************************
 * @ process   : [in] pointer on process descriptor.
 * @ vaddr     : [in] virtual address.
 * @ vseg      : [out] local pointer on local vseg.
 * @ returns 0 if success / returns -1 if user error (out of segment).
 ********************************************************************************************/
error_t vmm_get_vseg( struct process_s  * process,
                      intptr_t            vaddr,
                      vseg_t           ** vseg );

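/*********************************************************************************************
 * Usage sketch (editor's illustration): a syscall handler checking that a user buffer
 * address belongs to a registered vseg before using it. Only the vmm_get_vseg() contract
 * documented above is used; the error code returned to user space is left to the caller.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static error_t example_check_user_buffer( struct process_s * process,
                                          intptr_t           user_vaddr )
{
    vseg_t * vseg;

    if( vmm_get_vseg( process , user_vaddr , &vseg ) )
    {
        return -1;            /* address is outside any registered vseg */
    }

    /* the buffer base is valid ; <vseg> can now be used to check size / access rights */
    return 0;
}
#endif
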
/*********************************************************************************************
 * This function is called by the generic exception handler in case of a page-fault event,
 * detected for a given <vpn>. The <process> argument is used to access the relevant VMM.
 * It checks the missing VPN and returns a user error if it is not in a registered vseg.
 * For a legal VPN, there are actually three cases:
 * 1) if the missing VPN belongs to a private vseg (STACK or CODE segment types, not
 *    replicated in all clusters), it allocates a new physical page, computes the attributes
 *    depending on the vseg type, and directly updates the local GPT.
 * 2) if the missing VPN belongs to a public vseg, it can be a false page-fault, when the VPN
 *    is mapped in the reference GPT, but not in the local GPT. For this false page-fault,
 *    the local GPT is simply updated from the reference GPT.
 * 3) if the missing VPN is public, and unmapped in the reference GPT, it is a true page
 *    fault. The calling thread allocates a new physical page, computes the attributes
 *    depending on the vseg type, and directly (without RPC) updates the local GPT and the
 *    reference GPT. Other GPT copies will be updated on demand.
 * Concurrent accesses to the GPT are handled, thanks to the remote_rwlock protecting
 * each GPT copy.
 *********************************************************************************************
 * @ process  : local pointer on local process.
 * @ vpn      : VPN of the missing PTE.
 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis.
 ********************************************************************************************/
error_t vmm_handle_page_fault( struct process_s * process,
                               vpn_t              vpn );

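/*********************************************************************************************
 * Usage sketch (editor's illustration of the dispatch described above, not the actual
 * exception handler): the generic handler forwards a page-fault on <bad_vaddr> to the VMM
 * and interprets the returned code. The bad_vaddr >> CONFIG_PPM_PAGE_SHIFT conversion
 * assumes the usual page-shift configuration parameter.
 ********************************************************************************************/
#if 0   /* documentation sketch only */
static error_t example_page_fault_dispatch( struct process_s * process,
                                            intptr_t           bad_vaddr )
{
    vpn_t   vpn   = (vpn_t)( bad_vaddr >> CONFIG_PPM_PAGE_SHIFT );
    error_t error = vmm_handle_page_fault( process , vpn );

    if( error == EXCP_NON_FATAL )  return 0;    /* PTE mapped : retry the faulting access   */
    if( error == EXCP_USER_ERROR ) return -1;   /* illegal vaddr : kill the user process    */
    return -1;                                  /* EXCP_KERNEL_PANIC : unrecoverable        */
}
#endif
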
/*********************************************************************************************
 * This function is called by the generic exception handler in case of a WRITE violation
 * event, detected for a given <vpn>. The <process> argument is used to access the relevant
 * VMM. It returns a kernel panic if the VPN is not in a registered vseg, or is not mapped.
 * For a legal mapped vseg there are two cases:
 * 1) If the faulting VPN belongs to a private vseg (STACK or CODE segment types, not
 *    replicated in all clusters), it accesses the local GPT to get the current PPN and ATTR.
 *    It accesses the forks counter in the current physical page descriptor.
 *    If there is a pending fork, it allocates a new physical page from the cluster defined
 *    by the vseg type, copies the old physical page content to the new physical page,
 *    and decrements the pending_fork counter in the old physical page descriptor.
 *    Finally, it resets the COW flag and sets the WRITE flag in the local GPT.
 * 2) If the faulting VPN is public, it accesses the reference GPT to get the current PPN
 *    and ATTR. It accesses the forks counter in the current physical page descriptor.
 *    If there is a pending fork, it allocates a new physical page from the cluster defined
 *    by the vseg type, copies the old physical page content to the new physical page,
 *    and decrements the pending_fork counter in the old physical page descriptor.
 *    Finally it calls the vmm_global_update_pte() function to reset the COW flag and set
 *    the WRITE flag in all the GPT copies, using an RPC if the reference cluster is remote.
 * In both cases, concurrent accesses to the GPT are protected by the remote_rwlock
 * attached to the GPT copy in the VMM.
 *********************************************************************************************
 * @ process   : pointer on local process descriptor copy.
 * @ vpn       : VPN of the faulting PTE.
 * @ returns EXCP_NON_FATAL / EXCP_USER_ERROR / EXCP_KERNEL_PANIC after analysis.
 ********************************************************************************************/
error_t vmm_handle_cow( struct process_s * process,
                        vpn_t              vpn );

/*********************************************************************************************
 * This function is called by the vmm_get_pte() function when a page is unmapped.
 * Depending on the vseg type, defined by the <vseg> argument, it returns the PPN
 * (Physical Page Number) associated to a missing page defined by the <vpn> argument.
 * - For the FILE type, it returns directly the physical page from the file mapper.
 * - For the CODE and DATA types, it allocates a new physical page from the cluster defined
 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
 *   and initializes this page from the .elf file mapper.
 * - For all other types, it allocates a new physical page from the cluster defined
 *   by the <vseg->cxy> field, or by the <vpn> MSB bits for a distributed vseg,
 *   but the new page is not initialized.
 *********************************************************************************************
 * @ vseg   : local pointer on vseg containing the missing page.
 * @ vpn    : Virtual Page Number identifying the missing page.
 * @ ppn    : [out] returned Physical Page Number.
 * @ return 0 if success / return EINVAL or ENOMEM if error.
 ********************************************************************************************/
error_t vmm_get_one_ppn( vseg_t * vseg,
                         vpn_t    vpn,
                         ppn_t  * ppn );


#endif /* _VMM_H_ */