/* * kernel_init.c - kernel parallel initialization * * Authors : Alain Greiner (2016) * * Copyright (c) Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define KERNEL_INIT_SYNCHRO 0xA5A5B5B5 /////////////////////////////////////////////////////////////////////////////////////////// // All these global variables are replicated in all clusters. // They are initialised by the kernel_init() function. /////////////////////////////////////////////////////////////////////////////////////////// // This variable defines the local boot_info structure __attribute__((section(".kinfo"))) boot_info_t boot_info CACHELINE_ALIGNED; // This variable defines the local cluster manager __attribute__((section(".kdata"))) cluster_t cluster_manager CACHELINE_ALIGNED; // These variables define the kernel process0 descriptor and associated thread __attribute__((section(".kdata"))) process_t process_zero CACHELINE_ALIGNED; thread_t thread_zero CACHELINE_ALIGNED; // This variable contains the extended pointers on the device descriptors __attribute__((section(".kdata"))) chdev_directory_t chdev_dir CACHELINE_ALIGNED; // This variable contains the input IRQ indexes for the PIC device __attribute__((section(".kdata"))) chdev_pic_input_t chdev_pic_input CACHELINE_ALIGNED; // This variable contains the input IRQ indexes for the ICU device __attribute__((section(".kdata"))) chdev_icu_input_t chdev_icu_input CACHELINE_ALIGNED; // This variable synchronizes the local cores during kernel_init() __attribute__((section(".kdata"))) volatile uint32_t local_sync_init CACHELINE_ALIGNED; // This variable defines the local cluster identifier __attribute__((section(".kdata"))) cxy_t local_cxy CACHELINE_ALIGNED; // This variable is the lock protecting the kernel TXT terminal (used by printk) __attribute__((section(".kdata"))) remote_spinlock_t txt0_lock CACHELINE_ALIGNED; /////////////////////////////////////////////////////////////////////////////////////////// // This function displays the ALMOS_MKH banner. /////////////////////////////////////////////////////////////////////////////////////////// static void print_banner( uint32_t nclusters , uint32_t ncores ) { printk("\n" " _ __ __ _____ ______ __ __ _ __ _ _ \n" " /\\ | | | \\ / | / ___ \\ / _____| | \\ / | | | / / | | | | \n" " / \\ | | | \\/ | | / \\ | | / | \\/ | | |/ / | | | | \n" " / /\\ \\ | | | |\\ /| | | | | | | |_____ ___ | |\\ /| | | / | |___| | \n" " / /__\\ \\ | | | | \\/ | | | | | | \\_____ \\ |___| | | \\/ | | | \\ | ___ | \n" " / ______ \\ | | | | | | | | | | | | | | | | | |\\ \\ | | | | \n" " / / \\ \\ | |____ | | | | | \\___/ | _____/ | | | | | | | \\ \\ | | | | \n" " /_/ \\_\\ |______| |_| |_| \\_____/ |______/ |_| |_| |_| \\_\\ |_| |_| \n" "\n\n\t\t Advanced Locality Management Operating System / Multi Kernel Hybrid\n" "\n\n\t\t\t Version 0.0 : %d clusters / %d cores per cluster\n\n", nclusters , ncores ); } /////////////////////////////////////////////////////////////////////////////////////////// // This static function allocates memory and initializes the TXT0 chdev descriptor, // associated to the kernel terminal, shared by all kernel instances for debug messages. // It should be called by a thread running in the I/O cluster, because the TXT0 chdev // is created in the I/O cluster. /////////////////////////////////////////////////////////////////////////////////////////// // @ info : pointer on the local boot-info structure. /////////////////////////////////////////////////////////////////////////////////////////// static void txt0_device_init( boot_info_t * info ) { boot_device_t * dev_tbl; // pointer on array of devices in boot_info uint32_t dev_nr; // actual number of devices in this cluster xptr_t base; // remote pointer on segment base uint32_t size; // channel size (bytes) uint32_t type; // peripheral type uint32_t func; // device functionnal index uint32_t impl; // device implementation index uint32_t i; // device index in dev_tbl uint32_t x; // X cluster coordinate uint32_t y; // Y cluster coordinate chdev_t * chdev; // local pointer on created chdev // get number of peripherals and base of devices array from boot_info dev_nr = info->ext_dev_nr; dev_tbl = info->ext_dev; // loop on external peripherals to find TXT for( i = 0 ; i < dev_nr ; i++ ) { size = dev_tbl[i].size; base = dev_tbl[i].base; type = dev_tbl[i].type; func = FUNC_FROM_TYPE( type ); impl = IMPL_FROM_TYPE( type ); if (func == DEV_FUNC_TXT ) { // allocate and initialize a local chdev for TXT0 chdev = chdev_create( func, impl, 0, // channel 0, // direction base ); // Complete TXT specific initialisation if( impl == IMPL_TXT_TTY ) { chdev->cmd = &soclib_tty_cmd; chdev->isr = &soclib_tty_isr; soclib_tty_init( chdev ); } // initialize the replicated chdev_dir[x][y] structures for( x = 0 ; x < info->x_size ; x++ ) { for( y = 0 ; y < info->y_size ; y++ ) { cxy_t cxy = (x<y_width) + y; hal_remote_swd( XPTR( cxy , &chdev_dir.txt[0] ) , XPTR( local_cxy , chdev ) ); } } kinit_dmsg("\n[INFO] %s : core[%x][0] created TXT0 chdev / paddr = %l at cycle %d\n", __FUNCTION__ , local_cxy , chdev_func_str( func ), chdev_xp , hal_time_stamp() ); } } // end loop on devices } // end txt0_device_init() /////////////////////////////////////////////////////////////////////////////////////////// // This static function allocates memory for the chdev (channel_device) descriptors // associated to the internal peripherals contained in the local cluster. These internal // devices (ICU, MMC, DMA) chdev descriptors are placed in the local cluster. // It initialises these device descriptors as specified by the boot_info_t structure, // including the dynamic linking with the driver for the specified implementation. // Finally, all copies of the devices directory are initialised. /////////////////////////////////////////////////////////////////////////////////////////// // @ info : pointer on the local boot-info structure. /////////////////////////////////////////////////////////////////////////////////////////// static void internal_devices_init( boot_info_t * info ) { boot_device_t * dev_tbl; // pointer on array of devices in boot_info uint32_t dev_nr; // actual number of devices in this cluster xptr_t base; // remote pointer on segment base uint32_t size; // channel size (bytes) uint32_t type; // peripheral type uint32_t func; // device functionnal index uint32_t impl; // device implementation index uint32_t i; // device index in dev_tbl uint32_t x; // X cluster coordinate uint32_t y; // Y cluster coordinate uint32_t channels_nr; // number of channels in device uint32_t channel; // channel index uint32_t p0; // device parameter 0 uint32_t p1; // device parameter 1 uint32_t p2; // device parameter 2 uint32_t p3; // device parameter 3 chdev_t * chdev; // local pointer on one channel_device descriptor xptr_t chdev_xp; // extended pointer on channel_device descriptor // get number of internal devices and base of devices array from boot_info dev_nr = info->int_dev_nr; dev_tbl = info->int_dev; // loop on all internal devices in cluster for( i = 0 ; i < dev_nr ; i++ ) { size = dev_tbl[i].size; base = dev_tbl[i].base; type = dev_tbl[i].type; channels_nr = dev_tbl[i].channels; p0 = dev_tbl[i].param0; p1 = dev_tbl[i].param1; p2 = dev_tbl[i].param2; p3 = dev_tbl[i].param3; func = FUNC_FROM_TYPE( type ); impl = IMPL_FROM_TYPE( type ); // do nothing for RAM, that does not require a chdev descriptor. if( func == DEV_FUNC_RAM ) continue; // check internal device functional type if( (func != DEV_FUNC_MMC) && (func != DEV_FUNC_ICU) && (func != DEV_FUNC_DMA) ) { assert( false , __FUNCTION__ , "illegal internal peripheral type" ); } // loop on channels for( channel = 0 ; channel < channels_nr ; channel++ ) { // create one chdev in local cluster chdev = chdev_create( func , impl, channel, false, // TX base ); assert( (chdev != NULL) , __FUNCTION__ , "cannot allocate internal chdev" ); // get extended pointer on channel descriptor chdev_xp = XPTR( local_cxy , chdev ); // TODO ??? AG // devfs_register( dev ); // make device type specific initialisation // the number of parameters depends on the device type // TODO : remove these parameters that must be provided by the driver if ( func == DEV_FUNC_ICU ) dev_icu_init( chdev , p0 , p1 , p2 ); else if( func == DEV_FUNC_MMC ) dev_mmc_init( chdev ); else dev_dma_init( chdev ); // initialize the replicated chdev_dir[x][y] structures // containing extended pointers on all devices descriptors xptr_t * entry; if ( func == DEV_FUNC_ICU ) entry = &chdev_dir.icu[local_cxy]; else if( func == DEV_FUNC_MMC ) entry = &chdev_dir.mmc[local_cxy]; else entry = &chdev_dir.dma[channel]; if( func != DEV_FUNC_DMA ) // ICU and MMC devices are remotely accessible { for( x = 0 ; x < info->x_size ; x++ ) { for( y = 0 ; y < info->y_size ; y++ ) { cxy_t cxy = (x<y_width) + y; hal_remote_swd( XPTR( cxy , entry ) , chdev_xp ); } } } else // DMA devices are NOT remotely accessible { *entry = chdev_xp; } kinit_dmsg("\n[INFO] %s :core[%x][0] created chdev %s / channel %d" " / paddr = %l at cycle %d\n", __FUNCTION__ , local_cxy , chdev_func_str( func ) , channel , chdev_xp , hal_time_stamp() ); } // end loop on channels // initialize the entries of the local chdev_icu_input structure // defining how internal peripherals are connected to ICU if( func == DEV_FUNC_ICU ) { uint32_t id; uint8_t valid; uint32_t dev_type; uint8_t channel; // loop on ICU inputs for( id = 0 ; id < CONFIG_MAX_HWIS_PER_ICU ; id++ ) { valid = dev_tbl[i].irq[id].valid; dev_type = dev_tbl[i].irq[id].dev_type; channel = dev_tbl[i].irq[id].channel; if( valid ) // only valid local IRQs are registered { uint32_t * index; // local pointer on the entry to be set uint16_t dev_func = FUNC_FROM_TYPE( dev_type ); if( dev_func == DEV_FUNC_MMC ) index = &chdev_icu_input.mmc; else if( dev_func == DEV_FUNC_DMA ) index = &chdev_icu_input.dma[channel]; else { assert( false , __FUNCTION__ , "illegal source device for ICU input" ); } // set entry in local structure *index = id; } } // end loop on ICU inputs } // end if ICU } // end loop on peripherals } // end internal_devices_init() /////////////////////////////////////////////////////////////////////////////////////////// // This static function allocates memory for the chdev descriptors associated // to the external (shared) peripherals contained in the local cluster. These external // devices (IOB, IOC, TXT, NIC, etc ) are distributed on all clusters. // It initialises these device descriptors as specified by the boot_info_t structure, // including the dynamic linking with the driver for the specified implementation. // Finally, all copies of the devices directory are initialised. // // The number of channel_devices depends on the device functionnal type. // There is three nested loops to scan the full set of external channel_devices: // - loop on external devices. // - loop on channels for multi-channels devices. // - loop on directions (RX/TX) for NIC device. // The set of channel_devices is indexed by the chdev_gid global index, that is used // to select the cluster containing a given chdev[func,channel,direction]. // All clusters scan the full set of chdevs, but only the cluster matching // (chdev_gid % (x_size*y_size)) create the corresponding chdev. // // TODO check that cluster IO contains a PIC [AG] /////////////////////////////////////////////////////////////////////////////////////////// // @ info : pointer on the local boot-info structure. /////////////////////////////////////////////////////////////////////////////////////////// static void external_devices_init( boot_info_t * info ) { boot_device_t * dev_tbl; // pointer on array of devices in boot_info uint32_t dev_nr; // actual number of devices in this cluster xptr_t base; // remote pointer on segment base uint32_t size; // channel size (bytes) uint32_t type; // peripheral type uint32_t func; // device functionnal index uint32_t impl; // device implementation index uint32_t i; // device index in dev_tbl uint32_t x; // X cluster coordinate uint32_t y; // Y cluster coordinate uint32_t channels_nr; // number of channels uint32_t channel; // channel index uint32_t directions_nr; // number of directions uint32_t direction; // direction index uint32_t p0; // device parameter 0 uint32_t p1; // device parameter 1 uint32_t p2; // device parameter 2 uint32_t p3; // device parameter 3 uint32_t first_channel; // used in loop on channels chdev_t * chdev; // local pointer on one channel_device descriptor xptr_t chdev_xp; // extended pointer on channel_device descriptor uint32_t chdev_gid = 0; // global index of channel_device descriptor // get number of peripherals and base of devices array from boot_info dev_nr = info->ext_dev_nr; dev_tbl = info->ext_dev; // loop on external peripherals for( i = 0 ; i < dev_nr ; i++ ) { size = dev_tbl[i].size; base = dev_tbl[i].base; type = dev_tbl[i].type; channels_nr = dev_tbl[i].channels; p0 = dev_tbl[i].param0; p1 = dev_tbl[i].param1; p2 = dev_tbl[i].param2; p3 = dev_tbl[i].param3; func = FUNC_FROM_TYPE( type ); impl = IMPL_FROM_TYPE( type ); // There is one chdev per direction for NIC if (func == DEV_FUNC_NIC) directions_nr = 2; else directions_nr = 1; // The TXT0 chdev has already been created if (func == DEV_FUNC_TXT) first_channel = 1; else first_channel = 0; // do nothing for ROM, that does not require a device descriptor. if( func == DEV_FUNC_ROM ) continue; // check external device functionnal type if( (func != DEV_FUNC_IOB) && (func != DEV_FUNC_PIC) && (func != DEV_FUNC_IOC) && (func != DEV_FUNC_TXT) && (func != DEV_FUNC_NIC) && (func != DEV_FUNC_FBF) ) { assert( false , __FUNCTION__ , "undefined external peripheral type" ); } // loops on channels for( channel = first_channel ; channel < channels_nr ; channel++ ) { // loop on directions for( direction = 0 ; direction < directions_nr ; direction++ ) { // get target cluster for chdev[func,channel,direction] uint32_t offset = chdev_gid % ( info->x_size * info->y_size ); uint32_t cx = offset / info->y_size; uint32_t cy = offset % info->y_size; uint32_t target_cxy = (cx<y_width) + cy; // allocate and initialize a local chdev // if local cluster matches target cluster if( target_cxy == local_cxy ) { chdev = chdev_create( func, impl, channel, direction, base ); assert( (chdev != NULL), __FUNCTION__ , "cannot allocate external device" ); // get extended pointer on chdev chdev_xp = XPTR( local_cxy , chdev ); // make device type specific initialisation // the number of parameters depends on the device type // TODO : remove the parameters that must be provided by the drivers if ( func == DEV_FUNC_IOB ) dev_iob_init( chdev ); else if( func == DEV_FUNC_IOC ) dev_ioc_init( chdev ); else if( func == DEV_FUNC_TXT ) dev_txt_init( chdev ); else if( func == DEV_FUNC_NIC ) dev_nic_init( chdev ); else if( func == DEV_FUNC_PIC ) dev_pic_init( chdev , p0 ); else if( func == DEV_FUNC_FBF ) dev_fbf_init( chdev , p0 , p1 ); else { assert( false , __FUNCTION__ , "undefined device type" ); } // all external (shared) devices are remotely accessible // initialize the replicated chdev_dir[x][y] structures // defining the extended pointers on chdev descriptors xptr_t * entry; if( func == DEV_FUNC_IOB ) entry = &chdev_dir.iob; if( func == DEV_FUNC_PIC ) entry = &chdev_dir.pic; if( func == DEV_FUNC_TXT ) entry = &chdev_dir.txt[channel]; if( func == DEV_FUNC_IOC ) entry = &chdev_dir.ioc[channel]; if( func == DEV_FUNC_FBF ) entry = &chdev_dir.fbf[channel]; if( func == DEV_FUNC_NIC ) entry = &chdev_dir.nic_tx[channel]; for( x = 0 ; x < info->x_size ; x++ ) { for( y = 0 ; y < info->y_size ; y++ ) { cxy_t cxy = (x<y_width) + y; hal_remote_swd( XPTR( cxy , entry ) , chdev_xp ); } } kinit_dmsg("\n[INFO] %s : core[%x][0] created chdev %s / channel = %d" " / paddr = %l at cycle %d\n", __FUNCTION__ , local_cxy , chdev_func_str( func ), channel , chdev_xp , hal_time_stamp() ); } // end if match // increment chdev global index (matching or not) chdev_gid++; } // end loop on directions } // end loop on channels // initialize the entries of the local chdev_pic_input structure // defining how external peripherals are connected to PIC if( func == DEV_FUNC_PIC ) { uint32_t id; uint8_t valid; uint32_t dev_type; uint8_t channel; uint8_t is_rx; // loop on PIC inputs for( id = 0 ; id < CONFIG_MAX_IRQS_PER_PIC ; id++ ) { valid = dev_tbl[i].irq[id].valid; dev_type = dev_tbl[i].irq[id].dev_type; channel = dev_tbl[i].irq[id].channel; is_rx = dev_tbl[i].irq[id].is_rx; if( valid ) // only valid inputs are registered { uint32_t * index; // local pointer on one entry uint16_t dev_func = FUNC_FROM_TYPE( dev_type ); if( dev_func == DEV_FUNC_TXT ) { index = &chdev_pic_input.txt[channel]; } else if( dev_func == DEV_FUNC_IOC ) { index = &chdev_pic_input.ioc[channel]; } else if( (dev_func == DEV_FUNC_NIC) && (is_rx == 0) ) { index = &chdev_pic_input.nic_tx[channel]; } else if( (dev_func == DEV_FUNC_NIC) && (is_rx != 0) ) { index = &chdev_pic_input.nic_rx[channel]; } else { assert( false , __FUNCTION__ , "illegal source device for PIC input" ); } // set entry in local structure *index = id; } } // end loop on PIC inputs } // end PIC } // end loop on devices } // end external_devices_init() /////////////////////////////////////////////////////////////////////////////////////////// // This function is the entry point for the kernel initialisation. // It is executed by all cores in all clusters, but only core[0] in each cluster // initialize the cluster manager, ant the local peripherals. // To comply with the multi-kernels paradigm, it access only local cluster memory, using // only informations contained in the local boot_info_t structure, set by the bootloader. /////////////////////////////////////////////////////////////////////////////////////////// // @ info : pointer on the local boot-info structure. /////////////////////////////////////////////////////////////////////////////////////////// void kernel_init( boot_info_t * info ) { uint32_t core_lid; // running core local index cxy_t core_cxy; // running core cluster identifier gid_t core_gid; // running core hardware identifier cluster_t * cluster; // pointer on local cluster manager core_t * core; // pointer on running core descriptor thread_t * thread_idle; // pointer on thread_idle uint32_t i; bool_t found; error_t error; // initialise global cluster identifier local_cxy = info->cxy; // each core get its global index from hardware register core_gid = hal_get_gid(); // Each core makes an associative search in boot_info // to get its (cxy,lid) composite index from its gid found = false; core_cxy = 0; core_lid = 0; for( i = 0 ; i < info->cores_nr ; i++ ) { if( core_gid == info->core[i].gid ) { core_lid = info->core[i].lid; core_cxy = info->core[i].cxy; found = true; break; } } // suicide if not found if( (found == false) || (core_cxy != local_cxy) ) hal_core_sleep(); ////////////////////////////////////////////////////////////// // In first step, only CP0 initialises local resources ////////////////////////////////////////////////////////////// if( core_lid == 0 ) { // initialize local cluster manager (cores and memory allocators) error = cluster_init( info ); // suicide if failure if( error ) hal_core_sleep(); // get pointer on local cluster manager and on core descriptor cluster = LOCAL_CLUSTER; core = &cluster->core_tbl[core_lid]; // initialize process_zero descriptor process_zero_init( info ); // CP0 initialize its private thread_zero descriptor memset( &thread_zero , 0 , sizeof(thread_t) ); thread_zero.type = THREAD_KERNEL; thread_zero.process = &process_zero; hal_set_current_thread( &thread_zero ); // CP0 in I/O cluster initialize the kernel TXT0 chdev descriptor. // this TXTO device is shared by the all kernel instances for debug messages: // the printk() function call the dev_txt_sync_write() function that call // directly the relevant TXT driver, without desheduling. if( core_cxy == info->io_cxy ) txt0_device_init( info ); // synchronise all CP0s before using TXT0 remote_barrier( XPTR( info->io_cxy , &cluster->barrier ) , (cluster->x_size * cluster->y_size) ); // All CP0 initialise internal peripheral chdev descriptors. // Each CP0[cxy] scan the set of its internal (private) peripherals, // and allocate memory for the corresponding chdev descriptors. internal_devices_init( info ); // All CP0 contribute to initialise external peripheral chdev descriptors. // Each CP0[cxy] scan the set of external (shared) peripherals (but the TXT0), // and allocates memory for the chdev descriptors that must be placed // on the (cxy) cluster according to its global index. external_devices_init( info ); // TODO initialize devFS and sysFS // devfs_root_init(); // sysfs_root_init(); // TODO ??? [AG] // clusters_sysfs_register(); // TODO initialize virtual file system // vfs_init(); // TODO ??? [AG] // sysconf_init(); // activate other cores in same cluster local_sync_init = KERNEL_INIT_SYNCHRO; hal_wbflush(); } else // other cores { // other cores wait synchro from core[0] while( local_sync_init != KERNEL_INIT_SYNCHRO ) { uint32_t retval = hal_time_stamp() + 1000; while( hal_time_stamp() < retval ) asm volatile ("nop"); } // get pointer on local cluster manager and on core descriptor cluster = LOCAL_CLUSTER; core = &cluster->core_tbl[core_lid]; // core initialise its private thread_zero descriptor memset( &thread_zero , 0 , sizeof(thread_t) ); thread_zero.type = THREAD_KERNEL; thread_zero.process = &process_zero; hal_set_current_thread( &thread_zero ); } // each core creates its private idle thread descriptor error = thread_kernel_create( &thread_idle, THREAD_IDLE, &thread_idle_func, NULL, core_lid ); assert( (error == 0) , __FUNCTION__ , "cannot create idle thread" ); // each core register thread_idle in scheduler core->scheduler.idle = thread_idle; // each core register thread pointer in core hardware register hal_set_current_thread( thread_idle ); kinit_dmsg("\n[INFO] %s : thread idle created for core[%x][%d] at cycle %d\n", __FUNCTION__ , core_cxy , core_lid , hal_time_stamp()); // global syncho for all core[0] in all clusters if( core_lid == 0 ) { remote_barrier( XPTR( info->io_cxy , &cluster->barrier ) , (cluster->x_size * cluster->y_size) ); } // local synchro for all cores in local cluster remote_barrier( XPTR( local_cxy , &cluster->barrier ) , cluster->cores_nr ); if( (core_lid == 0) && (local_cxy == info->io_cxy) ) { print_banner( (info->x_size * info->y_size) , info->cores_nr ); } // load idle thread context on calling core hal_cpu_context_load( thread_idle ); } // end kernel_init()