source: trunk/kernel/kern/dqdt.c @ 582

Last change on this file since 582 was 582, checked in by alain, 6 years ago

New DQDT implementation supporting missing clusters
thanks to the cluster_info[x][y] array.

File size: 16.6 KB
RevLine 
[1]1/*
2 * dqdt.c - Distributed Quaternary Decision Tree implementation.
[19]3 *
[437]4 * Author : Alain Greiner (2016,2017,2018)
[1]5 *
6 * Copyright (c)  UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
[14]24#include <kernel_config.h>
[457]25#include <hal_kernel_types.h>
[1]26#include <hal_special.h>
[582]27#include <hal_macros.h>
[1]28#include <hal_atomic.h>
29#include <hal_remote.h>
30#include <printk.h>
[438]31#include <chdev.h>
[1]32#include <cluster.h>
33#include <bits.h>
34#include <dqdt.h>
35
36
[438]37///////////////////////////////////////////////////////////////////////////////////////////
38//      Extern variables
39///////////////////////////////////////////////////////////////////////////////////////////
[1]40
[438]41extern chdev_directory_t  chdev_dir;  // defined in chdev.h / allocated in kernel_init.c
42
43///////////////////////////////////////////////////////////////////////////////////////////
44// This static recursive function traverse the DQDT quad-tree from root to bottom.
45///////////////////////////////////////////////////////////////////////////////////////////
46static void dqdt_recursive_print( xptr_t  node_xp )
[1]47{
[582]48        uint32_t x;
49        uint32_t y;
[438]50    dqdt_node_t node;
[1]51
[438]52    // get node local copy
53    hal_remote_memcpy( XPTR( local_cxy , &node ), node_xp , sizeof(dqdt_node_t) );
[1]54
[438]55    // display node content
56        nolock_printk("- level %d in cluster %x (node %x) : threads = %x / pages = %x\n",
57    node.level, GET_CXY( node_xp ), GET_PTR( node_xp ), node.threads, node.pages );
[1]58
59    // recursive call on children if node is not terminal
[438]60    if ( node.level > 0 )
[1]61    {
[582]62        for ( x = 0 ; x < 2 ; x++ )
[1]63        {
[582]64            for ( y = 0 ; y < 2 ; y++ )
65            {
66                xptr_t iter_xp = node.children[x][y];
67                if ( iter_xp != XPTR_NULL ) dqdt_recursive_print( iter_xp );
68            }
[1]69        }
70    }
[19]71}
72
[564]73/////////////////////////
[485]74void dqdt_display( void )
[438]75{
[582]76    // get extended pointer on DQDT root node
77        cluster_t * cluster = &cluster_manager;
78    xptr_t      root_xp = cluster->dqdt_root_xp;
[438]79
80    // get pointers on TXT0 chdev
81    xptr_t    txt0_xp  = chdev_dir.txt_tx[0];
82    cxy_t     txt0_cxy = GET_CXY( txt0_xp );
83    chdev_t * txt0_ptr = GET_PTR( txt0_xp );
84
[564]85    // get extended pointer on remote TXT0 lock
[438]86    xptr_t  lock_xp = XPTR( txt0_cxy , &txt0_ptr->wait_lock );
87
[564]88    // get TXT0 lock
89    remote_busylock_acquire( lock_xp );
[438]90
91    // print header
92    nolock_printk("\n***** DQDT state\n\n");
93
94    // call recursive function
95    dqdt_recursive_print( root_xp );
96
[582]97    // release TXT0 lock
[564]98    remote_busylock_release( lock_xp );
[438]99}
100
[582]101///////////////////////////////////////////////////////////////////////////////////////
102// This static function initializes recursively, from top to bottom, the quad-tree
103// infrastructure. The DQDT nodes are allocated as global variables in each local
104//  cluster manager. At each level in the quad-tree, this function initializes the
105// parent DQDT node in the cluster identified by the <cxy> and <level> arguments.
106// A each level, it selects in each child macro-cluster the precise cluster where
107// will be placed the the subtree root node, and call recursively itself to
108// initialize the child node in this cluster.
109///////////////////////////////////////////////////////////////////////////////////////
110// @ node cxy  : cluster containing the node to initialize
111// @ level     : level of node to be initialised
112// @ parent_xp : extended pointer on the parent node
113///////////////////////////////////////////////////////////////////////////////////////
114static void dqdt_recursive_build( cxy_t    node_cxy,
115                                  uint32_t level,
116                                  xptr_t   parent_xp )
[1]117{
[582]118    assert( (level < 5) , __FUNCTION__, "illegal DQDT level %d\n", level );
119 
120    uint32_t node_x;         // node X coordinate
121    uint32_t node_y;         // node Y coordinate
122    uint32_t mask;           // to compute associated macro-cluster coordinates
123    uint32_t node_base_x;    // associated macro_cluster X coordinate
124    uint32_t node_base_y;    // associated macro_cluster y coordinate
125    uint32_t half;           // associated macro-cluster half size
[1]126
[582]127    // get remote node cluster coordinates
128    node_x = HAL_X_FROM_CXY( node_cxy );
129    node_y = HAL_Y_FROM_CXY( node_cxy );
130       
131    // get macro-cluster mask and half-size
132    mask   = (1 << level) - 1;
133    half   = (level > 0) ? (1 << (level - 1)) : 0;
[564]134
[582]135    // get macro-cluster coordinates
136    node_base_x = node_x & ~mask;
137    node_base_y = node_y & ~mask;
[1]138
[582]139    // get pointer on local cluster manager
140    cluster_t * cluster = LOCAL_CLUSTER;
[1]141
[582]142    // get local pointer on remote node to be initialized
143    dqdt_node_t * node  = &cluster->dqdt_tbl[level];
[1]144
[582]145#if DEBUG_DQDT_INIT
146printk("\n[DBG] %s : cxy(%d,%d) / level %d / mask %x / half %d / ptr %x\n",
147__FUNCTION__, node_x, node_y, level, mask, half, node );
148#endif
149 
150    // make remote node default initialisation
151    hal_remote_memset( XPTR( node_cxy , node ) , 0 , sizeof( dqdt_node_t ) );
152
153    // recursive initialisation
154    if( level == 0 )                      // terminal case
[1]155    {
[582]156        // update parent field
157        hal_remote_s64( XPTR( node_cxy , &node->parent ) , parent_xp );
158    }
159    else                                  // non terminal
160    {
161        uint32_t x;
162        uint32_t y;
163        cxy_t    cxy;
164        bool_t   found;
[1]165
[582]166        // update <level> in remote node
167        hal_remote_s32( XPTR( node_cxy , &node->level ) , level );
[19]168
[582]169        // try to find a valid cluster in child[0][0] macro-cluster
170        found = false; 
171        for( x = node_base_x ; 
172        (x < (node_base_x + half)) && (found == false) ; x++ )
[1]173        {
[582]174            for( y = node_base_y ; 
175            (y < (node_base_y + half)) && (found == false) ; y++ )
176            {
177                cxy = HAL_CXY_FROM_XY( x , y );
178                if( cluster_is_active( cxy ) )
179                {
180                    // update <child[0][0]> in remote inode
181                    hal_remote_s64( XPTR( node_cxy , &node->children[0][0] ), 
182                                    XPTR( cxy , &cluster->dqdt_tbl[level - 1] ) );
[1]183
[582]184                    // udate <arity> in remote node
185                    hal_remote_atomic_add( XPTR( node_cxy , &node->arity ) , 1 );
186
187                    // initialize recursively child[0][0] node
188                    dqdt_recursive_build( cxy , level-1 , XPTR( node_cxy , node ) );
189   
190                    // exit loops
191                    found = true;
192                }
[1]193            }
[582]194        }
[1]195
[582]196        // try to find a valid cluster in child[0][1] macro-cluster
197        found = false; 
198        for( x = node_base_x ; 
199        (x < (node_base_x + half)) && (found == false) ; x++ )
200        {
201            for( y = (node_base_y + half) ; 
202            (y < (node_base_y + (half<<2))) && (found == false) ; y++ )
[1]203            {
[582]204                cxy = HAL_CXY_FROM_XY( x , y );
205                if( cluster_is_active( cxy ) )
206                {
207                    // update <child[0][1]> in remote inode
208                    hal_remote_s64( XPTR( node_cxy , &node->children[0][1] ), 
209                                    XPTR( cxy , &cluster->dqdt_tbl[level - 1] ) );
210
211                    // udate <arity> in remote node
212                    hal_remote_atomic_add( XPTR( node_cxy , &node->arity ) , 1 );
213
214                    // initialize recursively child[0][1] node
215                    dqdt_recursive_build( cxy , level-1 , XPTR( node_cxy , node ) );
216   
217                    // exit loops
218                    found = true;
219                }
[1]220            }
[582]221        }
222           
223        // try to find a valid cluster in child[1][0] macro-cluster
224        found = false; 
225        for( x = (node_base_x + half) ; 
226        (x < (node_base_x + (half<<1))) && (found == false) ; x++ )
227        {
228            for( y = node_base_y ; 
229            (y < (node_base_y + half)) && (found == false) ; y++ )
230            {
231                cxy = HAL_CXY_FROM_XY( x , y );
232                if( cluster_is_active( cxy ) )
233                {
234                    // update <child[1][0]> in remote inode
235                    hal_remote_s64( XPTR( node_cxy , &node->children[1][0] ), 
236                                    XPTR( cxy , &cluster->dqdt_tbl[level - 1] ) );
[1]237
[582]238                    // udate <arity> in remote node
239                    hal_remote_atomic_add( XPTR( node_cxy , &node->arity ) , 1 );
240
241                    // initialize recursively child[1][0] node
242                    dqdt_recursive_build( cxy , level-1 , XPTR( node_cxy , node ) );
243   
244                    // exit loops
245                    found = true;
246                }
[1]247            }
[582]248        }
[1]249
[582]250        // try to find a valid cluster in child[1][1] macro-cluster
251        found = false; 
252        for( x = (node_base_x + half) ; 
253        (x < (node_base_x + (half<<1))) && (found == false) ; x++ )
254        {
255            for( y = (node_base_y + half) ; 
256            (y < (node_base_y + (half<<2))) && (found == false) ; y++ )
[1]257            {
[582]258                cxy = HAL_CXY_FROM_XY( x , y );
259                if( cluster_is_active( cxy ) )
260                {
261                    // update <child[1][1]> in remote inode
262                    hal_remote_s64( XPTR( node_cxy , &node->children[1][1] ), 
263                                    XPTR( cxy , &cluster->dqdt_tbl[level - 1] ) );
264
265                    // udate <arity> in remote node
266                    hal_remote_atomic_add( XPTR( node_cxy , &node->arity ) , 1 );
267
268                    // initialize recursively child[1][1] node
269                    dqdt_recursive_build( cxy , level-1 , XPTR( node_cxy , node ) );
270   
271                    // exit loops
272                    found = true;
273                }
[1]274            }
[582]275        }
276    }
277}  // end dqdt_recursive_build()
[1]278
[582]279//////////////////////
280void dqdt_init( void )
281{
282    // get x_size & y_size from cluster manager
283    cluster_t * cluster = &cluster_manager;
284    uint32_t    x_size  = cluster->x_size;
285    uint32_t    y_size  = cluster->y_size;
[1]286
[582]287    assert( ((x_size <= 16) && (y_size <= 16)) , "illegal mesh size\n");
288 
289    // compute level_max
290    uint32_t  x_size_ext = POW2_ROUNDUP( x_size );
291    uint32_t  y_size_ext = POW2_ROUNDUP( y_size );
292    uint32_t  size_ext   = MAX( x_size_ext , y_size_ext );
293    uint32_t  level_max  = bits_log2( size_ext );
[1]294
[582]295    // each CP0 register the DQDT root in local cluster manager
296    cluster->dqdt_root_xp = XPTR( 0 , &cluster->dqdt_tbl[level_max] );
297
298#if DEBUG_DQDT_INIT
299if( local_cxy == 0 )
300printk("\n[DBG] %s : x_size = %d / y_size = %d / level_max = %d\n",
301__FUNCTION__, x_size, y_size, level_max );
302#endif
303   
304    // only CP0 in cluster 0 call the recursive function to build the quad-tree
305    if (local_cxy == 0) dqdt_recursive_build( local_cxy , level_max , XPTR_NULL );
306
307#if DEBUG_DQDT_INIT
308if( local_cxy == 0 ) dqdt_display();
309#endif
310
311}  // end dqdt_init()
312
[1]313///////////////////////////////////////////////////////////////////////////
[438]314// This recursive function is called by the dqdt_update_threads() function.
[1]315// It traverses the quad tree from clusters to root.
316///////////////////////////////////////////////////////////////////////////
[438]317// @ node       : extended pointer on current node
318// @ increment  : number of threads variation
319///////////////////////////////////////////////////////////////////////////
320static void dqdt_propagate_threads( xptr_t  node,
321                                    int32_t increment )
[1]322{
323    // get current node cluster identifier and local pointer
[438]324    cxy_t         cxy = GET_CXY( node );
325    dqdt_node_t * ptr = GET_PTR( node );
[1]326
327    // update current node threads number
[438]328    hal_remote_atomic_add( XPTR( cxy , &ptr->threads ) , increment );
[1]329
330    // get extended pointer on parent node
[564]331    xptr_t parent = (xptr_t)hal_remote_l64( XPTR( cxy , &ptr->parent ) );
[1]332
333    // propagate if required
[438]334    if ( parent != XPTR_NULL ) dqdt_propagate_threads( parent, increment );
[1]335}
336
[438]337///////////////////////////////////////////////////////////////////////////
338// This recursive function is called by the dqdt_update_pages() function.
339// It traverses the quad tree from clusters to root.
340///////////////////////////////////////////////////////////////////////////
341// @ node       : extended pointer on current node
342// @ increment  : number of pages variation
343///////////////////////////////////////////////////////////////////////////
344static void dqdt_propagate_pages( xptr_t  node,
345                                  int32_t increment )
[1]346{
[438]347    // get current node cluster identifier and local pointer
348    cxy_t         cxy = GET_CXY( node );
349    dqdt_node_t * ptr = GET_PTR( node );
[1]350
[438]351    // update current node threads number
352    hal_remote_atomic_add( XPTR( cxy , &ptr->pages ) , increment );
[1]353
[438]354    // get extended pointer on parent node
[564]355    xptr_t parent = (xptr_t)hal_remote_l64( XPTR( cxy , &ptr->parent ) );
[1]356
[438]357    // propagate if required
358    if ( parent != XPTR_NULL ) dqdt_propagate_pages( parent, increment );
[1]359}
360
[438]361/////////////////////////////////////////////
[582]362void dqdt_update_threads( int32_t increment )
[1]363{
[438]364        cluster_t   * cluster = LOCAL_CLUSTER;
365    dqdt_node_t * node    = &cluster->dqdt_tbl[0];
[19]366
[438]367    // update DQDT node level 0
368    hal_atomic_add( &node->threads , increment );
[1]369
[438]370    // propagate to DQDT upper levels
371    if( node->parent != XPTR_NULL ) dqdt_propagate_threads( node->parent , increment );
[1]372}
373
[438]374///////////////////////////////////////////
[582]375void dqdt_update_pages( int32_t increment )
[1]376{
[438]377        cluster_t   * cluster = LOCAL_CLUSTER;
378    dqdt_node_t * node    = &cluster->dqdt_tbl[0];
[19]379
[438]380    // update DQDT node level 0
381    hal_atomic_add( &node->pages , increment );
[1]382
[438]383    // propagate to DQDT upper levels
384    if( node->parent != XPTR_NULL ) dqdt_propagate_pages( node->parent , increment );
[1]385}
386
387////////////////////////////////////////////////////////////////////////////////
388// This recursive function is called by both the dqdt_get_cluster_for_process()
389// and by the dqdt_get_cluster_for_memory() functions to select the cluster
390// with smallest number of thread, or smallest number of allocated pages.
391// It traverses the quad tree from root to clusters.
392///////////////////////////////////////////////////////////////////////////////
393static cxy_t dqdt_select_cluster( xptr_t node,
394                                  bool_t for_memory )
395{
396    dqdt_node_t   node_copy;     // local copy of the current DQDT node
[582]397    xptr_t        child_xp;      // extended pointer on a DQDT child node
398    uint32_t      x;             // child node X coordinate
399    uint32_t      y;             // child node Y coordinate
400    uint32_t      select_x;      // selected child X coordinate
401    uint32_t      select_y;      // selected child Y coordinate
[1]402    uint32_t      load;          // load of the child (threads or pages)
403    uint32_t      load_min;      // current value of the minimal load
404
405    // get DQDT node local copy
406    hal_remote_memcpy( XPTR( local_cxy , &node_copy ), node , sizeof(dqdt_node_t) );
407
408    // return cluster identifier for a terminal mode
409    if( node_copy.level == 0 ) return GET_CXY(node);
410
411    // analyse load for all children in non terminal node
412    load_min = 0xFFFFFFFF;
[582]413    select_x = 0;
414    select_y = 0;
415    for( x = 0 ; x < 2 ; x++ )
[1]416    {
[582]417        for( y = 0 ; y < 2 ; y++ )
[1]418        {
[582]419            child_xp = node_copy.children[x][y];
420            if( child_xp != XPTR_NULL )
[1]421            {
[582]422                cxy_t         cxy  = GET_CXY( child_xp );
423                dqdt_node_t * ptr  = GET_PTR( child_xp );
424                if( for_memory ) load = hal_remote_l32( XPTR( cxy , &ptr->pages ) );
425                else             load = hal_remote_l32( XPTR( cxy , &ptr->threads ) );
426                if( load < load_min )
427                {
428                    load_min = load;
429                    select_x = x;
430                    select_y = y;
431                }
[19]432            }
[1]433        }
434    }
435
436    // select the child with the lowest load
[582]437    return dqdt_select_cluster( node_copy.children[select_x][select_y], for_memory );
[1]438}
439
[564]440//////////////////////////////////////////
[485]441cxy_t dqdt_get_cluster_for_process( void )
[1]442{
443    // call recursive function
[582]444    return dqdt_select_cluster( LOCAL_CLUSTER->dqdt_root_xp , false );
[1]445}
446
[564]447/////////////////////////////////////////
[485]448cxy_t dqdt_get_cluster_for_memory( void )
[1]449{
450    // call recursive function
[582]451    return dqdt_select_cluster( LOCAL_CLUSTER->dqdt_root_xp , true );
[1]452}
453
Note: See TracBrowser for help on using the repository browser.