source: trunk/kernel/kern/ksocket.c @ 668

Last change on this file since 668 was 668, checked in by alain, 22 months ago

typos...

File size: 78.8 KB
Line 
1/*
2 * ksocket.c - kernel socket API implementation.
3 *
4 * Authors  Alain Greiner   (2016,2017,2018,2019,2020)
5 *
6 * Copyright (c) UPMC Sorbonne Universites
7 *
8 * This file is part of ALMOS-MKH.
9 *
10 * ALMOS-MKH is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2.0 of the License.
13 *
14 * ALMOS-MKH is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with ALMOS-MKH.; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#include <kernel_config.h>
25#include <hal_kernel_types.h>
26#include <hal_remote.h>
27#include <hal_uspace.h>
28#include <shared_socket.h>
29#include <process.h>
30#include <remote_buf.h>
31#include <bits.h>
32#include <printk.h>
33#include <kmem.h>
34#include <thread.h>
35#include <vfs.h>
36#include <ksocket.h>
37#include <dev_nic.h>
38
39//////////////////////////////////////////////////////////////////////////////////////
40// Extern global variables
41//////////////////////////////////////////////////////////////////////////////////////
42
43extern chdev_directory_t  chdev_dir;         // allocated in kernel_init.c
44
//////////////////////////////////////////////////////////////////////////////////////
// This function returns a printable name for the socket <domain> argument.
// Any value other than AF_INET or AF_LOCAL is reported as "undefined".
//////////////////////////////////////////////////////////////////////////////////////
char * socket_domain_str( uint32_t domain )
{
    if( domain == AF_INET  ) return "INET";
    if( domain == AF_LOCAL ) return "LOCAL";

    // unknown domain
    return "undefined";
}
56   
//////////////////////////////////////////////////////////////////////////////////////
// This function returns a printable name for the socket <type> argument :
// "UDP" for SOCK_DGRAM, "TCP" for SOCK_STREAM, "undefined" for any other value.
//////////////////////////////////////////////////////////////////////////////////////
char * socket_type_str( uint32_t type )
{
    if( type == SOCK_DGRAM  ) return "UDP";
    if( type == SOCK_STREAM ) return "TCP";

    // unknown type
    return "undefined";
}
68   
69/////////////////////////////////////////
70char * socket_state_str( uint32_t state )
71{
72    switch( state )
73    {
74        case UDP_STATE_UNBOUND    : return "UDP_UNBOUND";
75        case UDP_STATE_BOUND      : return "UDP_BOUND";
76        case UDP_STATE_ESTAB      : return "UDP_ESTAB";
77
78        case TCP_STATE_UNBOUND    : return "TCP_UNBOUND";
79        case TCP_STATE_BOUND      : return "TCP_BOUND";
80        case TCP_STATE_LISTEN     : return "TCP_LISTEN";
81        case TCP_STATE_SYN_SENT   : return "TCP_SYN_SENT";
82        case TCP_STATE_SYN_RCVD   : return "TCP_SYN_RCVD";
83        case TCP_STATE_ESTAB      : return "TCP_ESTAB";
84        case TCP_STATE_FIN_WAIT1  : return "TCP_FIN_WAIT1";
85        case TCP_STATE_FIN_WAIT2  : return "TCP_FIN_WAIT2";
86        case TCP_STATE_CLOSING    : return "TCP_CLOSING";
87        case TCP_STATE_TIME_WAIT  : return "TCP_TIME_WAIT";
88        case TCP_STATE_CLOSE_WAIT : return "TCP_CLOSE_WAIT";
89        case TCP_STATE_LAST_ACK   : return "TCP_LAST_ACK";
90        case TCP_STATE_CLOSED     : return "TCP_CLOSED";
91
92        default                   : return "undefined";
93    }
94}
95
96///////////////////////////////////////////
97char * socket_cmd_type_str( uint32_t type )
98{
99    switch( type )
100    {
101        case CMD_TX_CONNECT  : return "TX_CONNECT";
102        case CMD_TX_ACCEPT   : return "TX_ACCEPT";
103        case CMD_TX_CLOSE    : return "TX_CLOSE";
104        case CMD_TX_SEND     : return "TX_SEND";
105
106        case CMD_RX_ACCEPT   : return "RX_ACCEPT";
107        case CMD_RX_RECV     : return "RX_RECV";
108       
109        default                 : return "undefined";
110    }
111}
112   
113///////////////////////////////////////////
114char * socket_cmd_sts_str( uint32_t sts )
115{
116    switch( sts )
117    {
118        case CMD_STS_SUCCESS  : return "TX_CONNECT";
119        case CMD_STS_EOF      : return "EOF";
120        case CMD_STS_RST      : return "RST";
121        case CMD_STS_BADACK   : return "BADACK";
122        case CMD_STS_BADSTATE : return "BADSTATE";
123        case CMD_STS_BADCMD   : return "BADCMD";
124       
125        default               : return "undefined";
126    }
127}
128
129/////////////////////////////////////////////////////////////////////////////////////////
130// This static function registers the socket defined by the <socket_xp> argument into
131// the lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs identified
132// by the <channel> argument, and update the channel field in socket descriptor.
133/////////////////////////////////////////////////////////////////////////////////////////
134// @ socket_xp   : [in]  extended pointer on socket descriptor.
135// @ channel     : [in]  NIC channel index.
136/////////////////////////////////////////////////////////////////////////////////////////
137static void socket_link_to_servers( xptr_t   socket_xp,
138                                    uint32_t channel )
139{
140    cxy_t      socket_cxy = GET_CXY( socket_xp );
141    socket_t * socket_ptr = GET_PTR( socket_xp );
142
143#if DEBUG_SOCKET_LINK
144thread_t  * this        = CURRENT_THREAD;
145process_t * process     = this->process;
146pid_t       socket_pid  = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
147fdid_t      socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
148uint32_t   cycle        = (uint32_t)hal_get_cycles();
149if( DEBUG_SOCKET_LINK < cycle )
150printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
151__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
152#endif
153
154    // get pointers on NIC_TX[channel] chdev
155    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[channel];
156    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
157    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
158
159    // build various TX extended pointers
160    xptr_t    tx_root_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_root );
161    xptr_t    tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock );
162    xptr_t    tx_list_xp = XPTR( socket_cxy   , &socket_ptr->tx_list );
163
164    // get pointers on NIC_RX[channel] chdev
165    xptr_t    rx_chdev_xp  = chdev_dir.nic_rx[channel];
166    chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp );
167    cxy_t     rx_chdev_cxy = GET_CXY( rx_chdev_xp );
168
169    // build various RX extended pointers
170    xptr_t    rx_root_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_root );
171    xptr_t    rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock );
172    xptr_t    rx_list_xp = XPTR( socket_cxy   , &socket_ptr->rx_list );
173
174    // register socket in the NIC_TX[channel] chdev clients queue
175    remote_busylock_acquire( tx_lock_xp );
176    xlist_add_last( tx_root_xp , tx_list_xp );
177    remote_busylock_release( tx_lock_xp );
178
179    // register socket in the NIC_RX[channel] chdev clients queue
180    remote_busylock_acquire( rx_lock_xp );
181    xlist_add_last( rx_root_xp , rx_list_xp );
182    remote_busylock_release( rx_lock_xp );
183
184#if DEBUG_SOCKET_LINK
185cycle = (uint32_t)hal_get_cycles();
186if( DEBUG_SOCKET_LINK < cycle )
187printk("\n[%s] thread[%x,%x] linked socket[%x,%d] to channel %d / cycle %d\n",
188__FUNCTION__, process->pid, this->trdid, process->pid, socket_pid, socket_fdid, channel, cycle );
189#endif
190
191}  // end socket_link_to_servers()
192
193/////////////////////////////////////////////////////////////////////////////////////////
194// This function removes the socket defined by the <socket_xp> argument from the
195// lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs.
196/////////////////////////////////////////////////////////////////////////////////////////
197// @ socket_xp   : [in]  extended pointer on socket descriptor
198/////////////////////////////////////////////////////////////////////////////////////////
199static void socket_unlink_from_servers( xptr_t socket_xp )
200{
201    cxy_t      socket_cxy = GET_CXY( socket_xp );
202    socket_t * socket_ptr = GET_PTR( socket_xp );
203
204#if DEBUG_SOCKET_LINK
205thread_t  * this        = CURRENT_THREAD;
206process_t * process     = this->process;
207pid_t       socket_pid  = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
208fdid_t      socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ));
209uint32_t   cycle        = (uint32_t)hal_get_cycles();
210if( DEBUG_SOCKET_LINK < cycle )
211printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
212__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
213#endif
214
215    // get NIC channel
216    uint32_t channel = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->nic_channel ));
217
218    // get pointers on NIC_TX[channel] chdev
219    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[channel];
220    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
221    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
222
223    // build various TX extended pointers
224    xptr_t    tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock );
225    xptr_t    tx_list_xp = XPTR( socket_cxy   , &socket_ptr->tx_list );
226
227    // get pointers on NIC_RX[channel] chdev
228    xptr_t    rx_chdev_xp  = chdev_dir.nic_rx[channel];
229    chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp );
230    cxy_t     rx_chdev_cxy = GET_CXY( rx_chdev_xp );
231
232    // build various RX extended pointers
233    xptr_t    rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock );
234    xptr_t    rx_list_xp = XPTR( socket_cxy   , &socket_ptr->rx_list );
235
236    // remove socket from the NIC_TX[channel] chdev clients queue
237    remote_busylock_acquire( tx_lock_xp );
238    xlist_unlink( tx_list_xp );
239    remote_busylock_release( tx_lock_xp );
240
241    // remove socket from the NIC_RX[channel] chdev clients queue
242    remote_busylock_acquire( rx_lock_xp );
243    xlist_unlink( rx_list_xp );
244    remote_busylock_release( rx_lock_xp );
245
246#if DEBUG_SOCKET_LINK
247cycle = (uint32_t)hal_get_cycles();
248if( DEBUG_SOCKET_LINK < cycle )
249printk("\n[%s] thread[%x,%x] unlinked socket [%x,%d] / cycle %d\n",
250__FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); 
251#endif
252
253}  // end socket_unlink_from_servers()
254       
255/////////////////////////////////////////////////////////////////////////////////////////
256// This static function is called by the socket_build() and socket_accept() functions.
257// It allocates memory in cluster defined by the <cxy> argument for all structures
258// associated to a socket: file descriptor, socket descriptor, RX buffer, R2T queue,
259// and CRQ queue. It allocates an fdid, and register it in the process fd_array.
260// It initialise the  the socket desccriptor static fields, other than local_addr,
261// local_port, remote_addr, remote_port), and set the socket state to UNBOUND.
262// It returns the local pointer on socket descriptor and the fdid value in buffers
263// defined by the <socket_ptr> & <fdid_ptr> arguments.
264/////////////////////////////////////////////////////////////////////////////////////////
265// @ cxy        : [in]  target cluster fo socket & file descriptors.
266// @ domain     : [in]  socket domain.
267// @ type       : [in]  socket type.
268// @ socket_ptr : [out] local pointer on buffer for socket pointer.
269// @ fdid_ptr   : [out] local pointer on buffer for fdid value.
270// # return 0 if success / return -1 if no memory.
271/////////////////////////////////////////////////////////////////////////////////////////
272static error_t socket_create( cxy_t       cxy,
273                              uint32_t    domain,
274                              uint32_t    type,
275                              socket_t ** socket_ptr,
276                              uint32_t  * fdid_ptr )
277{
278    uint32_t    fdid;
279
280    thread_t  * this    = CURRENT_THREAD;
281    process_t * process = this->process;
282
283    kmem_req_t     req;
284    socket_t     * socket;
285    vfs_file_t   * file;
286    uint32_t       state;
287    error_t        error;
288
289#if DEBUG_SOCKET_CREATE
290uint32_t cycle = (uint32_t)hal_get_cycles();
291if( DEBUG_SOCKET_CREATE < cycle )
292printk("\n[%s] thread[%x,%x] enter / cycle %d\n",
293__FUNCTION__, process->pid, this->trdid, cycle ); 
294#endif
295   
296    // allocate memory for socket descriptor
297    req.type   = KMEM_KCM;
298    req.order  = bits_log2( sizeof(socket_t) );
299    req.flags  = AF_ZERO;
300    socket     = kmem_remote_alloc( cxy , &req );
301
302    if( socket == NULL )
303    {
304        printk("\n[ERROR] in %s : cannot allocate socket descriptor / thread[%x,%x]\n",
305        __FUNCTION__, process->pid, this->trdid );
306        return -1;
307    }
308
309    // allocate memory for rx_buf buffer
310    error = remote_buf_init( XPTR( cxy , &socket->rx_buf ),
311                             NIC_RX_BUF_SIZE );
312
313    if( error )
314    {
315        printk("\n[ERROR] in %s : cannot allocate rx_buf / thread[%x,%x]\n",
316        __FUNCTION__, process->pid, this->trdid );
317        req.type = KMEM_KCM;
318        req.ptr  = socket;
319        kmem_remote_free( cxy , &req );
320        return -1;
321    }
322
323    // allocate memory for r2tq queue
324    error = remote_buf_init( XPTR( cxy , &socket->r2tq ),
325                             NIC_R2T_QUEUE_SIZE );
326    if( error )
327    {
328        printk("\n[ERROR] in %s : cannot allocate R2T queue / thread[%x,%x]\n",
329        __FUNCTION__, process->pid, this->trdid );
330        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
331        req.type = KMEM_KCM;
332        req.ptr  = socket;
333        kmem_remote_free( cxy , &req );
334        return -1;
335    }
336
337    // don't allocate memory for crqq queue, as it is done by the socket_listen function
338
339    //  allocate memory for file descriptor
340        req.type  = KMEM_KCM;
341        req.order = bits_log2( sizeof(vfs_file_t) );
342    req.flags = AF_ZERO;
343        file      = kmem_remote_alloc( cxy , &req );
344
345    if( file == NULL ) 
346    {
347        printk("\n[ERROR] in %s : cannot allocate file descriptor / thread[%x,%x]\n",
348        __FUNCTION__, process->pid, this->trdid );
349        remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
350        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
351        req.type = KMEM_KCM;
352        req.ptr  = socket;
353        kmem_remote_free( cxy , &req );
354        return -1;
355    }
356   
357    // get an fdid value, and register file descriptor in fd_array[]
358    error = process_fd_register( process->ref_xp,
359                                 XPTR( cxy , file ),
360                                 &fdid );
361    if ( error ) 
362    {
363        printk("\n[ERROR] in %s : cannot register file descriptor / thread[%x,%x]\n",
364        __FUNCTION__, process->pid, this->trdid );
365        req.type = KMEM_KCM;
366        req.ptr  = file;
367        kmem_free( &req );
368        remote_buf_destroy( XPTR( cxy , &socket->r2tq ) );
369        remote_buf_destroy( XPTR( cxy , &socket->rx_buf ) );
370        req.ptr  = socket;
371        kmem_free( &req );
372        return -1;
373    }
374
375    state = (type == SOCK_STREAM) ? TCP_STATE_UNBOUND : UDP_STATE_UNBOUND;
376
377    // initialise socket descriptor
378    hal_remote_s32( XPTR( cxy , &socket->pid         ) , process->pid );
379    hal_remote_s32( XPTR( cxy , &socket->fdid        ) , fdid );
380    hal_remote_s32( XPTR( cxy , &socket->domain      ) , domain );
381    hal_remote_s32( XPTR( cxy , &socket->type        ) , type );
382    hal_remote_s32( XPTR( cxy , &socket->state       ) , state );
383    hal_remote_s64( XPTR( cxy , &socket->tx_client   ) , XPTR_NULL );
384    hal_remote_s64( XPTR( cxy , &socket->rx_client   ) , XPTR_NULL );
385    hal_remote_s32( XPTR( cxy , &socket->tx_valid    ) , false );
386    hal_remote_s32( XPTR( cxy , &socket->rx_valid    ) , false );
387    hal_remote_s32( XPTR( cxy , &socket->nic_channel ) , 0 );
388
389    // initialize file descriptor
390    hal_remote_s32( XPTR( cxy , &file->type        ) , INODE_TYPE_SOCK );
391    hal_remote_spt( XPTR( cxy , &file->socket      ) , socket );
392    hal_remote_s32( XPTR( cxy , &file->refcount    ) , 1 );
393
394    // initialize socket lock
395    remote_queuelock_init( XPTR( cxy , &socket->lock ) , LOCK_SOCKET_STATE );
396
397#if DEBUG_SOCKET_CREATE
398if( DEBUG_SOCKET_CREATE < cycle )
399printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / xptr[%x,%x] / cycle %d\n",
400__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cxy, socket, cycle );
401#endif
402   
403    // return success
404    *socket_ptr = socket;
405    *fdid_ptr   = fdid;
406
407    return 0;
408
409}  // end socket_create
410
411/////////////////////////////////////////////////////////////////////////////////////////
412// This static function is called by the socket_close() function to destroy a socket
413// identified by the <file_xp> argument.
414// It remove the associated file from the reference process fd_array. It unlink the
415// socket from the NIC_TX [k] and NIC_RX[k] chdevs. It release all memory allocated
416// for the structures associated to the target socket socket : file descriptor,
417// socket descriptor, RX buffer, R2T queue, CRQ queue.
418/////////////////////////////////////////////////////////////////////////////////////////
419// @ file_xp  : extended pointer on the file descriptor.
420/////////////////////////////////////////////////////////////////////////////////////////
421static void socket_destroy( xptr_t file_xp )
422{
423    kmem_req_t          req;
424
425    thread_t  * this    = CURRENT_THREAD;
426    process_t * process = this->process;
427
428// check file_xp argument
429assert( (file_xp != XPTR_NULL), "illegal argument\n" );
430
431    // get cluster & local pointer for file descriptor
432    vfs_file_t * file_ptr = GET_PTR( file_xp );
433    cxy_t        file_cxy = GET_CXY( file_xp );
434
435#if DEBUG_SOCKET_DESTROY
436uint32_t cycle = (uint32_t)hal_get_cycles();
437if( DEBUG_SOCKET_DESTROY < cycle )
438printk("\n[%s] thread[%x,%x] enter / file[%x,%x] / cycle %d\n",
439__FUNCTION__, process->pid, this->trdid, file_cxy, file_ptr, cycle );
440#endif
441
442    // get local pointer for socket and file type
443    socket_t * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
444    uint32_t   file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
445   
446// check file descriptor type
447assert( (file_type == INODE_TYPE_SOCK), "illegal file type\n" );
448
449    // get socket nic_channel and fdid
450    uint32_t channel = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
451    uint32_t fdid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid ));
452
453    // remove socket from NIC_TX & NIC_RX chdev queues when socket is connected
454    if( channel < LOCAL_CLUSTER->nb_nic_channels )
455    {
456        socket_unlink_from_servers( XPTR( file_cxy , socket_ptr ) );
457    }
458
459    // remove the file descriptor from the fd_array
460    process_fd_remove( process->owner_xp , fdid );
461
462    // release memory allocated for file descriptor
463    req.type = KMEM_KCM;
464    req.ptr  = file_ptr;
465    kmem_remote_free( file_cxy , &req );
466
467    // release memory allocated for buffers attached to socket descriptor
468    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->crqq ) );
469    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->r2tq ) );
470    remote_buf_destroy( XPTR( file_cxy , &socket_ptr->rx_buf ) );
471
472    // release memory allocated for socket descriptor
473    req.type = KMEM_KCM;
474    req.ptr  = socket_ptr;
475    kmem_remote_free( file_cxy , &req );
476
477#if DEBUG_SOCKET_DESTROY
478cycle = (uint32_t)hal_get_cycles();
479if( DEBUG_SOCKET_DESTROY < cycle )
480printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
481__FUNCTION__, process->pid, this->trdid, cycle );
482#endif
483
484}  // end socket_destroy()
485
486////////////////////////////////////////////////
487void socket_put_r2t_request( xptr_t    queue_xp,
488                             uint32_t  flags,
489                             uint32_t  channel )
490{
491    xptr_t     chdev_xp;
492    cxy_t      chdev_cxy;
493    chdev_t  * chdev_ptr;
494    thread_t * server_ptr;
495    xptr_t     server_xp;
496
497    while( 1 )
498    {
499        // try to register R2T request
500        error_t error = remote_buf_put_from_kernel( queue_xp,
501                                                    (uint8_t *)(&flags),
502                                                    1 );
503        if( error )
504        {
505            // queue full => wait and retry
506            sched_yield( "waiting R2T queue" );
507        }
508        else
509        {
510            // get NIC_TX chdev pointers
511            chdev_xp = chdev_dir.nic_tx[channel];
512            chdev_cxy = GET_CXY( chdev_xp );
513            chdev_ptr = GET_PTR( chdev_xp );
514 
515            // get NIC_TX server thread pointers
516            server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server ) );
517            server_xp  = XPTR( chdev_cxy , server_ptr );
518
519            // unblocks NIC_TX server thread
520            thread_unblock( server_xp , THREAD_BLOCKED_CLIENT );
521
522            return;
523        }
524    }
525}  // end socket_put_r2t_request()
526 
527///////////////////////////////////////////////////
528error_t socket_put_crq_request( xptr_t    queue_xp,
529                                uint32_t  remote_addr,
530                                uint32_t  remote_port,
531                                uint32_t  remote_iss,
532                                uint32_t  remote_window )
533{
534    connect_request_t   req;
535
536    // build request
537    req.addr   = remote_addr;
538    req.port   = remote_port;
539    req.iss    = remote_iss;
540    req.window = remote_window;
541
542    // try to register request in CRQ
543    return remote_buf_put_from_kernel( queue_xp,
544                                       (uint8_t *)(&req),
545                                       sizeof(connect_request_t) );
546}  // end socket_put_crq_request()
547 
548////////////////////////////////////////////////////
549error_t socket_get_crq_request( xptr_t     queue_xp,
550                                uint32_t * remote_addr,
551                                uint32_t * remote_port,
552                                uint32_t * remote_iss,
553                                uint32_t * remote_window )
554{
555    connect_request_t   req;
556    error_t             error;
557
558    // get request from CRQ
559    error = remote_buf_get_to_kernel( queue_xp,
560                                      (uint8_t *)(&req),
561                                      sizeof(connect_request_t) );
562    // extract request arguments
563    *remote_addr   = req.addr;
564    *remote_port   = req.port;
565    *remote_iss    = req.iss;
566    *remote_window = req.window;
567
568    return error;
569
570}  // end socket_get_crq_request()
571 
572
573/////////////////////////////////////////////////////////////////////////////////////////
574//                 Functions implementing the SOCKET related syscalls
575/////////////////////////////////////////////////////////////////////////////////////////
576
577//////////////////////////////////////
578int socket_build( uint32_t   domain,
579                  uint32_t   type )
580{
581    uint32_t    fdid;
582    socket_t  * socket;
583    error_t     error;
584
585#if DEBUG_SOCKET_BUILD
586uint32_t    cycle   = (uint32_t)hal_get_cycles();
587thread_t  * this    = CURRENT_THREAD;
588process_t * process = this->process;
589if( DEBUG_SOCKET_BUILD < cycle )
590printk("\n[%s] thread[%x,%x] enter / %s / %s / cycle %d\n",
591__FUNCTION__, process->pid, this->trdid, 
592socket_domain_str(domain), socket_type_str(type), cycle );
593#endif
594
595
596    // allocate memory for the file descriptor and for the socket
597    error = socket_create( local_cxy,
598                           domain,
599                           type,
600                           &socket, 
601                           &fdid );
602
603#if DEBUG_SOCKET_BUILD
604cycle = (uint32_t)hal_get_cycles();
605if( DEBUG_SOCKET_BUILD < cycle )
606printk("\n[%s] thread[%x,%x] exit / socket %x / fdid %d / %s / cycle %d\n",
607__FUNCTION__, process->pid, this->trdid, socket, fdid, 
608socket_state_str(hal_remote_l32(XPTR(local_cxy , &socket->state))),
609cycle );
610#endif
611
612    if( error ) return -1;
613    return fdid;
614}
615
616////////////////////////////////
617int socket_bind( uint32_t  fdid,
618                 uint32_t  addr,
619                 uint16_t  port )
620{
621    vfs_inode_type_t    file_type;
622    socket_t          * socket;
623    uint32_t            socket_type;
624    uint32_t            socket_state;
625
626    thread_t  * this    = CURRENT_THREAD;
627    process_t * process = this->process;
628
629#if DEBUG_SOCKET_BIND
630uint32_t cycle = (uint32_t)hal_get_cycles();
631if( DEBUG_SOCKET_BIND < cycle )
632printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / addr %x / port %x / cycle %d\n",
633__FUNCTION__, process->pid, this->trdid, process->pid, fdid, addr, port, cycle );
634#endif
635
636    // get pointers on file descriptor
637    xptr_t       file_xp  = process_fd_get_xptr_from_local( process , fdid );
638    vfs_file_t * file_ptr = GET_PTR( file_xp );
639    cxy_t        file_cxy = GET_CXY( file_xp );
640
641    // check file_xp
642    if( file_xp == XPTR_NULL )
643    {
644        printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x]\n",
645        __FUNCTION__, fdid, process->pid, this->trdid );
646        return -1;
647    }
648
649    file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
650    socket    = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
651
652    // check file descriptor type
653    if( file_type != INODE_TYPE_SOCK )
654    {
655        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]",
656        __FUNCTION__, vfs_inode_type_str( file_type ), process->pid, this->trdid );
657        return -1;
658    }
659
660    // get socket type
661    socket_type = hal_remote_l32(XPTR( file_cxy , &socket->type ));
662
663    // compute socket state
664    socket_state = (socket_type == SOCK_STREAM) ? TCP_STATE_BOUND : UDP_STATE_BOUND;
665
666    // update the socket descriptor
667    hal_remote_s32( XPTR( file_cxy , &socket->local_addr ) , addr );
668    hal_remote_s32( XPTR( file_cxy , &socket->local_port ) , port );
669    hal_remote_s32( XPTR( file_cxy , &socket->state      ) , socket_state );
670
671#if DEBUG_SOCKET_BIND
672cycle = (uint32_t)hal_get_cycles();
673if( DEBUG_SOCKET_BIND < cycle )
674printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / addr %x / port %x / cycle %d\n",
675__FUNCTION__, process->pid, this->trdid, process->pid, fdid,
676socket_state_str(hal_remote_l32( XPTR( file_cxy , &socket->state ))),
677hal_remote_l32( XPTR( file_cxy , &socket->local_addr )),
678hal_remote_l32( XPTR( file_cxy , &socket->local_port )),
679cycle );
680#endif
681
682    return 0;
683
684}  // end socket_bind()
685
686//////////////////////////////////
687int socket_listen( uint32_t fdid,
688                   uint32_t crq_depth )
689{
690    xptr_t              file_xp;
691    vfs_file_t        * file_ptr;
692    cxy_t               file_cxy;
693    vfs_inode_type_t    file_type;
694    socket_t          * socket_ptr;
695    uint32_t            socket_type;
696    uint32_t            socket_state;
697    uint32_t            socket_local_addr;
698    uint32_t            socket_local_port;
699    error_t             error;
700
701    thread_t  * this    = CURRENT_THREAD;
702    process_t * process = this->process;
703
704#if DEBUG_SOCKET_LISTEN
705uint32_t cycle = (uint32_t)hal_get_cycles();
706if( DEBUG_SOCKET_LISTEN < cycle )
707printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / crq_depth %x / cycle %d\n",
708__FUNCTION__, process->pid, this->trdid, process->pid, fdid, crq_depth, cycle );
709#endif
710
711    // get pointers on file descriptor
712    file_xp  = process_fd_get_xptr_from_local( process , fdid );
713    file_ptr = GET_PTR( file_xp );
714    file_cxy = GET_CXY( file_xp );
715
716    // check file_xp
717    if( file_xp == XPTR_NULL )
718    {
719        printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x]\n",
720        __FUNCTION__, fdid, process->pid, this->trdid );
721        return -1;
722    }
723
724    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
725    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
726
727    // check file descriptor type
728    if( file_type != INODE_TYPE_SOCK )
729    {
730        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]\n",
731        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid );
732        return -1;
733    }
734
735    // get relevant infos from <fdid> socket descriptor
736    socket_type       = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); 
737    socket_state      = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); 
738    socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); 
739    socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); 
740
741    // check socket type
742    if( socket_type != SOCK_STREAM )
743    {
744        printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x]\n",
745        __FUNCTION__, socket_type_str(socket_type), process->pid, this->trdid );
746        return -1;
747    }
748   
749    // check socket state
750    if( socket_state != TCP_STATE_BOUND )
751    {
752        printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x]\n",
753        __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid );
754        return -1;
755    }
756   
757    // compute CRQ queue depth : max( crq_depth , NIC_CRQ_QUEUE_SIZE )
758    uint32_t depth = ( crq_depth > NIC_CRQ_QUEUE_SIZE ) ? crq_depth : NIC_CRQ_QUEUE_SIZE;
759
760    // allocate memory for the CRQ queue
761    error = remote_buf_init( XPTR( file_cxy , &socket_ptr->crqq ),
762                                   depth * sizeof(connect_request_t) );
763    if( error )
764    {
765        printk("\n[ERROR] in %s : cannot allocate CRQ queue / thread[%x,%x]\n",
766        __FUNCTION__, process->pid, this->trdid );
767        return -1;
768    }
769
770    // update socket.state
771    hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , TCP_STATE_LISTEN );
772
773    // get pointers on NIC_RX[0] chdev
774    xptr_t    rx0_chdev_xp  = chdev_dir.nic_rx[0];
775    chdev_t * rx0_chdev_ptr = GET_PTR( rx0_chdev_xp );
776    cxy_t     rx0_chdev_cxy = GET_CXY( rx0_chdev_xp );
777   
778    // build extended pointers on list of listening sockets
779    xptr_t    rx0_root_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.root );
780    xptr_t    rx0_lock_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.lock );
781
782    // build extended pointer on socket rx_list field
783    xptr_t    list_entry_xp = XPTR( file_cxy , &socket_ptr->rx_list );
784
785    // register  <fdid> socket in listening sockets list
786    remote_busylock_acquire( rx0_lock_xp );
787    xlist_add_last( rx0_root_xp , list_entry_xp );
788    remote_busylock_release( rx0_lock_xp );
789
790#if DEBUG_SOCKET_LISTEN
791cycle = (uint32_t)hal_get_cycles();
792if( DEBUG_SOCKET_LISTEN < cycle )
793printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / cycle %d\n",
794__FUNCTION__, process->pid, this->trdid, process->pid, fdid,
795socket_state_str(socket_state), cycle );
796#endif
797
798    return 0;
799
800}  // end socket_listen()
801
802///////////////////////////////////
803int socket_accept( uint32_t   fdid,
804                   uint32_t * remote_addr,
805                   uint16_t * remote_port )
806{
807    xptr_t              file_xp;             // extended pointer on remote file
808    vfs_file_t        * file_ptr;
809    cxy_t               file_cxy;
810    vfs_inode_type_t    file_type;           // file descriptor type
811    socket_t          * socket_ptr;          // local pointer on remote waiting socket
812    uint32_t            socket_type;         // listening socket type   
813    uint32_t            socket_state;        // listening socket state
814    uint32_t            socket_domain;       // listening socket domain
815    uint32_t            socket_local_addr;   // listening socket local IP address
816    uint32_t            socket_local_port;   // listening socket local port
817    uint32_t            socket_tx_nxt;       // listening socket tx_nxt
818    bool_t              socket_tx_valid;     // listening socket tx_valid
819    xptr_t              socket_tx_client;    // listening socket tx_client thread
820    bool_t              socket_rx_valid;     // listening socket rx_valid
821    xptr_t              socket_rx_client;    // listening socket rx_client thread
822    xptr_t              socket_lock_xp;      // listening socket lock
823    xptr_t              crq_xp;              // listening socket CRQ queue
824    uint32_t            crq_status;          // number of bytes in CRQ
825    cxy_t               new_socket_cxy;      // new socket cluster identifier
826    socket_t          * new_socket_ptr;      // local pointer on new socket
827    xptr_t              new_socket_xp;       // extended pointer on new socket
828    volatile uint32_t   new_state;           // new socket state (modified by NIC_RX thread)
829    uint32_t            new_fdid;            // new socket file descriptor index
830    uint32_t            new_remote_addr;     // new socket remote IP address
831    uint32_t            new_remote_port;     // new socket remote port
832    uint32_t            new_remote_iss;      // new socket remote iss
833    uint32_t            new_remote_window;   // new socket receive window
834    xptr_t              tx_server_xp;        // extended pointer on TX server thread
835    thread_t          * tx_server_ptr;       // local pointer on TX server thread
836    uint32_t            cmd_status;          // command status (rx_sts or tx_sts)
837    bool_t              cmd_valid;           // valid command (rx_valid or tx_valid)
838    error_t             error;
839
840    thread_t  * this      = CURRENT_THREAD;
841    xptr_t      client_xp = XPTR( local_cxy , this );
842    process_t * process   = this->process;
843
844#if DEBUG_SOCKET_ACCEPT
845uint32_t cycle = (uint32_t)hal_get_cycles();
846if( DEBUG_SOCKET_ACCEPT < cycle )
847printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n",
848__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
849#endif
850
851    // 1) get pointers on file descriptor
852    file_xp  = process_fd_get_xptr_from_local( process , fdid );
853    file_ptr = GET_PTR( file_xp );
854    file_cxy = GET_CXY( file_xp );
855
856    // check file_xp
857    if( file_xp == XPTR_NULL )
858    {
859        printk("\n[ERROR] in %s : undefined fdid %d",
860        __FUNCTION__, fdid );
861        return -1;
862    }
863 
864    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
865    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
866
867    // check file descriptor type
868    if( file_type != INODE_TYPE_SOCK )
869    {
870        printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]\n",
871        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid );
872        return -1;
873    }
874
875    // build extended pointer on listening socket lock
876    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
877
878    // acquire listening socket lock
879    remote_queuelock_acquire( socket_lock_xp );
880                   
881    // get listening socket type, domain, state, local_addr, local_port & tx_nxt
882    socket_type       = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); 
883    socket_state      = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); 
884    socket_domain     = hal_remote_l32( XPTR( file_cxy , &socket_ptr->domain )); 
885    socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); 
886    socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); 
887    socket_tx_nxt     = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_nxt ));
888    socket_tx_valid   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )); 
889    socket_tx_client  = hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client ));
890    socket_rx_valid   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )); 
891    socket_rx_client  = hal_remote_l64( XPTR( file_cxy , &socket_ptr->rx_client ));
892
893    // check socket type
894    if( socket_type != SOCK_STREAM )
895    {
896        // release listening socket lock
897        remote_queuelock_release( socket_lock_xp );
898                   
899        printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x]\n",
900        __FUNCTION__, socket_type_str(socket_type), process->pid , this->trdid );
901        return -1;
902    }
903   
904    // check socket state
905    if( socket_state != TCP_STATE_LISTEN ) 
906    {
907        // release listening socket lock
908        remote_queuelock_release( socket_lock_xp );
909                   
910        printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x]\n",
911        __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid );
912        return -1;
913    }
914   
915    // check no previous RX command
916    if( (socket_rx_valid == true) || (socket_rx_client != XPTR_NULL) )
917    { 
918        // release listening socket lock
919        remote_queuelock_release( socket_lock_xp );
920                   
921        printk("\n[ERROR] in %s : previous RX cmd on socket[%x,%d] / thread[%x,%x]\n",
922        __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
923        return -1;
924    }
925
926    // check no previous TX command
927    if( (socket_tx_valid == true) || (socket_tx_client != XPTR_NULL) )
928    { 
929        // release socket lock
930        remote_queuelock_release( socket_lock_xp );
931                   
932        printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x]\n",
933        __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
934        return -1;
935    }
936
937    // 2) build extended pointer on listening socket.crq
938    crq_xp  = XPTR( file_cxy , &socket_ptr->crqq );
939
940    // get CRQ status
941    crq_status = remote_buf_status( crq_xp );
942
943    // block & deschedule when CRQ empty
944    if( crq_status == 0 )
945    {
946        // register command arguments in listening socket
947        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd    ), CMD_RX_ACCEPT );
948        hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ), client_xp );
949        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid  ), true );
950
951        // release listening socket lock
952        remote_queuelock_release( socket_lock_xp );
953
954#if DEBUG_SOCKET_ACCEPT
955cycle = (uint32_t)hal_get_cycles();
956if( DEBUG_SOCKET_ACCEPT < cycle )
957printk("\n[%s] thread[%x,%x] socket[%x,%d] / CRQ empty => blocks on <IO> / cycle %d\n",
958__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
959#endif
960        // block & deschedule when CRQQ empty
961        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
962        sched_yield( "CRQ queue empty");
963
964#if DEBUG_SOCKET_ACCEPT
965cycle = (uint32_t)hal_get_cycles();
966if( DEBUG_SOCKET_ACCEPT < cycle )
967printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d\n",
968__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
969#endif
970        // take listening socket lock
971        remote_queuelock_acquire( socket_lock_xp );
972
973        // get CRQ status & command status
974        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid ) );
975        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts ) );
976        crq_status   = remote_buf_status( crq_xp );
977
978assert( (((crq_status > 0) || (cmd_status!= CMD_STS_SUCCESS)) && (cmd_valid == false)),
979"illegal socket state when client thread resumes after RX_ACCEPT" );
980
981        // reset socket.rx_client
982        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_client ) , XPTR_NULL );
983
984        if( cmd_status != CMD_STS_SUCCESS )
985        {
986            // release socket lock
987            remote_queuelock_release( socket_lock_xp );
988
989            printk("\n[ERROR] in %s for RX_ACCEPT command / socket[%x,%d] / thread[%x,%x]\n",
990            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
991            return -1;
992        }
993
994        // extract first request from the listening socket CRQ
995        error = socket_get_crq_request( crq_xp,
996                                    &new_remote_addr,
997                                    &new_remote_port,
998                                    &new_remote_iss,
999                                    &new_remote_window );
1000
1001assert( (error == 0),
1002"cannot get a connection request from a non-empty CRQ" ); 
1003
1004        // reset listening socket rx_client
1005        hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_client ) , XPTR_NULL );
1006
1007        // release socket lock
1008        remote_queuelock_release( socket_lock_xp );
1009
1010    }  // end blocking on CRQ status
1011
1012    // from this point, we can create a new socket
1013    // and ask the NIC_TX to send a SYN-ACK segment
1014
1015#if DEBUG_SOCKET_ACCEPT
1016cycle = (uint32_t)hal_get_cycles();
1017if( DEBUG_SOCKET_ACCEPT < cycle )
1018printk("\n[%s] thread[%x,%x] socket[%x,%d] / got a CRQ request / cycle %d\n",
1019__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1020#endif
1021
1022    // 3) select a cluster for the new socket
1023    new_socket_cxy = cluster_random_select();
1024
1025    // allocate memory for the new socket descriptor
1026    error = socket_create( new_socket_cxy,
1027                           socket_domain,
1028                           socket_type,
1029                           &new_socket_ptr,
1030                           &new_fdid );
1031    if( error )
1032    {
1033        printk("\n[ERROR] in %s : cannot allocate new socket / thread[%x,%x]\n",
1034        __FUNCTION__, process->pid, this->trdid );
1035        return -1;
1036    }
1037   
1038    // build extended poiner on new socket
1039    new_socket_xp = XPTR( new_socket_cxy , new_socket_ptr );
1040
1041#if DEBUG_SOCKET_ACCEPT
1042cycle = (uint32_t)hal_get_cycles();
1043if( DEBUG_SOCKET_ACCEPT < cycle )
1044printk("\n[%s] thread[%x,%x] created new socket[%x,%d] / cycle %d\n",
1045__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1046#endif
1047       
1048    // compute NIC channel index from remote_addr and remote_port
1049    uint32_t new_nic_channel = dev_nic_get_key( new_remote_addr , new_remote_port );
1050
1051    // update new socket descriptor
1052    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_addr ) , socket_local_addr );
1053    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_port ) , socket_local_port );
1054    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_addr) , new_remote_addr );
1055    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_port) , new_remote_port );
1056    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->nic_channel) , new_nic_channel );
1057    hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->state      ) , TCP_STATE_SYN_RCVD );
1058
1059    // set new socket TCB : increment tx_nxt / initialize rx_nxt, rx_irs, rx_wnd
1060    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_nxt ), socket_tx_nxt + 1 );
1061    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_nxt ), new_remote_iss + 1 );
1062    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_irs ), new_remote_iss );
1063    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_wnd ), new_remote_window );
1064
1065    // link new socket to chdev servers
1066    socket_link_to_servers( new_socket_xp , new_nic_channel );
1067
1068    // 3) get pointers on NIC_TX[channel] chdev
1069    xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[new_nic_channel];
1070    chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1071    cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1072
1073    // get pointers on NIC_TX[channel] server thread
1074    tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1075    tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1076
1077    // register command arguments in new socket to request a SYN_ACK segment
1078    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_cmd    ), CMD_TX_ACCEPT );
1079    hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ), client_xp );
1080    hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid  ), true );
1081
1082    // unblock NIC_TX server thread
1083    thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1084 
1085#if DEBUG_SOCKET_ACCEPT
1086cycle = (uint32_t)hal_get_cycles();
1087if( DEBUG_SOCKET_ACCEPT < cycle )
1088printk("\n[%s] thread[%x,%x] new_socket[%x,%d] blocks on <IO> waiting ESTAB / cycle %d\n",
1089__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1090#endif
1091
1092    // client thread blocks & deschedules
1093    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1094    sched_yield( "waiting new socket connection");
1095
1096#if DEBUG_SOCKET_ACCEPT
1097cycle = (uint32_t)hal_get_cycles();
1098if( DEBUG_SOCKET_ACCEPT < cycle )
1099printk("\n[%s] thread[%x,%x] new_socket[%x,%d] resumes  / cycle %d\n",
1100__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle );
1101#endif
1102
1103    // get new socket state, tx_valid and tx_sts
1104    new_state  = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->state ));
1105    cmd_valid  = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid ));
1106    cmd_status = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_sts ));
1107
1108assert( (((new_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS))
1109        && (cmd_valid == false)), 
1110"illegal socket state when client thread resumes after TX_ACCEPT" ); 
1111
1112    // reset socket.tx_client
1113    hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ) , XPTR_NULL );
1114
1115    if( cmd_status != CMD_STS_SUCCESS ) 
1116    {
1117        printk("\n[ERROR] in %s for TX_ACCEPT command / socket[%x,%d] / thread[%x,%x]\n",
1118        __FUNCTION__, process->pid, new_fdid, process->pid, this->trdid );
1119        return -1;
1120    }
1121    else
1122    {
1123
1124#if DEBUG_SOCKET_ACCEPT
1125cycle = (uint32_t)hal_get_cycles();
1126if( DEBUG_SOCKET_ACCEPT < cycle )
1127printk("\n[%s] thread[%x,%x] new_socket[%x,%d] / state %s / addr %x / port %x / cycle %d\n",
1128__FUNCTION__, process->pid, this->trdid, process->pid, new_fdid,
1129socket_state_str(new_state), new_remote_addr, new_remote_port, cycle );
1130#endif
1131
1132        // return success
1133        *remote_addr = new_remote_addr;
1134        *remote_port = new_remote_port;
1135        return new_fdid;
1136    }
1137 
1138}  // end socket_accept()
1139
1140//////////////////////////////////
1141int socket_connect( uint32_t fdid,
1142                    uint32_t remote_addr,
1143                    uint16_t remote_port )
1144{
1145    vfs_inode_type_t    file_type;
1146    socket_t          * socket_ptr;       // local pointer on thread descriptor
1147    volatile uint32_t   socket_state;     // socket state (modified by the NIC_TX thread)
1148    uint32_t            socket_type;      // socket type 
1149    uint32_t            local_addr;       // local IP address
1150    uint32_t            local_port;       // local port
1151    xptr_t              tx_server_xp;     // extended pointer on TX server thread
1152    thread_t          * tx_server_ptr;    // local pointer on TX server thread
1153    uint32_t            nic_channel;      // NIC channel index
1154    uint32_t            cmd_status;       // command status (tx_sts field)
1155    bool_t              cmd_valid;        // command valid (tx_valid field)
1156
1157    thread_t  * this      = CURRENT_THREAD;
1158    xptr_t      client_xp = XPTR( local_cxy , this );
1159    pid_t       pid       = this->process->pid;
1160    trdid_t     trdid     = this->trdid;
1161
1162    // get pointers on file descriptor
1163    xptr_t       file_xp  = process_fd_get_xptr_from_local( this->process , fdid );
1164    vfs_file_t * file_ptr = GET_PTR( file_xp );
1165    cxy_t        file_cxy = GET_CXY( file_xp );
1166
1167    // check file_xp
1168    if( file_xp == XPTR_NULL )
1169    {
1170        printk("\n[ERROR] in %s : undefined fdid %d",
1171        __FUNCTION__, fdid );
1172        return -1;
1173    }
1174
1175    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
1176    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1177
1178#if DEBUG_SOCKET_CONNECT
1179uint32_t cycle = (uint32_t)hal_get_cycles();
1180if( DEBUG_SOCKET_CONNECT < cycle )
1181printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / addr %x / port %d / cycle %d\n",
1182__FUNCTION__,  pid, trdid, pid, fdid, remote_addr, remote_port, cycle );
1183#endif
1184
1185    // check file descriptor type
1186    if( file_type != INODE_TYPE_SOCK )
1187    {
1188        printk("\n[ERROR] in %s : illegal file type %s",
1189        __FUNCTION__, vfs_inode_type_str( file_type ) );
1190        return -1;
1191    }
1192
1193    // get relevant socket infos
1194    socket_type   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ) );
1195    socket_state  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) );
1196    local_addr    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr ) );
1197    local_port    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port ) );
1198
1199    if( socket_type == SOCK_DGRAM )       // UDP
1200    {
1201        if( socket_state != UDP_STATE_BOUND )
1202        {
1203            printk("\n[ERROR] in %s : illegal socket state %s for type %s",
1204            __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type) );
1205            return -1;
1206        }
1207    }
1208    else if( socket_type == SOCK_STREAM )  // TCP
1209    {
1210        if( socket_state != TCP_STATE_BOUND )
1211        {
1212            printk("\n[ERROR] in %s : illegal socket state %s for type %s",
1213            __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type) );
1214            return -1;
1215        }
1216    }
1217    else
1218    {
1219        printk("\n[ERROR] in %s : illegal socket type %s",
1220        __FUNCTION__,  socket_type_str(socket_type) );
1221        return -1;
1222    }
1223
1224    // compute nic_channel index from remote_addr and remote_port
1225    nic_channel = dev_nic_get_key( remote_addr , remote_port );
1226
1227    // link socket to chdev servers
1228    socket_link_to_servers( XPTR( file_cxy , socket_ptr ), nic_channel );
1229
1230    // update the socket descriptor
1231    hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ) , remote_addr  );
1232    hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ) , remote_port  );
1233    hal_remote_s32( XPTR( file_cxy , &socket_ptr->nic_channel ) , nic_channel  );
1234
1235    // the actual connection mechanism depends on socket type
1236    // UDP : client thread updates the local socket state without blocking
1237    // TCP : client thread request TX server thread to start the 3 steps handshake
1238
1239    if( socket_type == SOCK_DGRAM )  // UDP
1240    {
1241        // directly update the local socket state
1242        hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , UDP_STATE_ESTAB );
1243
1244        return 0;
1245    }
1246    else                             // TCP
1247    {
1248        // get pointers on NIC_TX[channel] chdev
1249        xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[nic_channel];
1250        chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1251        cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1252
1253        // get pointers on NIC_TX[channel] server thread
1254        tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1255        tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1256
1257        // register command arguments in socket descriptor for a SYN segment
1258        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ), CMD_TX_CONNECT );
1259        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp );
1260        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ), true );
1261
1262        // unblock NIC_TX server thread
1263        thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1264 
1265#if DEBUG_SOCKET_CONNECT
1266cycle = (uint32_t)hal_get_cycles();
1267if( DEBUG_SOCKET_CONNECT < cycle )
1268printk("\n[%s] thread[%x,%x] socket[%x,%d] blocks on <IO> waiting connexion / cycle %d \n",
1269__FUNCTION__, pid, trdid, pid, fdid, cycle );
1270#endif
1271        // block itself and deschedule
1272        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1273        sched_yield( "waiting connection" );
1274
1275#if DEBUG_SOCKET_CONNECT
1276cycle = (uint32_t)hal_get_cycles();
1277if( DEBUG_SOCKET_CONNECT < cycle )
1278printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n",
1279__FUNCTION__, pid, trdid, pid, fdid, cycle );
1280#endif
1281
1282        // get socket state, tx_valid and tx_sts
1283        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ));
1284        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts ));
1285        socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1286
1287assert( (((socket_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS))
1288        && (cmd_valid == false)),
1289"illegal socket state when client thread resumes after TX_CONNECT" );
1290
1291        // reset socket.tx_client
1292        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL );
1293
1294        if( cmd_status != CMD_STS_SUCCESS )
1295        {
1296            printk("\n[ERROR] in %s : for command TX_CONNECT / socket[%x,%d] / thread[%x,%x]\n",
1297            __FUNCTION__, pid, fdid, pid, trdid );
1298            return -1;
1299        }
1300        else
1301        {
1302
1303#if DEBUG_SOCKET_CONNECT
1304cycle = (uint32_t)hal_get_cycles();
1305if( DEBUG_SOCKET_CONNECT < cycle )
1306printk("\n[%s] thread[%x,%x] exit for socket[%x,%d] / %s / cycle %d \n",
1307__FUNCTION__, pid, trdid, pid, fdid, socket_state_str(socket_state),cycle );
1308#endif
1309             return 0;
1310        }
1311    }  // end TCP
1312
1313}  // end socket_connect()
1314
1315///////////////////////////////////
1316int socket_close( xptr_t   file_xp,
1317                  uint32_t fdid )
1318{
1319    uint32_t     socket_type;
1320    uint32_t     socket_state;
1321    uint32_t     nic_channel;
1322    uint32_t     cmd_status;      // socket.tx_sts
1323    bool_t       cmd_valid;       // socket.tx_valid
1324    thread_t   * tx_server_ptr;   // local pointer on NIC_TX server thread
1325    xptr_t       tx_server_xp;    // extended pointer on NIC_TX server thread
1326    xptr_t       socket_lock_xp;  // extended pointer on socket lock
1327
1328    thread_t   * this      = CURRENT_THREAD;
1329    xptr_t       client_xp = XPTR( local_cxy , this );
1330    pid_t        pid       = this->process->pid;
1331    trdid_t      trdid     = this->trdid;
1332
1333    // get pointer on socket descriptor
1334    cxy_t        file_cxy    = GET_CXY( file_xp );
1335    vfs_file_t * file_ptr    = GET_PTR( file_xp );
1336    socket_t   * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1337
1338assert( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid )) == fdid),
1339"unconsistent file_xp & fdid arguments");
1340
1341#if DEBUG_SOCKET_CLOSE
1342uint32_t cycle = (uint32_t)hal_get_cycles();
1343if (DEBUG_SOCKET_CLOSE < cycle )
1344printk("\n[%s] thread[%x,%x] enters for socket[%x,%d] / cycle %d\n",
1345__FUNCTION__, pid, trdid, pid, fdid, cycle );
1346#endif
1347
1348    // build extended pointer on lock protecting socket
1349    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
1350
1351    // take socket lock
1352    remote_queuelock_acquire( socket_lock_xp );
1353
1354    // check no previous TX command
1355    if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )) == true) || 
1356        (hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client)) != XPTR_NULL) )
1357    { 
1358        // release socket lock
1359        remote_queuelock_release( socket_lock_xp );
1360                   
1361        printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x]\n",
1362        __FUNCTION__, pid, fdid, pid, trdid );
1363        return -1;
1364    }
1365
1366    // get relevant socket infos
1367    socket_type   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ));
1368    nic_channel   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
1369    socket_state  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1370
1371
1372    // the actual close mechanism depends on socket type and state:
1373    // UDP or TCP not connected : client thread directly destroy the socket descriptor
1374    // TCP connected : client thread request TX server thread to make the TCP close handshake
1375
1376    if( socket_type == SOCK_DGRAM )                   // UDP
1377    {
1378
1379#if DEBUG_SOCKET_CLOSE
1380cycle = (uint32_t)hal_get_cycles();
1381if( cycle > DEBUG_DEV_NIC_TX )
1382printk("\n[%s] thread[%x,%x] socket[%x,%d] %s / destroy socket / cycle %d\n",
1383__FUNCTION__, pid, trdid, pid, fdid, socket_state_str( socket_state ), cycle );
1384#endif
1385        // directly destroy socket
1386        socket_destroy( file_xp );
1387
1388        return 0;
1389    }
1390    else if( (socket_state == TCP_STATE_BOUND) ||
1391             (socket_state == TCP_STATE_LISTEN) ||
1392             (socket_state == TCP_STATE_SYN_SENT) )   // TCP not connected
1393    {
1394
1395#if DEBUG_SOCKET_CLOSE
1396cycle = (uint32_t)hal_get_cycles();
1397if( cycle > DEBUG_DEV_NIC_TX )
1398printk("\n[%s] thread[%x,%x] socket[%x,%d] %s / destroy socket / cycle %d\n",
1399__FUNCTION__, pid, trdid, pid, fdid, socket_state_str( socket_state ), cycle );
1400#endif
1401        // directly destroy socket
1402        socket_destroy( file_xp );
1403
1404        return 0;
1405    }
1406    else                                             // TCP connected
1407    {
1408        // get pointers on NIC_TX[index] chdev
1409        xptr_t    tx_chdev_xp  = chdev_dir.nic_tx[nic_channel];
1410        chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp );
1411        cxy_t     tx_chdev_cxy = GET_CXY( tx_chdev_xp );
1412
1413        // get pointers on NIC_TX[channel] server thread
1414        tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server ));
1415        tx_server_xp  = XPTR( tx_chdev_cxy , tx_server_ptr );
1416
1417        // register command arguments in socket descriptor
1418        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ), CMD_TX_CLOSE );
1419        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp );
1420        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ), true );
1421       
1422        // unblock NIC_TX server thread
1423        thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT );
1424 
1425        // release socket lock
1426        remote_queuelock_release( socket_lock_xp );
1427
1428#if DEBUG_SOCKET_CLOSE
1429cycle = (uint32_t)hal_get_cycles();
1430if( DEBUG_SOCKET_CLOSE < cycle )
1431printk("\n[%s] thread[%x,%x] socket[%x,%d] blocks on <IO> waiting close / cycle %d \n",
1432__FUNCTION__, pid, trdid, pid, fdid, cycle );
1433#endif
1434        // block itself and deschedule
1435        thread_block( client_xp , THREAD_BLOCKED_IO );
1436        sched_yield( "blocked in close" );
1437
1438#if DEBUG_SOCKET_CLOSE
1439cycle = (uint32_t)hal_get_cycles();
1440if( DEBUG_SOCKET_CLOSE < cycle )
1441printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n",
1442__FUNCTION__, pid, trdid, pid, fdid, cycle );
1443#endif
1444        // take socket lock
1445        remote_queuelock_acquire( socket_lock_xp );
1446
1447        // get socket state & command status
1448        socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) );
1449        cmd_status   = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts) );
1450        cmd_valid    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ) );
1451
1452assert( (((socket_state == TCP_STATE_CLOSED) || (cmd_status != CMD_STS_SUCCESS))
1453         && (cmd_valid == false)),
1454"illegal socket state when client thread resumes after TX_CLOSE\n"
1455" socket_state = %s / cmd_status = %d / cmd_valid = %d\n",
1456socket_state_str(socket_state), cmd_status, cmd_valid );
1457
1458        // reset socket.tx_client
1459        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL );
1460
1461        if( cmd_status != CMD_STS_SUCCESS )  // error reported
1462        {
1463            printk("\n[ERROR] in %s for command TX_CLOSE / socket[%x,%d] / thread[%x,%x]\n",
1464            __FUNCTION__, pid, fdid, pid, this->trdid );
1465            return -1;
1466        }
1467        else                                 // success
1468        {
1469
1470#if DEBUG_SOCKET_CLOSE
1471cycle = (uint32_t)hal_get_cycles();
1472if( DEBUG_SOCKET_CLOSE < cycle )
1473printk("\n[%s] thread[%x,%x] socket[%x,%d] / destroy socket / cycle %d\n",
1474__FUNCTION__, pid, trdid, pid, fdid, socket_state_str(socket_state) , cycle );
1475#endif
1476            // destroy socket
1477            socket_destroy( file_xp );
1478
1479            return 0;
1480        }
1481    }   // end if TCP
1482}  // end socket_close()
1483
1484////////////////////////////////////////////////////////////////////////////////////////
// This static and blocking function is executed by a user thread calling one of the
1486// four functions: socket_send() / socket_recv() / socket_sendto() / socket_recvfrom()
1487// It can be used for both UDP and TCP sockets.
1488////////////////////////////////////////////////////////////////////////////////////////
1489// @ is_send   : send when true / receive when false.
1490// @ fdid      : socket identifier.
1491// @ u_buf     : pointer on user buffer in user space.
1492// @ length    : number of bytes.
1493// @ explicit  : explicit remote IP address and port when true.
1494////////////////////////////////////////////////////////////////////////////////////////
1495// Implementation note : The behavior is different for SEND & RECV
1496// - For a SEND, the client thread checks that there is no TX command registered
1497//   in the socket. It registers the command arguments in the socket descriptor
1498//   (tx_client, tx_cmd, tx_buf, tx_len). Then the client thread unblocks the
1499//   TX server thread from the BLOCKED_CLIENT condition, blocks itself on the
1500//   BLOCKED_IO condition, and deschedules. It is unblocked by the TX server thread
1501//   when the last byte has been sent (for UDP) or acknowledged (for TCP).
1502//   When the client thread resumes, it reset the command in socket, and returns.
1503// - For a RECV, the client thread checks that there is no RX command registered
1504//   in the socket. It registers itself in socket (rx_client). It checks the status
1505//   of the receive buffer. It the rx_buf is empty, it blocks on the BLOCKED_IO
1506//   condition, and deschedules. It is unblocked by the RX server thread when an UDP
1507//   packet or TCP segment has been writen in the rx_buf. When it resumes, it moves
1508//   the available data from the rx_buf to the user buffer, reset its registration
1509//   in socket (reset the rx_buf for an UDP socket), and returns.
1510////////////////////////////////////////////////////////////////////////////////////////
1511int socket_move_data( bool_t     is_send,
1512                      uint32_t   fdid,
1513                      uint8_t  * u_buf,
1514                      uint32_t   length,
1515                      bool_t     explicit,
1516                      uint32_t   explicit_addr,
1517                      uint32_t   explicit_port )
1518{
1519    vfs_inode_type_t    file_type;       // file descriptor type
1520    socket_t          * socket_ptr;      // local pointer on socket descriptor
1521    uint32_t            socket_state;    // current socket state
1522    uint32_t            socket_type;     // socket type (UDP/TCP)
1523    uint32_t            nic_channel;     // NIC channel for this socket
1524    xptr_t              socket_lock_xp;  // extended pointer on socket lock
1525    xptr_t              file_xp;         // extended pointer on file descriptor
1526    vfs_file_t        * file_ptr;
1527    cxy_t               file_cxy;
1528    xptr_t              chdev_xp;        // extended pointer on NIC_TX[channel] chdev
1529    chdev_t           * chdev_ptr;
1530    cxy_t               chdev_cxy;
1531    uint32_t            remote_addr;
1532    uint32_t            remote_port;
1533    uint32_t            buf_status;      // number of bytes in rx_buf
1534    int32_t             moved_bytes;     // total number of moved bytes (fot return)
1535    xptr_t              server_xp;       // extended pointer on NIC_TX / NIC_RX server thread
1536    thread_t          * server_ptr;      // local pointer on NIC_TX / NIC_RX server thread
1537    kmem_req_t          req;             // KCM request for TX kernel buffer
1538    uint8_t           * tx_buf;          // kernel buffer for TX transfer
1539    bool_t              cmd_valid;       // from socket descriptor
1540    uint32_t            cmd_status;      // from socket descriptor
1541    uint32_t            tx_todo;         // from socket descriptor
1542
1543    thread_t  * this    = CURRENT_THREAD;
1544    process_t * process = this->process;
1545
1546    // build extended pointer on client thread
1547    xptr_t client_xp = XPTR( local_cxy , this );
1548
1549    // get pointers on file descriptor identifying the socket
1550    file_xp  = process_fd_get_xptr_from_local( process , fdid );
1551    file_ptr = GET_PTR( file_xp );
1552    file_cxy = GET_CXY( file_xp );
1553
1554    if( file_xp == XPTR_NULL )
1555    {
1556        printk("\n[ERROR] in %s : undefined fdid %d / thread%x,%x]\n",
1557        __FUNCTION__, fdid , process->pid, this->trdid );
1558        return -1;
1559    }
1560 
1561    // get file type and socket pointer
1562    file_type  = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) );
1563
1564    // get local pointer on socket
1565    socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) );
1566
1567    // check file descriptor type
1568    if( file_type != INODE_TYPE_SOCK )
1569    {
1570        printk("\n[ERROR] in %s : illegal file type %s / socket[%x,%d]\n",
1571        __FUNCTION__, vfs_inode_type_str(file_type), process->pid, fdid );
1572        return -1;
1573    }
1574
1575    // build extended pointer on lock protecting socket
1576    socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock );
1577
1578    // take the socket lock
1579    remote_queuelock_acquire( socket_lock_xp );
1580
1581    // get socket type, state, and channel
1582    socket_type  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type ));
1583    socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ));
1584    nic_channel  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel ));
1585
1586    // handle the explicit remote address and port
1587    if( socket_type == SOCK_DGRAM )                  // UDP socket
1588    {
1589        if( socket_state == UDP_STATE_UNBOUND )
1590        {
1591            // release socket lock
1592            remote_queuelock_release( socket_lock_xp );
1593                   
1594            printk("\n[ERROR] in %s : SEND/RECV for socket[%x,%d] in state %s\n",
1595            __FUNCTION__, process->pid, fdid, socket_state_str(socket_state) );
1596            return -1;
1597        }
1598
1599        if( explicit )
1600        {
1601            // update remote IP address and port into socket descriptor
1602            hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ), explicit_addr );
1603            hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ), explicit_port );
1604
1605            // update socket state if required
1606            if( socket_state == UDP_STATE_BOUND )
1607            {
1608                hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ), UDP_STATE_ESTAB );
1609            }
1610        }
1611    }
1612    else                                            // TCP socket
1613    {
1614        if( explicit )
1615        {
1616            // get remote IP address and port from socket descriptor
1617            remote_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_addr ));
1618            remote_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_port ));
1619
1620            if( (remote_addr != explicit_addr) || (remote_port != explicit_port) )
1621            {
1622                // release socket lock
1623                remote_queuelock_release( socket_lock_xp );
1624                   
1625                printk("\n[ERROR] in %s : wrong expliciy access for socket[%x,%d]\n",
1626                __FUNCTION__, process->pid, fdid );
1627                return -1;
1628            }
1629        }
1630    }
1631
1632    ///////////////////////////////////////////////////////
1633    if( is_send )                       // TX_SEND command
1634    {
1635
1636#if DEBUG_SOCKET_SEND
1637uint32_t    cycle = (uint32_t)hal_get_cycles();
1638if (DEBUG_SOCKET_SEND < cycle )
1639printk("\n[%s] thread[%x,%x] received SEND command for socket[%x,%d] / length %d / cycle %d\n",
1640__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1641#endif
1642        // check no previous TX command
1643        if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )) == true) || 
1644            (hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client)) != XPTR_NULL) )
1645        { 
1646            // release socket lock
1647            remote_queuelock_release( socket_lock_xp );
1648                   
1649            printk("\n[ERROR] in %s : previous TX command / socket[%x,%d] / thread[%x,%x]\n",
1650            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1651            return -1;
1652        }
1653
1654        // allocate a temporary kernel buffer
1655        req.type  = KMEM_KCM;
1656        req.order = bits_log2( length );
1657        req.flags = AF_NONE;
1658        tx_buf    = kmem_alloc( &req ); 
1659
1660        if( tx_buf == NULL )
1661        {
1662            // release socket lock
1663            remote_queuelock_release( socket_lock_xp );
1664                   
1665            printk("\n[ERROR] in %s : no memory for tx_buf / socket[%x,%d] / thread[%x,%x]\n",
1666            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1667            return -1;
1668        }
1669
1670        // copy data from user u_buf to kernel tx_buf   
1671        hal_copy_from_uspace( XPTR( local_cxy , tx_buf ),
1672                              u_buf,
1673                              length );
1674
1675        // register command in socket descriptor
1676        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , client_xp );
1677        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd    ) , CMD_TX_SEND );
1678        hal_remote_spt( XPTR( file_cxy , &socket_ptr->tx_buf    ) , tx_buf );
1679        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_len    ) , length );
1680        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_todo   ) , length );
1681        hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid  ) , true );
1682
1683        // release socket lock
1684        remote_queuelock_release( socket_lock_xp );
1685                   
1686        // get pointers on relevant chdev
1687        chdev_xp  = chdev_dir.nic_tx[nic_channel];
1688        chdev_ptr = GET_PTR( chdev_xp );
1689        chdev_cxy = GET_CXY( chdev_xp );
1690
1691        // get pointers on NIC_TX[channel] server thread
1692        server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server ));
1693        server_xp  = XPTR( chdev_cxy , server_ptr );
1694
1695        // unblocks the NIC_TX server thread
1696        thread_unblock( server_xp , THREAD_BLOCKED_CLIENT );
1697
1698#if DEBUG_SOCKET_SEND   
1699cycle = (uint32_t)hal_get_cycles();
1700if( DEBUG_SOCKET_SEND < cycle )
1701printk("\n[%s] thread[%x,%x] socket[%x,%d] register SEND => blocks on <IO> / cycle %d\n",
1702__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1703#endif
1704        // client thread blocks itself and deschedules
1705        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1706        sched_yield( "blocked in nic_io" );
1707
1708#if DEBUG_SOCKET_SEND   
1709cycle = (uint32_t)hal_get_cycles();
1710if( DEBUG_SOCKET_SEND < cycle )
1711printk("\n[%s] thread[%x,%x] socket[%x,%d] for SEND resumes / cycle %d\n",
1712__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1713#endif
1714        // take socket lock
1715        remote_queuelock_acquire( socket_lock_xp );
1716     
1717        // get tx_valid, tx_todo, and tx_sts
1718        tx_todo    = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_todo ));
1719        cmd_valid  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ));
1720        cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts ));
1721
1722        // reset tx_client in socket descriptor
1723        hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client  ) , XPTR_NULL );
1724
1725        // release socket lock
1726        remote_queuelock_release( socket_lock_xp );
1727     
1728// check SEND command completed when TX client thread resumes
1729assert( (((tx_todo == 0) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)),
1730"illegal socket state when client thread resumes after TX_SEND\n"
1731" tx_todo = %d / tx_status = %d / tx_valid = %d\n",
1732tx_todo, cmd_status, cmd_valid );
1733
1734        // release the tx_buf
1735        req.ptr = tx_buf;
1736        kmem_free( &req );
1737
1738        if( cmd_status != CMD_STS_SUCCESS )
1739        {
1740
1741#if DEBUG_SOCKET_SEND
1742cycle = (uint32_t)hal_get_cycles();
1743if( DEBUG_SOCKET_RECV < cycle )
1744printk("\n[%s] error %s for TX_SEND / socket[%x,%d] / thread[%x,%x]\n",
1745__FUNCTION__, socket_cmd_sts_str(cmd_status), process->pid, fdid, process->pid, this->trdid );
1746#endif
1747            return -1;
1748        }
1749        else
1750        {
1751
1752#if DEBUG_SOCKET_SEND
1753cycle = (uint32_t)hal_get_cycles();
1754if (DEBUG_SOCKET_SEND < cycle )
1755printk("\n[%s] thread[%x,%x] success for SEND / socket[%x,%d] / length %d / cycle %d\n",
1756__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1757#endif
1758            return length;
1759        }
1760
1761    }  // end TX_SEND command
1762
1763    ////////////////////////////////////////////////////////
1764    else                                 // RX_RECV command
1765    {
1766
1767#if DEBUG_SOCKET_RECV
1768uint32_t    cycle = (uint32_t)hal_get_cycles();
1769if (DEBUG_SOCKET_SEND < cycle )
1770printk("\n[%s] thread[%x,%x] received RECV command for socket[%x,%d] / length %d / cycle %d\n",
1771__FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle );
1772#endif
1773        // check no previous RX command
1774        if( (hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )) == true) || 
1775            (hal_remote_l64( XPTR( file_cxy , &socket_ptr->rx_client)) != XPTR_NULL) )
1776        {
1777            // release socket lock
1778            remote_queuelock_release( socket_lock_xp );
1779                   
1780            printk("\n[ERROR] in %s : previous RX command on socket[%x,%d] / thread[%x,%x]\n",
1781            __FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1782            return -1;
1783        }
1784
1785        // return EOF for a TCP socket not in ESTAB state
1786        if( (socket_type == SOCK_STREAM ) && (socket_state != TCP_STATE_ESTAB) )
1787        { 
1788            // release socket lock
1789            remote_queuelock_release( socket_lock_xp );
1790                   
1791#if DEBUG_SOCKET_RECV 
1792uint32_t cycle = (uint32_t)hal_get_cycles();
1793if( DEBUG_SOCKET_RECV < cycle )
1794printk("\n[%s] thread[%x,%x] socket[%x,%d] TCP connection closed / cycle %d\n",
1795__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1796#endif
1797            return 0;
1798        }
1799        // build extended pointer on socket.rx_buf
1800        xptr_t rx_buf_xp   = XPTR( file_cxy , &socket_ptr->rx_buf );
1801
1802        // get rx_buf status
1803        buf_status = remote_buf_status( rx_buf_xp );
1804
1805        if( buf_status == 0 )
1806        {
1807            // registers RX_RECV command in socket descriptor
1808            hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd    ) , CMD_RX_RECV );
1809            hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ) , client_xp );
1810            hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid  ) , true );
1811
1812            // release socket lock
1813            remote_queuelock_release( socket_lock_xp );
1814
1815#if DEBUG_SOCKET_RECV 
1816uint32_t cycle = (uint32_t)hal_get_cycles();
1817if( DEBUG_SOCKET_RECV < cycle )
1818printk("\n[%s] thread[%x,%x] socket[%x,%d] rx_buf empty => blocks on <IO> / cycle %d\n",
1819__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1820#endif
1821            // client thread blocks itself and deschedules
1822            thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO );
1823            sched_yield( "blocked in nic_io" );
1824
1825#if DEBUG_SOCKET_RECV 
1826cycle = (uint32_t)hal_get_cycles();
1827if( DEBUG_SOCKET_RECV < cycle )
1828printk("\n[%s] thread[%x,%x] socket[%x,%d] for RECV resumes / cycle %d\n",
1829__FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle );
1830#endif
1831            // take socket lock
1832            remote_queuelock_acquire( socket_lock_xp );
1833
1834            // get rx_sts and rx_buf status
1835            cmd_valid  = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid ));
1836            cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts ));
1837            buf_status = remote_buf_status( rx_buf_xp );
1838       
1839assert( (((buf_status != 0) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)),
1840"illegal socket state when client thread resumes after RX_RECV\n"
1841" buf_status = %d / rx_sts = %d / rx_valid = %d\n",
1842buf_status , cmd_status , cmd_valid );
1843
1844            // reset rx_client in socket descriptor
1845            hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client  ) , XPTR_NULL );
1846
1847            // reset rx_buf for an UDP socket
1848            if( socket_type == SOCK_DGRAM ) remote_buf_reset( rx_buf_xp );
1849
1850            // release socket lock
1851            remote_queuelock_release( socket_lock_xp );
1852
1853            if( cmd_status == CMD_STS_EOF )           // EOF (remote close) reported
1854            {
1855
1856#if DEBUG_SOCKET_RECV
1857cycle = (uint32_t)hal_get_cycles();
1858if( DEBUG_SOCKET_RECV < cycle )
1859printk("\n[%s] EOF for RX_RECV / socket[%x,%d] / thread[%x,%x]\n",
1860__FUNCTION__, process->pid, fdid, process->pid, this->trdid );
1861#endif
1862                return 0;
1863            }
1864            else if( cmd_status != CMD_STS_SUCCESS )   // other error reported
1865            {
1866
1867#if DEBUG_SOCKET_RECV
1868cycle = (uint32_t)hal_get_cycles();
1869if( DEBUG_SOCKET_RECV < cycle )
1870printk("\n[%s] error %s for RX_RECV / socket[%x,%d] / thread[%x,%x]\n",
1871__FUNCTION__, socket_cmd_sts_str(cmd_status), process->pid, fdid, process->pid, this->trdid );
1872#endif
1873                return -1;
1874            }
1875
1876        }
1877
1878        // number of bytes extracted from rx_buf cannot be larger than u_buf size
1879        moved_bytes = ( length < buf_status ) ? length : buf_status;
1880
1881        // move data from kernel rx_buf to user u_buf
1882        remote_buf_get_to_user( rx_buf_xp,
1883                                u_buf,
1884                                moved_bytes );
1885#if DEBUG_SOCKET_SEND
1886cycle = (uint32_t)hal_get_cycles();
1887if (DEBUG_SOCKET_SEND < cycle )
1888printk("\n[%s] thread[%x,%x] success for RECV / socket[%x,%d] / length %d / cycle %d\n",
1889__FUNCTION__, process->pid, this->trdid, process->pid, fdid, moved_bytes, cycle );
1890#endif
1891        return moved_bytes;
1892
1893    }  // end RX_RECV command
1894} // end socket_move_data()
1895
1896
1897///////////////////////////////////
1898int socket_send( uint32_t    fdid,
1899                 uint8_t   * u_buf,
1900                 uint32_t    length )
1901{
1902    int nbytes = socket_move_data( true,           // SEND
1903                                   fdid,
1904                                   u_buf,
1905                                   length,
1906                                   false, 0, 0 );  // no explicit remote socket
1907    return nbytes;
1908
1909}  // end socket_send()
1910
1911/////////////////////////////////////
1912int socket_sendto( uint32_t    fdid,
1913                   uint8_t   * u_buf,
1914                   uint32_t    length,
1915                   uint32_t    remote_addr,
1916                   uint32_t    remote_port )
1917{
1918    int nbytes = socket_move_data( true,          // SEND
1919                                   fdid,
1920                                   u_buf,
1921                                   length,
1922                                   true,          // explicit remote socket
1923                                   remote_addr,
1924                                   remote_port );
1925    return nbytes;
1926
1927}  // end socket_sendto()
1928
1929///////////////////////////////////
1930int socket_recv( uint32_t    fdid,
1931                 uint8_t   * u_buf,
1932                 uint32_t    length )
1933{
1934    int nbytes = socket_move_data( false,          // RECV
1935                                   fdid,
1936                                   u_buf,
1937                                   length,
1938                                   false, 0, 0 );  // no explicit remote socket
1939    return nbytes;
1940
1941} // end socket_recv()
1942
1943
1944///////////////////////////////////////
1945int socket_recvfrom( uint32_t    fdid,
1946                     uint8_t   * u_buf,
1947                     uint32_t    length,
1948                     uint32_t    remote_addr,
1949                     uint32_t    remote_port )
1950{
1951    int nbytes = socket_move_data( false,         // RECV
1952                                   fdid,
1953                                   u_buf,
1954                                   length,
1955                                   true,          // explicit remote socket
1956                                   remote_addr,
1957                                   remote_port );
1958    return nbytes;
1959
1960}  // end socket_recvfrom()
1961
1962////////////////////////////////////////////
1963void socket_display( xptr_t       socket_xp,
1964                     const char * func_str )
1965{
1966    socket_t * socket = GET_PTR( socket_xp );
1967    cxy_t      cxy    = GET_CXY( socket_xp );
1968
1969    pid_t      pid         = hal_remote_l32( XPTR( cxy , &socket->pid ));
1970    fdid_t     fdid        = hal_remote_l32( XPTR( cxy , &socket->fdid ));
1971    uint32_t   state       = hal_remote_l32( XPTR( cxy , &socket->state ));
1972    uint32_t   channel     = hal_remote_l32( XPTR( cxy , &socket->nic_channel ));
1973    uint32_t   local_addr  = hal_remote_l32( XPTR( cxy , &socket->local_addr ));
1974    uint32_t   local_port  = hal_remote_l32( XPTR( cxy , &socket->local_port ));
1975    uint32_t   remote_addr = hal_remote_l32( XPTR( cxy , &socket->remote_addr ));
1976    uint32_t   remote_port = hal_remote_l32( XPTR( cxy , &socket->remote_port ));
1977    uint32_t   tx_valid    = hal_remote_l32( XPTR( cxy , &socket->tx_valid ));
1978    uint32_t   tx_cmd      = hal_remote_l32( XPTR( cxy , &socket->tx_cmd ));
1979    uint32_t   tx_sts      = hal_remote_l32( XPTR( cxy , &socket->tx_sts ));
1980    uint32_t   tx_len      = hal_remote_l32( XPTR( cxy , &socket->tx_len ));
1981    uint32_t   tx_todo     = hal_remote_l32( XPTR( cxy , &socket->tx_todo ));
1982    uint32_t   tx_una      = hal_remote_l32( XPTR( cxy , &socket->tx_una ));         
1983    uint32_t   tx_nxt      = hal_remote_l32( XPTR( cxy , &socket->tx_nxt ));         
1984    uint32_t   tx_wnd      = hal_remote_l32( XPTR( cxy , &socket->tx_wnd ));         
1985    uint32_t   rx_valid    = hal_remote_l32( XPTR( cxy , &socket->rx_valid ));
1986    uint32_t   rx_cmd      = hal_remote_l32( XPTR( cxy , &socket->rx_cmd ));
1987    uint32_t   rx_sts      = hal_remote_l32( XPTR( cxy , &socket->rx_sts ));
1988    uint32_t   rx_nxt      = hal_remote_l32( XPTR( cxy , &socket->rx_nxt ));         
1989    uint32_t   rx_wnd      = hal_remote_l32( XPTR( cxy , &socket->rx_wnd ));         
1990    uint32_t   rx_irs      = hal_remote_l32( XPTR( cxy , &socket->rx_irs ));         
1991
1992    if( func_str == NULL )
1993    {
1994        printk("\n****** socket[%x,%d] / xptr[%x,%x]*****\n",
1995        pid, fdid, cxy, socket );
1996    }
1997    else
1998    {
1999        printk("\n***** socket[%x,%d] / xptr[%x,%x] / from %s *****\n",
2000        pid, fdid, cxy, socket, func_str );
2001    }
2002    printk(" - state %s / channel %d\n"
2003           " - local_addr %x / local_port %x\n"
2004           " - remote_addr %x / remote_port %x\n"
2005           " - tx_valid %d (%s) / tx_sts %d / tx_len %x / tx_todo %x\n"
2006           " - tx_una %x / tx_nxt %x / tx_wnd %x\n"
2007           " - rx_valid %d (%s) / rx_sts %d\n"
2008           " - rx_nxt %x / rx_wnd %x / rx_irs %x\n",
2009           socket_state_str(state), channel ,
2010           local_addr, local_port,
2011           remote_addr, remote_port,
2012           tx_valid, socket_cmd_type_str(tx_cmd), tx_sts, tx_len, tx_todo,
2013           tx_una, tx_nxt, tx_wnd,
2014           rx_valid, socket_cmd_type_str(rx_cmd), rx_sts,
2015           rx_nxt, rx_wnd, rx_irs );
2016
2017}  // end socket_display()
2018
2019
2020
2021
2022
Note: See TracBrowser for help on using the repository browser.