/* * ksocket.c - kernel socket implementation. * * Authors Alain Greiner (2016,2017,2018,2019,2020) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH.; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ////////////////////////////////////////////////////////////////////////////////////// // Extern global variables ////////////////////////////////////////////////////////////////////////////////////// extern chdev_directory_t chdev_dir; // allocated in kernel_init.c /////////////////////////////////////////// char * socket_domain_str( uint32_t domain ) { switch( domain ) { case AF_INET : return "INET"; case AF_LOCAL : return "LOCAL"; default : return "undefined"; } } /////////////////////////////////////// char * socket_type_str( uint32_t type ) { switch( type ) { case SOCK_DGRAM : return "UDP"; case SOCK_STREAM : return "TCP"; default : return "undefined"; } } ///////////////////////////////////////// char * socket_state_str( uint32_t state ) { switch( state ) { case UDP_STATE_UNBOUND : return "UDP_UNBOUND"; case UDP_STATE_BOUND : return "UDP_BOUND"; case UDP_STATE_ESTAB : return "UDP_ESTAB"; case TCP_STATE_UNBOUND : return "TCP_UNBOUND"; case TCP_STATE_BOUND : return "TCP_BOUND"; case TCP_STATE_LISTEN : return 
"TCP_LISTEN"; case TCP_STATE_SYN_SENT : return "TCP_SYN_SENT"; case TCP_STATE_SYN_RCVD : return "TCP_SYN_RCVD"; case TCP_STATE_ESTAB : return "TCP_ESTAB"; case TCP_STATE_FIN_WAIT1 : return "TCP_FIN_WAIT1"; case TCP_STATE_FIN_WAIT2 : return "TCP_FIN_WAIT2"; case TCP_STATE_CLOSING : return "TCP_CLOSING"; case TCP_STATE_TIME_WAIT : return "TCP_TIME_WAIT"; case TCP_STATE_CLOSE_WAIT : return "TCP_CLOSE_WAIT"; case TCP_STATE_LAST_ACK : return "TCP_LAST_ACK"; case TCP_STATE_CLOSED : return "TCP_CLOSED"; default : return "undefined"; } } /////////////////////////////////////////// char * socket_cmd_type_str( uint32_t type ) { switch( type ) { case CMD_TX_CONNECT : return "TX_CONNECT"; case CMD_TX_ACCEPT : return "TX_ACCEPT"; case CMD_TX_CLOSE : return "TX_CLOSE"; case CMD_TX_SEND : return "TX_SEND"; case CMD_RX_ACCEPT : return "RX_ACCEPT"; case CMD_RX_RECV : return "RX_RECV"; default : return "undefined"; } } /////////////////////////////////////////// char * socket_cmd_sts_str( uint32_t sts ) { switch( sts ) { case CMD_STS_SUCCESS : return "SUCCESS"; case CMD_STS_EOF : return "EOF"; case CMD_STS_RST : return "RST"; case CMD_STS_BADACK : return "BADACK"; case CMD_STS_BADSTATE : return "BADSTATE"; case CMD_STS_BADCMD : return "BADCMD"; default : return "undefined"; } } /////////////////////////////////////////////////////////////////////////////////////////// // This static function implements the alarm handler used by a TX client thread to // handle a retransmission timeout for a TX command (ACCEPT / CONNECT / CLOSE / SEND). // The argument is actually an extended pointer on the involved socket. // First, it updates the retransmission timeout. Then, it get the type of TX command, // and request the NIC_TX server thread to re-send the unacknowledged segment. /////////////////////////////////////////////////////////////////////////////////////////// // @ sock_xp : extended pointer on the involved socket. 
/////////////////////////////////////////////////////////////////////////////////////////// static void __attribute__((noinline)) socket_alarm_handler( xptr_t sock_xp ) { // get cluster and local pointer on socket descriptor socket_t * sock_ptr = GET_PTR( sock_xp ); cxy_t sock_cxy = GET_CXY( sock_xp ); #if DEBUG_SOCKET_ALARM uint32_t cycle = (uint32_t)hal_get_cycles(); #endif // build extended pointer on lock protecting socket xptr_t socket_lock_xp = XPTR( sock_cxy , &sock_ptr->lock ); // take the socket lock remote_queuelock_acquire( socket_lock_xp ); // get relevant infos from socket descriptor uint32_t tx_cmd = hal_remote_l32( XPTR( sock_cxy , &sock_ptr->tx_cmd )); uint32_t channel = hal_remote_l32( XPTR( sock_cxy , &sock_ptr->nic_channel )); xptr_t thread_xp = hal_remote_l64( XPTR( sock_cxy , &sock_ptr->tx_client )); assert( __FUNCTION__, (thread_xp != XPTR_NULL), "illegal tx_client field for a retransmission timeout" ); // get TX client thread cluster cxy_t thread_cxy = GET_CXY( thread_xp ); assert( __FUNCTION__, (thread_cxy == local_cxy), "the client thread must be running in the same cluster as the alarm handler" ); // get pointers on NIC_TX[index] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // get pointers on NIC_TX[channel] server thread thread_t * tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server )); xptr_t tx_server_xp = XPTR( tx_chdev_cxy , tx_server_ptr ); // update the date in alarm alarm_update( thread_xp , hal_get_cycles() + CONFIG_SOCK_RETRY_TIMEOUT ); ////////////////////////////// if( tx_cmd == CMD_TX_CONNECT ) { #if DEBUG_SOCKET_ALARM if( DEBUG_SOCKET_ALARM < cycle ) printk("\n[%s] rings for CONNECT : request a new SYN segment / cycle %d\n", __FUNCTION__ , cycle ); #endif // set tx_valid to request the NIC_TX server to send a new SYN segment hal_remote_s32( XPTR( sock_cxy , &sock_ptr->tx_valid ) , true ); // update 
socket state hal_remote_s32( XPTR( sock_cxy , &sock_ptr->state ) , TCP_STATE_BOUND ); // unblock the NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); } ///////////////////////////// if( tx_cmd == CMD_TX_ACCEPT ) { #if DEBUG_SOCKET_ALARM if( DEBUG_SOCKET_ALARM < cycle ) printk("\n[%s] rings for ACCEPT : request a new SYN-ACK segment / cycle %d\n", __FUNCTION__ , cycle ); #endif // set tx_valid to request the NIC_TX server to send a new SYN-ACK segment hal_remote_s32( XPTR( sock_cxy , &sock_ptr->tx_valid ) , true ); // update socket state hal_remote_s32( XPTR( sock_cxy , &sock_ptr->state ) , TCP_STATE_SYN_RCVD ); // unblock the NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); } //////////////////////////// if( tx_cmd == CMD_TX_CLOSE ) { #if DEBUG_SOCKET_ALARM if( DEBUG_SOCKET_ALARM < cycle ) printk("\n[%s] rings for CLOSE : request a new FIN-ACK segment / cycle %d\n", __FUNCTION__ , cycle ); #endif // set tx_valid to request the NIC_TX server to send a new FIN-ACK segment hal_remote_s32( XPTR( sock_cxy , &sock_ptr->tx_valid ) , true ); // update socket state hal_remote_s32( XPTR( sock_cxy , &sock_ptr->state ) , TCP_STATE_ESTAB ); // unblock the NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); } /////////////////////////// if( tx_cmd == CMD_TX_SEND ) { // get get relevant infos from socket pointer uint32_t tx_una = hal_remote_l32( XPTR( sock_cxy , &sock_ptr->tx_una )); uint32_t tx_ack = hal_remote_l32( XPTR( sock_cxy , &sock_ptr->tx_ack )); uint32_t tx_len = hal_remote_l32( XPTR( sock_cxy , &sock_ptr->tx_len )); #if DEBUG_SOCKET_ALARM if( DEBUG_SOCKET_ALARM < cycle ) printk("\n[%s] rings for SEND : request %d bytes / cycle %d\n", __FUNCTION__ , tx_len , cycle ); #endif // update command fields in socket hal_remote_s32( XPTR( sock_cxy , &sock_ptr->tx_nxt ) , tx_una ); hal_remote_s32( XPTR( sock_cxy , &sock_ptr->tx_todo ) , tx_len - tx_ack ); hal_remote_s32( XPTR( sock_cxy , 
&sock_ptr->tx_valid ) , true ); // unblock the NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); } // release the socket lock remote_queuelock_release( socket_lock_xp ); } // end socket_alarm_handler() ///////////////////////////////////////////////////////////////////////////////////////// // This static function registers the socket defined by the argument into // the lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs identified // by the argument, and update the channel field in socket descriptor. ///////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket descriptor. // @ channel : [in] NIC channel index. ///////////////////////////////////////////////////////////////////////////////////////// static void socket_link_to_servers( xptr_t socket_xp, uint32_t channel ) { cxy_t socket_cxy = GET_CXY( socket_xp ); socket_t * socket_ptr = GET_PTR( socket_xp ); #if DEBUG_SOCKET_LINK thread_t * this = CURRENT_THREAD; process_t * process = this->process; pid_t socket_pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); fdid_t socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_LINK < cycle ) printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); #endif // get pointers on NIC_TX[channel] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // build various TX extended pointers xptr_t tx_root_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_root ); xptr_t tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock ); xptr_t tx_list_xp = XPTR( socket_cxy , &socket_ptr->tx_list ); // get pointers on NIC_RX[channel] chdev xptr_t rx_chdev_xp = chdev_dir.nic_rx[channel]; chdev_t * rx_chdev_ptr = 
GET_PTR( rx_chdev_xp ); cxy_t rx_chdev_cxy = GET_CXY( rx_chdev_xp ); // build various RX extended pointers xptr_t rx_root_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_root ); xptr_t rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock ); xptr_t rx_list_xp = XPTR( socket_cxy , &socket_ptr->rx_list ); // register socket in the NIC_TX[channel] chdev clients queue remote_busylock_acquire( tx_lock_xp ); xlist_add_last( tx_root_xp , tx_list_xp ); remote_busylock_release( tx_lock_xp ); // register socket in the NIC_RX[channel] chdev clients queue remote_busylock_acquire( rx_lock_xp ); xlist_add_last( rx_root_xp , rx_list_xp ); remote_busylock_release( rx_lock_xp ); #if DEBUG_SOCKET_LINK cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_LINK < cycle ) printk("\n[%s] thread[%x,%x] linked socket[%x,%d] to channel %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, socket_pid, socket_fdid, channel, cycle ); #endif } // end socket_link_to_servers() ///////////////////////////////////////////////////////////////////////////////////////// // This function removes the socket defined by the argument from the // lists of sockets attached to the relevant NIC_TX and NIC_TX chdevs. 
///////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket descriptor ///////////////////////////////////////////////////////////////////////////////////////// static void socket_unlink_from_servers( xptr_t socket_xp ) { cxy_t socket_cxy = GET_CXY( socket_xp ); socket_t * socket_ptr = GET_PTR( socket_xp ); #if DEBUG_SOCKET_LINK thread_t * this = CURRENT_THREAD; process_t * process = this->process; pid_t socket_pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); fdid_t socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_LINK < cycle ) printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); #endif // get NIC channel uint32_t channel = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->nic_channel )); // get pointers on NIC_TX[channel] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // build various TX extended pointers xptr_t tx_lock_xp = XPTR( tx_chdev_cxy , &tx_chdev_ptr->wait_lock ); xptr_t tx_list_xp = XPTR( socket_cxy , &socket_ptr->tx_list ); xptr_t tx_list_next = hal_remote_l64( tx_list_xp ); xptr_t tx_list_pred = hal_remote_l64( tx_list_xp + sizeof(xptr_t) ); // get pointers on NIC_RX[channel] chdev xptr_t rx_chdev_xp = chdev_dir.nic_rx[channel]; chdev_t * rx_chdev_ptr = GET_PTR( rx_chdev_xp ); cxy_t rx_chdev_cxy = GET_CXY( rx_chdev_xp ); // build various RX extended pointers xptr_t rx_lock_xp = XPTR( rx_chdev_cxy , &rx_chdev_ptr->wait_lock ); xptr_t rx_list_xp = XPTR( socket_cxy , &socket_ptr->rx_list ); xptr_t rx_list_next = hal_remote_l64( rx_list_xp ); xptr_t rx_list_pred = hal_remote_l64( rx_list_xp + sizeof(xptr_t) ); // remove socket from the NIC_TX[channel] chdev clients queue if registered if( 
(tx_list_next != XPTR_NULL) || (tx_list_pred != XPTR_NULL) ) { remote_busylock_acquire( tx_lock_xp ); xlist_unlink( tx_list_xp ); remote_busylock_release( tx_lock_xp ); } // remove socket from the NIC_RX[channel] chdev clients queue if registered if( (rx_list_next != XPTR_NULL) || (rx_list_pred != XPTR_NULL) ) { remote_busylock_acquire( rx_lock_xp ); xlist_unlink( rx_list_xp ); remote_busylock_release( rx_lock_xp ); } #if DEBUG_SOCKET_LINK cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_LINK < cycle ) printk("\n[%s] thread[%x,%x] unlinked socket [%x,%d] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, socket_pid, socket_fdid, cycle ); #endif } // end socket_unlink_from_servers() ///////////////////////////////////////////////////////////////////////////////////////// // This function registers the socket defined by the argument into the // list of listening sockets rooted in the nic_rx[0] chdev. ///////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket descriptor ///////////////////////////////////////////////////////////////////////////////////////// static void socket_link_to_listen( xptr_t socket_xp ) { // get socket cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // get pointers on NIC_RX[0] chdev xptr_t rx0_chdev_xp = chdev_dir.nic_rx[0]; chdev_t * rx0_chdev_ptr = GET_PTR( rx0_chdev_xp ); cxy_t rx0_chdev_cxy = GET_CXY( rx0_chdev_xp ); // build extended pointers on list of listening sockets xptr_t rx0_root_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.root ); xptr_t rx0_lock_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.lock ); // build extended pointer on socket rx_list field xptr_t list_entry_xp = XPTR( socket_cxy , &socket_ptr->rx_list ); // register socket in listening sockets list remote_busylock_acquire( rx0_lock_xp ); xlist_add_last( rx0_root_xp , list_entry_xp ); remote_busylock_release( 
rx0_lock_xp ); } // end socket_link_to_listen() ///////////////////////////////////////////////////////////////////////////////////////// // This function removes the socket defined by the argument from the // list of listening sockets rooted in the nic_rx[0] chdev. ///////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket descriptor ///////////////////////////////////////////////////////////////////////////////////////// static void socket_unlink_from_listen( xptr_t socket_xp ) { // get socket cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // get pointers on NIC_RX[0] chdev xptr_t rx0_chdev_xp = chdev_dir.nic_rx[0]; chdev_t * rx0_chdev_ptr = GET_PTR( rx0_chdev_xp ); cxy_t rx0_chdev_cxy = GET_CXY( rx0_chdev_xp ); // build extended pointers on lock protecting list of listening sockets xptr_t rx0_lock_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.lock ); // build extended pointer on socket rx_list field xptr_t list_entry_xp = XPTR( socket_cxy , &socket_ptr->rx_list ); // register socket in listening sockets list remote_busylock_acquire( rx0_lock_xp ); xlist_unlink( list_entry_xp ); remote_busylock_release( rx0_lock_xp ); } // end socket_unlink_from_listen() ///////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the socket_build() and socket_accept() functions. // It allocates memory in cluster defined by the argument for all structures // associated to a socket: file descriptor, socket descriptor, RX buffer, R2T queue, // and CRQ queue. It allocates an fdid, and register it in the process fd_array. // It initialise the socket desccriptor static fields, other than local_addr, // local_port, remote_addr, remote_port), and set the socket state to UNBOUND. 
// It returns the local pointer on socket descriptor and the fdid value in buffers // defined by the & arguments. ///////////////////////////////////////////////////////////////////////////////////////// // @ cxy : [in] target cluster fo socket & file descriptors. // @ domain : [in] socket domain. // @ type : [in] socket type. // @ socket_ptr : [out] local pointer on buffer for socket pointer. // @ fdid_ptr : [out] local pointer on buffer for fdid value. // # return 0 if success / return -1 if no memory. ///////////////////////////////////////////////////////////////////////////////////////// static error_t socket_create( cxy_t cxy, uint32_t domain, uint32_t type, socket_t ** socket_ptr, uint32_t * fdid_ptr ) { uint32_t fdid; socket_t * socket; vfs_file_t * file; uint32_t state; void * tx_buf; error_t error; thread_t * this = CURRENT_THREAD; process_t * process = this->process; #if DEBUG_SOCKET_CREATE || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_SOCKET_CREATE if( DEBUG_SOCKET_CREATE < cycle ) printk("\n[%s] thread[%x,%x] enter / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif // 1. allocate memory for socket descriptor socket = kmem_remote_alloc( cxy , bits_log2(sizeof(socket_t)) , AF_ZERO ); if( socket == NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : cannot allocate socket descriptor / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif return -1; } // 2. allocate memory for rx_buf data buffer error = remote_buf_init( XPTR( cxy , &socket->rx_buf ), CONFIG_SOCK_RX_BUF_ORDER ); if( error ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : no memory for rx_buf / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif kmem_remote_free( cxy , socket , bits_log2(sizeof(socket_t)) ); // 1 return -1; } // 3. 
allocate memory for tx_buf tx_buf = kmem_remote_alloc( cxy , CONFIG_SOCK_TX_BUF_ORDER , AF_NONE ); if( tx_buf == NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : no memory for tx_buf / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif remote_buf_release_data( XPTR( cxy , &socket->rx_buf ) ); // 2 kmem_remote_free( cxy , socket , bits_log2(sizeof(socket_t)) ); // 1 return -1; } // 4. allocate memory for r2tq queue error = remote_buf_init( XPTR( cxy , &socket->r2tq ), bits_log2( CONFIG_SOCK_R2T_BUF_SIZE ) ); if( error ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : cannot allocate R2T queue / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif kmem_remote_free( cxy , tx_buf , CONFIG_SOCK_TX_BUF_ORDER ); // 3 remote_buf_release_data( XPTR( cxy , &socket->rx_buf ) ); // 2 kmem_remote_free( cxy , socket , bits_log2(sizeof(socket_t)) ); // 1 return -1; } // don't allocate memory for CRQ queue / done by the socket_listen function // 5. allocate memory for file descriptor file = kmem_remote_alloc( cxy , bits_log2(sizeof(vfs_file_t)) , AF_ZERO ); if( file == NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : cannot allocate file descriptor / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif remote_buf_release_data( XPTR( cxy , &socket->r2tq ) ); // 4 kmem_remote_free( cxy , tx_buf , CONFIG_SOCK_TX_BUF_ORDER ); // 3 remote_buf_release_data( XPTR( cxy , &socket->rx_buf ) ); // 2 kmem_remote_free( cxy , socket , bits_log2(sizeof(socket_t)) ); // 1 return -1; } // 6. 
get an fdid value, and register file descriptor in fd_array[] error = process_fd_register( process->ref_xp, XPTR( cxy , file ), &fdid ); if ( error ) { #if DEBUG_SOCKET_ERROR if( DEBUG_SOCKET_ERROR < cycle ) printk("\n[ERROR] in %s : cannot register file descriptor / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif kmem_remote_free( cxy , file , bits_log2(sizeof(vfs_file_t)) ); // 5 remote_buf_release_data( XPTR( cxy , &socket->r2tq ) ); // 4 kmem_remote_free( cxy , tx_buf , CONFIG_SOCK_TX_BUF_ORDER ); // 3 remote_buf_release_data( XPTR( cxy , &socket->rx_buf ) ); // 2 kmem_remote_free( cxy , socket , bits_log2(sizeof(socket_t)) ); // 1 return -1; } state = (type == SOCK_STREAM) ? TCP_STATE_UNBOUND : UDP_STATE_UNBOUND; // initialise socket descriptor hal_remote_s32( XPTR( cxy , &socket->pid ) , process->pid ); hal_remote_s32( XPTR( cxy , &socket->fdid ) , fdid ); hal_remote_s32( XPTR( cxy , &socket->domain ) , domain ); hal_remote_s32( XPTR( cxy , &socket->type ) , type ); hal_remote_s32( XPTR( cxy , &socket->state ) , state ); hal_remote_s64( XPTR( cxy , &socket->tx_client ) , XPTR_NULL ); hal_remote_s64( XPTR( cxy , &socket->rx_client ) , XPTR_NULL ); hal_remote_s32( XPTR( cxy , &socket->tx_valid ) , false ); hal_remote_s32( XPTR( cxy , &socket->rx_valid ) , false ); hal_remote_s32( XPTR( cxy , &socket->nic_channel ) , 0 ); hal_remote_spt( XPTR( cxy , &socket->tx_buf ) , tx_buf ); // initialize file descriptor hal_remote_s32( XPTR( cxy , &file->type ) , FILE_TYPE_SOCK ); hal_remote_spt( XPTR( cxy , &file->socket ) , socket ); // initialize socket lock remote_queuelock_init( XPTR( cxy , &socket->lock ) , LOCK_SOCKET_STATE ); #if DEBUG_SOCKET_CREATE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CREATE < cycle ) printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / xptr[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cxy, socket, cycle ); #endif // return success *socket_ptr = socket; 
*fdid_ptr = fdid; return 0; } // end socket_create ///////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the socket_close() function to destroy a socket // identified by the argument. // It remove the associated file from the reference process fd_array. It unlink the // socket from the NIC_TX [k] and NIC_RX[k] chdevs. It release all memory allocated // for the structures associated to the target socket : file descriptor, socket // descriptor, RX buffer, R2T queue, CRQ queue. ///////////////////////////////////////////////////////////////////////////////////////// // @ file_xp : extended pointer on the file descriptor. ///////////////////////////////////////////////////////////////////////////////////////// static void socket_destroy( xptr_t file_xp ) { thread_t * this = CURRENT_THREAD; process_t * process = this->process; // check file_xp argument assert( __FUNCTION__, (file_xp != XPTR_NULL), "illegal argument\n" ); // get cluster & local pointer for file descriptor vfs_file_t * file_ptr = GET_PTR( file_xp ); cxy_t file_cxy = GET_CXY( file_xp ); // get local pointer for socket and file type socket_t * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); uint32_t file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); // check file descriptor type assert( __FUNCTION__, (file_type == FILE_TYPE_SOCK), "illegal file type\n" ); // get socket nic_channel, state, pid and fdid uint32_t state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); uint32_t fdid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid )); #if DEBUG_SOCKET_DESTROY uint32_t cycle = (uint32_t)hal_get_cycles(); pid_t pid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->pid )); if( DEBUG_SOCKET_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] enter / file[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, pid, fdid, cycle ); #endif // remove socket from NIC_TX & NIC_RX chdev queues // or from 
the listening sockets list if( state == TCP_STATE_LISTEN ) { socket_unlink_from_listen( XPTR( file_cxy , socket_ptr ) ); } else { socket_unlink_from_servers( XPTR( file_cxy , socket_ptr ) ); } // remove the file descriptor from the fd_array process_fd_remove( process->owner_xp , fdid ); // release memory allocated for file descriptor kmem_remote_free( file_cxy , file_ptr , bits_log2(sizeof(vfs_file_t)) ); // release memory allocated for buffers attached to socket descriptor remote_buf_release_data( XPTR( file_cxy , &socket_ptr->crqq ) ); remote_buf_release_data( XPTR( file_cxy , &socket_ptr->r2tq ) ); remote_buf_release_data( XPTR( file_cxy , &socket_ptr->rx_buf ) ); // release memory allocated for socket descriptor kmem_remote_free( file_cxy , socket_ptr , bits_log2(sizeof(socket_t)) ); #if DEBUG_SOCKET_DESTROY cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_DESTROY < cycle ) printk("\n[%s] thread[%x,%x] exit / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif } // end socket_destroy() //////////////////////////////////////////////// void socket_put_r2t_request( xptr_t queue_xp, uint8_t flags, uint32_t channel ) { xptr_t chdev_xp; cxy_t chdev_cxy; chdev_t * chdev_ptr; thread_t * server_ptr; xptr_t server_xp; while( 1 ) { // try to register R2T request error_t error = remote_buf_put_from_kernel( queue_xp, &flags, 1 ); if( error ) { // queue full => wait and retry sched_yield( "waiting R2T queue" ); } else { // get NIC_TX chdev pointers chdev_xp = chdev_dir.nic_tx[channel]; chdev_cxy = GET_CXY( chdev_xp ); chdev_ptr = GET_PTR( chdev_xp ); // get NIC_TX server thread pointers server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server ) ); server_xp = XPTR( chdev_cxy , server_ptr ); // unblocks NIC_TX server thread thread_unblock( server_xp , THREAD_BLOCKED_CLIENT ); return; } } } // end socket_put_r2t_request() /////////////////////////////////////////////////// error_t socket_get_r2t_request( xptr_t queue_xp, uint8_t * flags ) { // 
get one request from R2T queue return remote_buf_get_to_kernel( queue_xp, flags, 1 ); } // end socket_get_r2T_request() /////////////////////////////////////////////////// error_t socket_put_crq_request( xptr_t queue_xp, uint32_t remote_addr, uint32_t remote_port, uint32_t remote_iss, uint32_t remote_window ) { connect_request_t req; // build request req.addr = remote_addr; req.port = remote_port; req.iss = remote_iss; req.window = remote_window; // try to register request in CRQ return remote_buf_put_from_kernel( queue_xp, (uint8_t *)(&req), sizeof(connect_request_t) ); } // end socket_put_crq_request() //////////////////////////////////////////////////// error_t socket_get_crq_request( xptr_t queue_xp, uint32_t * remote_addr, uint32_t * remote_port, uint32_t * remote_iss, uint32_t * remote_window ) { connect_request_t req; error_t error; // get request from CRQ error = remote_buf_get_to_kernel( queue_xp, (uint8_t *)(&req), sizeof(connect_request_t) ); // extract request arguments *remote_addr = req.addr; *remote_port = req.port; *remote_iss = req.iss; *remote_window = req.window; return error; } // end socket_get_crq_request() ///////////////////////////////////////////////////////////////////////////////////////// // Functions implementing the SOCKET related syscalls ///////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////// int socket_build( uint32_t domain, uint32_t type ) { uint32_t fdid; socket_t * socket; error_t error; #if DEBUG_SOCKET_BUILD uint32_t cycle = (uint32_t)hal_get_cycles(); thread_t * this = CURRENT_THREAD; process_t * process = this->process; if( DEBUG_SOCKET_BUILD < cycle ) printk("\n[%s] thread[%x,%x] enter / %s / %s / cycle %d\n", __FUNCTION__, process->pid, this->trdid, socket_domain_str(domain), socket_type_str(type), cycle ); #endif // allocate memory for the file descriptor and for the socket error = socket_create( local_cxy, domain, type, &socket, &fdid ); #if 
DEBUG_SOCKET_BUILD cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_BUILD < cycle ) printk("\n[%s] thread[%x,%x] exit / socket %x / fdid %d / %s / cycle %d\n", __FUNCTION__, process->pid, this->trdid, socket, fdid, socket_state_str(hal_remote_l32(XPTR(local_cxy , &socket->state))), cycle ); #endif if( error ) return -1; return fdid; } //////////////////////////////// int socket_bind( uint32_t fdid, uint32_t addr, uint16_t port ) { vfs_file_type_t file_type; socket_t * socket; uint32_t socket_type; uint32_t socket_state; thread_t * this = CURRENT_THREAD; process_t * process = this->process; #if DEBUG_SOCKET_BIND || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_SOCKET_BIND if( DEBUG_SOCKET_BIND < cycle ) printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / addr %x / port %x / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, addr, port, cycle ); #endif // get pointers on file descriptor xptr_t file_xp = process_fd_get_xptr_from_local( process , fdid ); vfs_file_t * file_ptr = GET_PTR( file_xp ); cxy_t file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x] / cycle %d\n", __FUNCTION__, fdid, process->pid, this->trdid, cycle ); #endif return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != FILE_TYPE_SOCK ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x] / cycle %d", __FUNCTION__, vfs_inode_type_str( file_type ), process->pid, this->trdid, cycle ); #endif return -1; } // get socket type socket_type = hal_remote_l32(XPTR( file_cxy , &socket->type )); // compute socket state socket_state = (socket_type == SOCK_STREAM) ? 
TCP_STATE_BOUND : UDP_STATE_BOUND; // update the socket descriptor hal_remote_s32( XPTR( file_cxy , &socket->local_addr ) , addr ); hal_remote_s32( XPTR( file_cxy , &socket->local_port ) , port ); hal_remote_s32( XPTR( file_cxy , &socket->state ) , socket_state ); #if DEBUG_SOCKET_BIND cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_BIND < cycle ) printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / addr %x / port %x / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, socket_state_str(hal_remote_l32( XPTR( file_cxy , &socket->state ))), hal_remote_l32( XPTR( file_cxy , &socket->local_addr )), hal_remote_l32( XPTR( file_cxy , &socket->local_port )), cycle ); #endif return 0; } // end socket_bind() ////////////////////////////////// int socket_listen( uint32_t fdid, uint32_t crq_depth ) { xptr_t file_xp; vfs_file_t * file_ptr; cxy_t file_cxy; vfs_file_type_t file_type; socket_t * socket_ptr; uint32_t socket_type; uint32_t socket_state; uint32_t socket_local_addr; uint32_t socket_local_port; error_t error; thread_t * this = CURRENT_THREAD; process_t * process = this->process; #if DEBUG_SOCKET_LISTEN || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_SOCKET_LISTEN if( DEBUG_SOCKET_LISTEN < cycle ) printk("\n[%s] thread[%x,%x] enter / socket[%x,%d] / crq_depth %x / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, crq_depth, cycle ); #endif // get pointers on file descriptor file_xp = process_fd_get_xptr_from_local( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x] / cycle %d\n", __FUNCTION__, fdid, process->pid, this->trdid, cycle ); #endif return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( 
file_type != FILE_TYPE_SOCK ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid, cycle ); #endif return -1; } // get relevant infos from socket descriptor socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); // check socket type if( socket_type != SOCK_STREAM ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, socket_type_str(socket_type), process->pid, this->trdid, cycle ); #endif return -1; } // check socket state if( socket_state != TCP_STATE_BOUND ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid, cycle ); #endif return -1; } // compute CRQ queue depth : max( crq_depth , CONFIG_SOCK_CRQ_BUF_SIZE ) uint32_t depth = ( crq_depth > CONFIG_SOCK_CRQ_BUF_SIZE ) ? 
crq_depth : CONFIG_SOCK_CRQ_BUF_SIZE; // allocate memory for the CRQ queue error = remote_buf_init( XPTR( file_cxy , &socket_ptr->crqq ), bits_log2( depth * sizeof(connect_request_t)) ); if( error ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : cannot allocate CRQ queue / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif return -1; } // update socket.state hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , TCP_STATE_LISTEN ); // register socket in the list of listening socket socket_link_to_listen( XPTR( file_cxy , socket_ptr ) ); #if DEBUG_SOCKET_LISTEN cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_LISTEN < cycle ) printk("\n[%s] thread[%x,%x] exit / socket[%x,%d] / %s / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, socket_state_str(socket_state), cycle ); #endif return 0; } // end socket_listen() /////////////////////////////////// int socket_accept( uint32_t fdid, uint32_t * remote_addr, uint16_t * remote_port ) { xptr_t file_xp; // extended pointer on remote file vfs_file_t * file_ptr; cxy_t file_cxy; vfs_file_type_t file_type; // file descriptor type socket_t * socket_ptr; // local pointer on remote waiting socket uint32_t socket_type; // listening socket type uint32_t socket_state; // listening socket state uint32_t socket_domain; // listening socket domain uint32_t socket_local_addr; // listening socket local IP address uint32_t socket_local_port; // listening socket local port uint32_t socket_tx_nxt; // listening socket tx_nxt bool_t socket_tx_valid; // listening socket tx_valid xptr_t socket_tx_client; // listening socket tx_client thread bool_t socket_rx_valid; // listening socket rx_valid xptr_t socket_rx_client; // listening socket rx_client thread xptr_t socket_lock_xp; // listening socket lock xptr_t crq_xp; // listening socket CRQ queue uint32_t crq_status; // number of bytes in CRQ cxy_t new_socket_cxy; // new socket cluster identifier socket_t * new_socket_ptr; // 
local pointer on new socket xptr_t new_socket_xp; // extended pointer on new socket volatile uint32_t new_state; // new socket state (modified by NIC_RX thread) uint32_t new_fdid; // new socket file descriptor index uint32_t new_remote_addr; // new socket remote IP address uint32_t new_remote_port; // new socket remote port uint32_t new_remote_iss; // new socket remote iss uint32_t new_remote_window; // new socket receive window xptr_t tx_server_xp; // extended pointer on TX server thread thread_t * tx_server_ptr; // local pointer on TX server thread uint32_t cmd_status; // command status (rx_sts or tx_sts) bool_t cmd_valid; // valid command (rx_valid or tx_valid) error_t error; thread_t * this = CURRENT_THREAD; xptr_t client_xp = XPTR( local_cxy , this ); process_t * process = this->process; #if DEBUG_SOCKET_ACCEPT || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_SOCKET_ACCEPT if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif // 1) get pointers on file descriptor file_xp = process_fd_get_xptr_from_local( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : undefined fdid %d / thead[%x,%x] / cycle %d", __FUNCTION__, fdid, process->pid, this->trdid, cycle ); #endif return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != FILE_TYPE_SOCK ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid, cycle ); #endif return -1; } // build extended pointer on listening socket lock socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock ); // 
acquire listening socket lock remote_queuelock_acquire( socket_lock_xp ); // get listening socket type, domain, state, local_addr, local_port & tx_nxt socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); socket_domain = hal_remote_l32( XPTR( file_cxy , &socket_ptr->domain )); socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr )); socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port )); socket_tx_nxt = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_nxt )); socket_tx_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )); socket_tx_client = hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client )); socket_rx_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )); socket_rx_client = hal_remote_l64( XPTR( file_cxy , &socket_ptr->rx_client )); // check socket type if( socket_type != SOCK_STREAM ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket type %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, socket_type_str(socket_type), process->pid , this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // check socket state if( socket_state != TCP_STATE_LISTEN ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // check no previous RX command if( (socket_rx_valid == true) || (socket_rx_client != XPTR_NULL) ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : previous RX cmd on socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // check no previous TX command if( (socket_tx_valid == true) || (socket_tx_client != XPTR_NULL) ) { #if DEBUG_SOCKET_ERROR 
printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // 2) check the listenig socket CRQ crq_xp = XPTR( file_cxy , &socket_ptr->crqq ); // get CRQ status crq_status = remote_buf_status( crq_xp ); // block & deschedule to wait a client request when CRQ empty if( crq_status == 0 ) { // register command arguments for NIC_RX server in listening socket hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd ), CMD_RX_ACCEPT ); hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ), client_xp ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid ), true ); // release listening socket lock remote_queuelock_release( socket_lock_xp ); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / CRQ empty => blocks on / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif // block & deschedule when CRQQ empty thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "CRQ queue empty"); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif // take listening socket lock remote_queuelock_acquire( socket_lock_xp ); // get CRQ status & command status cmd_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid ) ); cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts ) ); crq_status = remote_buf_status( crq_xp ); assert( __FUNCTION__, (((crq_status > 0) || (cmd_status!= CMD_STS_SUCCESS)) && (cmd_valid == false)), "illegal socket state when client thread resumes after RX_ACCEPT" ); // reset socket.rx_client hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_client ) , XPTR_NULL ); 
if( cmd_status != CMD_STS_SUCCESS ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : reported for RX / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } } // end blocking on CRQ empty // from this point, we can extract a request from listening socket CRQ error = socket_get_crq_request( crq_xp, &new_remote_addr, &new_remote_port, &new_remote_iss, &new_remote_window ); assert( __FUNCTION__, (error == 0), "cannot get a connection request from a non-empty CRQ" ); // release listening socket lock remote_queuelock_release( socket_lock_xp ); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / CRQ request [addr %x / port %x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, new_remote_addr, new_remote_port, cycle ); #endif // 3) select a cluster for the new socket new_socket_cxy = cluster_random_select(); // allocate memory for the new socket descriptor error = socket_create( new_socket_cxy, socket_domain, socket_type, &new_socket_ptr, &new_fdid ); if( error ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : cannot create new socket / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif return -1; } // build extended poiner on new socket new_socket_xp = XPTR( new_socket_cxy , new_socket_ptr ); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] created new socket[%x,%d] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle ); #endif // compute NIC channel index from remote_addr and remote_port uint32_t new_nic_channel = dev_nic_get_key( new_remote_addr , new_remote_port ); // update new socket descriptor hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_addr ) , socket_local_addr ); 
hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->local_port ) , socket_local_port ); hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_addr) , new_remote_addr ); hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->remote_port) , new_remote_port ); hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->nic_channel) , new_nic_channel ); hal_remote_s32(XPTR(new_socket_cxy , &new_socket_ptr->state ) , TCP_STATE_SYN_RCVD ); // set new socket TCB : increment tx_nxt / initialize rx_nxt, rx_irs, rx_wnd hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_nxt ), socket_tx_nxt + 1 ); hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_nxt ), new_remote_iss + 1 ); hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_irs ), new_remote_iss ); hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->rx_wnd ), new_remote_window ); // link new socket to chdev servers socket_link_to_servers( new_socket_xp , new_nic_channel ); // 3) get pointers on NIC_TX[channel] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[new_nic_channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // get pointers on NIC_TX[channel] server thread tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server )); tx_server_xp = XPTR( tx_chdev_cxy , tx_server_ptr ); // register command arguments in new socket to request a SYN_ACK segment hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_cmd ), CMD_TX_ACCEPT ); hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ), client_xp ); hal_remote_s32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid ), true ); // unblock NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); // start retransmission timer alarm_start( client_xp, hal_get_cycles() + CONFIG_SOCK_RETRY_TIMEOUT, &socket_alarm_handler, new_socket_xp ); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] 
for socket[%x,%d] request SYN-ACK & blocks on / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle ); #endif // client thread blocks & deschedules thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "waiting new socket connection"); #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] new_socket[%x,%d] resumes / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, cycle ); #endif // stop retransmission timer in thread descriptor alarm_stop( client_xp ); // get new socket state, tx_valid and tx_sts new_state = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->state )); cmd_valid = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_valid )); cmd_status = hal_remote_l32( XPTR( new_socket_cxy , &new_socket_ptr->tx_sts )); assert( __FUNCTION__, (((new_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)), "illegal socket state when client thread resumes after TX_ACCEPT" ); // reset socket.tx_client hal_remote_s64( XPTR( new_socket_cxy , &new_socket_ptr->tx_client ) , XPTR_NULL ); if( cmd_status != CMD_STS_SUCCESS ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s reported for TX / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, new_fdid, process->pid, this->trdid, cycle ); #endif return -1; } else { #if DEBUG_SOCKET_ACCEPT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_ACCEPT < cycle ) printk("\n[%s] thread[%x,%x] new_socket[%x,%d] / state %s / addr %x / port %x / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, new_fdid, socket_state_str(new_state), new_remote_addr, new_remote_port, cycle ); #endif // return success *remote_addr = new_remote_addr; *remote_port = new_remote_port; return new_fdid; } } // end socket_accept() ////////////////////////////////// int socket_connect( uint32_t fdid, uint32_t remote_addr, uint16_t 
remote_port ) { vfs_file_type_t file_type; xptr_t socket_xp; // extended pointer on socket descriptor socket_t * socket_ptr; // local pointer on socket descriptor volatile uint32_t socket_state; // socket state (modified by the NIC_TX thread) uint32_t socket_type; // socket type uint32_t local_addr; // local IP address uint32_t local_port; // local port xptr_t tx_server_xp; // extended pointer on TX server thread thread_t * tx_server_ptr; // local pointer on TX server thread uint32_t nic_channel; // NIC channel index uint32_t cmd_status; // command status (tx_sts field) bool_t cmd_valid; // command valid (tx_valid field) thread_t * this = CURRENT_THREAD; xptr_t client_xp = XPTR( local_cxy , this ); pid_t pid = this->process->pid; trdid_t trdid = this->trdid; #if DEBUG_SOCKET_CONNECT || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif // get pointers on file descriptor xptr_t file_xp = process_fd_get_xptr_from_local( this->process , fdid ); vfs_file_t * file_ptr = GET_PTR( file_xp ); cxy_t file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : undefined fdid %d / thread[%x,%x] / cycle %d", __FUNCTION__, fdid, pid, trdid, cycle ); #endif return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); socket_xp = XPTR( file_cxy , socket_ptr ); #if DEBUG_SOCKET_CONNECT if( DEBUG_SOCKET_CONNECT < cycle ) printk("\n[%s] thread[%x,%x] enter for socket[%x,%d] / addr %x / port %x / cycle %d\n", __FUNCTION__, pid, trdid, pid, fdid, remote_addr, remote_port, cycle ); #endif // check file descriptor type if( file_type != FILE_TYPE_SOCK ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x] / cycle %d", __FUNCTION__, vfs_inode_type_str( file_type ), pid, trdid, cycle ); #endif return -1; } // get relevant socket infos socket_type = hal_remote_l32( XPTR( file_cxy , 
&socket_ptr->type ) ); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) ); local_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_addr ) ); local_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->local_port ) ); if( socket_type == SOCK_DGRAM ) // UDP { if( socket_state != UDP_STATE_BOUND ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket state %s for type %s / thread[%x,%x] / cycle %d", __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type), pid, trdid, cycle ); #endif return -1; } } else if( socket_type == SOCK_STREAM ) // TCP { if( socket_state != TCP_STATE_BOUND ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket state %s for type %s / thread[%x,%x] / cycle %d", __FUNCTION__, socket_state_str(socket_state), socket_type_str(socket_type), pid, trdid, cycle ); #endif return -1; } } else { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal socket type / thread[%x,%x] / cycle %d", __FUNCTION__, pid, trdid, cycle ); #endif return -1; } // compute nic_channel index from remote_addr and remote_port nic_channel = dev_nic_get_key( remote_addr , remote_port ); // link socket to chdev servers socket_link_to_servers( XPTR( file_cxy , socket_ptr ), nic_channel ); // update the socket descriptor hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ) , remote_addr ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ) , remote_port ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->nic_channel ) , nic_channel ); // the actual connection mechanism depends on socket type // UDP : client thread updates the local socket state without blocking // TCP : client thread request TX server thread to start the 3 steps handshake if( socket_type == SOCK_DGRAM ) // UDP { // directly update the local socket state hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , UDP_STATE_ESTAB ); return 0; } else // TCP { // get pointers on NIC_TX[channel] chdev xptr_t tx_chdev_xp = 
chdev_dir.nic_tx[nic_channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // get pointers on NIC_TX[channel] server thread tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server )); tx_server_xp = XPTR( tx_chdev_cxy , tx_server_ptr ); // register command arguments in socket descriptor for a SYN segment hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd ), CMD_TX_CONNECT ); hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid ), true ); // unblock NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); // start retransmission timer alarm_start( client_xp, hal_get_cycles() + CONFIG_SOCK_RETRY_TIMEOUT, &socket_alarm_handler, socket_xp ); #if DEBUG_SOCKET_CONNECT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CONNECT < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] blocks on waiting connexion / cycle %d \n", __FUNCTION__, pid, trdid, pid, fdid, cycle ); #endif // block itself and deschedule thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "waiting connection" ); #if DEBUG_SOCKET_CONNECT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CONNECT < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n", __FUNCTION__, pid, trdid, pid, fdid, cycle ); #endif // stop retransmission timer in thread descriptor alarm_stop( client_xp ); // get socket state, tx_valid and tx_sts cmd_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )); cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); assert( __FUNCTION__, (((socket_state == TCP_STATE_ESTAB) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)), "illegal socket state when client thread resumes after TX_CONNECT" ); // reset socket.tx_client hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , 
XPTR_NULL ); if( cmd_status != CMD_STS_SUCCESS ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s reported by server / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, pid, fdid, pid, trdid, cycle ); #endif return -1; } else { #if DEBUG_SOCKET_CONNECT cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CONNECT < cycle ) printk("\n[%s] thread[%x,%x] exit for socket[%x,%d] / %s / cycle %d \n", __FUNCTION__, pid, trdid, pid, fdid, socket_state_str(socket_state),cycle ); #endif return 0; } } // end TCP } // end socket_connect() /////////////////////////////////// int socket_close( xptr_t file_xp, uint32_t fdid ) { uint32_t socket_type; uint32_t socket_state; uint32_t nic_channel; uint32_t cmd_status; // socket.tx_sts bool_t cmd_valid; // socket.tx_valid thread_t * tx_server_ptr; // local pointer on NIC_TX server thread xptr_t tx_server_xp; // extended pointer on NIC_TX server thread xptr_t socket_lock_xp; // extended pointer on socket lock thread_t * this = CURRENT_THREAD; xptr_t client_xp = XPTR( local_cxy , this ); pid_t pid = this->process->pid; trdid_t trdid = this->trdid; #if DEBUG_SOCKET_CLOSE || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif // get pointers on socket descriptor cxy_t file_cxy = GET_CXY( file_xp ); vfs_file_t * file_ptr = GET_PTR( file_xp ); socket_t * socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); xptr_t socket_xp = XPTR( file_cxy , socket_ptr ); assert( __FUNCTION__, (hal_remote_l32( XPTR( file_cxy , &socket_ptr->fdid )) == fdid), "unconsistent file_xp & fdid arguments"); #if DEBUG_SOCKET_CLOSE if (DEBUG_SOCKET_CLOSE < cycle ) printk("\n[%s] thread[%x,%x] enters for socket[%x,%d] / cycle %d\n", __FUNCTION__, pid, trdid, pid, fdid, cycle ); #endif // build extended pointer on lock protecting socket socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock ); // take socket lock remote_queuelock_acquire( socket_lock_xp ); // check no previous TX command if( (hal_remote_l32( XPTR( file_cxy , 
&socket_ptr->tx_valid )) == true) || (hal_remote_l64( XPTR( file_cxy , &socket_ptr->tx_client)) != XPTR_NULL) ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : previous TX cmd on socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, pid, fdid, pid, trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // get relevant socket infos socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); nic_channel = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); // the actual close mechanism depends on socket type and state: // UDP or TCP not connected : client thread directly destroy the socket descriptor // TCP connected : client thread request TX server thread to make the TCP close handshake if( socket_type == SOCK_DGRAM ) // UDP { #if DEBUG_SOCKET_CLOSE cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] socket[%x,%d] %s => directly destroy socket / cycle %d\n", __FUNCTION__, pid, trdid, pid, fdid, socket_state_str( socket_state ), cycle ); #endif // directly destroy socket socket_destroy( file_xp ); return 0; } else if( (socket_state == TCP_STATE_BOUND) || (socket_state == TCP_STATE_LISTEN) || (socket_state == TCP_STATE_SYN_SENT) ) // TCP not connected { #if DEBUG_SOCKET_CLOSE cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] socket[%x,%d] %s => directly destroy socket / cycle %d\n", __FUNCTION__, pid, trdid, pid, fdid, socket_state_str( socket_state ), cycle ); #endif // directly destroy socket socket_destroy( file_xp ); return 0; } else // TCP connected { // get pointers on NIC_TX[index] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[nic_channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // get pointers on NIC_TX[channel] server thread tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , 
&tx_chdev_ptr->server )); tx_server_xp = XPTR( tx_chdev_cxy , tx_server_ptr ); // register command arguments in socket descriptor hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd ), CMD_TX_CLOSE ); hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ), client_xp ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid ), true ); // release socket lock remote_queuelock_release( socket_lock_xp ); // unblock NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); // start retransmission timer alarm_start( client_xp, hal_get_cycles() + CONFIG_SOCK_RETRY_TIMEOUT, &socket_alarm_handler, socket_xp ); #if DEBUG_SOCKET_CLOSE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CLOSE < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] %s => blocks on waiting close / cycle %d \n", __FUNCTION__, pid, trdid, pid, fdid, socket_state_str( socket_state ), cycle ); #endif // client thread block itself and deschedule thread_block( client_xp , THREAD_BLOCKED_IO ); sched_yield( "blocked in close" ); #if DEBUG_SOCKET_CLOSE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CLOSE < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / resumes / cycle %d \n", __FUNCTION__, pid, trdid, pid, fdid, cycle ); #endif // stop retransmission timer in thread descriptor alarm_stop( client_xp ); // take socket lock remote_queuelock_acquire( socket_lock_xp ); // get socket state & command status socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state ) ); cmd_status = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts) ); cmd_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid ) ); assert( __FUNCTION__, (((socket_state == TCP_STATE_CLOSED) || (cmd_status != CMD_STS_SUCCESS)) && (cmd_valid == false)), " socket_state = %s / cmd_status = %d / cmd_valid = %d", socket_state_str(socket_state), cmd_status, cmd_valid ); // reset socket.tx_client hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL ); if( cmd_status != CMD_STS_SUCCESS ) 
// error reported { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s for command TX_CLOSE / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, pid, fdid, pid, this->trdid, cycle ); #endif return -1; } else // success { #if DEBUG_SOCKET_CLOSE cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_CLOSE < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] / destroy socket / cycle %d\n", __FUNCTION__, pid, trdid, pid, fdid, socket_state_str(socket_state) , cycle ); #endif // destroy socket socket_destroy( file_xp ); return 0; } } // end if TCP } // end socket_close() //////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the two functions socket_send() & socket_recv(). // It is used for both UDP and TCP sockets. //////////////////////////////////////////////////////////////////////////////////////// // @ is_send : send when true / receive when false. // @ fdid : socket identifier. // @ u_buf : pointer on user buffer in user space. // @ length : number of bytes in buffer. //////////////////////////////////////////////////////////////////////////////////////// // Implementation note : The behavior is different for SEND & RECV // - For a SEND, the client thread checks that there is no TX command registered // in the socket. It registers the command arguments in the socket descriptor // (tx_client, tx_cmd, tx_buf, tx_len). Then the client thread unblocks the // TX server thread from the BLOCKED_CLIENT condition, blocks itself on the // BLOCKED_IO condition, and deschedules. It is unblocked by the TX server thread // when the last byte has been sent (for UDP) or acknowledged (for TCP). // When the client thread resumes, it reset the command in socket, and returns. // - For a RECV, the client thread checks that there is no RX command registered // in the socket. It registers itself in socket (rx_client). It checks the status // of the receive buffer. 
If the rx_buf is empty, it blocks on the BLOCKED_IO // condition, and deschedules. It is unblocked by the RX server thread when an UDP // packet or TCP segment has been written in the rx_buf. When it resumes, it moves // the available data from the rx_buf to the user buffer, reset its registration // in socket (reset the rx_buf for an UDP socket), and returns. //////////////////////////////////////////////////////////////////////////////////////// int socket_move_data( bool_t is_send, uint32_t fdid, uint8_t * u_buf, uint32_t length ) { vfs_file_type_t file_type; // file descriptor type xptr_t socket_xp; // extended pointer on socket descriptor socket_t * socket_ptr; // local pointer on socket descriptor uint32_t socket_state; // current socket state uint32_t socket_type; // socket type (UDP/TCP) uint32_t nic_channel; // NIC channel for this socket xptr_t socket_lock_xp; // extended pointer on socket lock xptr_t file_xp; // extended pointer on file descriptor vfs_file_t * file_ptr; cxy_t file_cxy; xptr_t chdev_xp; // extended pointer on NIC_TX[channel] chdev chdev_t * chdev_ptr; cxy_t chdev_cxy; int32_t moved_bytes; // total number of moved bytes (fot return) xptr_t server_xp; // ext pointer on NIC_TX / NIC_RX thread thread_t * server_ptr; // local pointer on NIC_TX / NIC_RX thread uint8_t * tx_buf; // pointer on kernel buffer for TX transfer bool_t cmd_valid; // RX or TX command from socket descriptor uint32_t cmd_sts; // RX or TX command from socket descriptor uint32_t tx_todo; // number of bytes still to send xptr_t rx_buf_xp; // extended pointer on socket rx_buf uint32_t rx_buf_sts; // current status of socket rx_buf thread_t * this = CURRENT_THREAD; process_t * process = this->process; #if DEBUG_SOCKET_SEND || DEBUG_SOCKET_RECV || DEBUG_SOCKET_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_SOCKET_SEND || DEBUG_SOCKET_RECV if( is_send ) printk("\n[%s] thread[%x,%x] socket[%x,%d] enter : SEND / buf %x / length %d / cycle %d\n", __FUNCTION__,
process->pid, this->trdid, process->pid, fdid, u_buf, length, cycle ); else printk("\n[%s] thread[%x,%x] socket[%x,%d] enter : RECV / buf %x / length %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, u_buf, length, cycle ); #endif // build extended pointer on client thread xptr_t client_xp = XPTR( local_cxy , this ); // get pointers on file descriptor identifying the socket file_xp = process_fd_get_xptr_from_local( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); if( file_xp == XPTR_NULL ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : undefined fdid %d / thread%x,%x] / cycle %d\n", __FUNCTION__, fdid , process->pid, this->trdid, cycle ); #endif return -1; } // get file type and socket pointer file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); // get pointers on socket socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); socket_xp = XPTR( file_cxy , socket_ptr ); // check file descriptor type if( file_type != FILE_TYPE_SOCK ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : illegal file type thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, this->trdid, cycle ); #endif return -1; } // build extended pointer on lock protecting socket socket_lock_xp = XPTR( file_cxy , &socket_ptr->lock ); // take the socket lock remote_queuelock_acquire( socket_lock_xp ); // get socket type, state, and channel socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); nic_channel = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel )); ////////////////////////////////////////////////////// if( is_send ) // SEND command { #if DEBUG_SOCKET_SEND cycle = (uint32_t)hal_get_cycles(); if (DEBUG_SOCKET_SEND < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] get SEND / length %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle ); #endif // check no previous TX 
command if( hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )) == true ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : previous TX command / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // get tx_buf pointer from socket pointer tx_buf = (uint8_t*)hal_remote_lpt( XPTR( file_cxy , &socket_ptr->tx_buf )); // copy data from user u_buf to kernel socket tx_buf hal_copy_from_uspace( XPTR( file_cxy , tx_buf ), u_buf, length ); #if DEBUG_SOCKET_SEND if (DEBUG_SOCKET_SEND < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] copied %d bytes to tx_buf (%x,%x)\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, file_cxy, tx_buf ); putb("tx_buf : 16 first data bytes" , tx_buf , 16 ); #endif // register command in socket descriptor hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , client_xp ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd ) , CMD_TX_SEND ); hal_remote_spt( XPTR( file_cxy , &socket_ptr->tx_buf ) , tx_buf ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_len ) , length ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_todo ) , length ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_ack ) , 0 ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_valid ) , true ); // release socket lock remote_queuelock_release( socket_lock_xp ); // get pointers on relevant chdev chdev_xp = chdev_dir.nic_tx[nic_channel]; chdev_ptr = GET_PTR( chdev_xp ); chdev_cxy = GET_CXY( chdev_xp ); // get pointers on NIC_TX[channel] server thread server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server )); server_xp = XPTR( chdev_cxy , server_ptr ); // unblocks the NIC_TX server thread thread_unblock( server_xp , THREAD_BLOCKED_CLIENT ); // start retransmission timer for TCP socket if( socket_type == SOCK_STREAM ) { alarm_start( client_xp, hal_get_cycles() + CONFIG_SOCK_RETRY_TIMEOUT, 
&socket_alarm_handler, socket_xp ); } #if DEBUG_SOCKET_SEND if( DEBUG_SOCKET_SEND < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] registers SEND => blocks on \n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid ); #endif // client thread blocks itself and deschedules thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "blocked in nic_io" ); #if DEBUG_SOCKET_SEND cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_SEND < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] resumes for SEND / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif // stop retransmission timer for TCP socket if( socket_type == SOCK_STREAM ) { alarm_stop( client_xp ); } // take socket lock remote_queuelock_acquire( socket_lock_xp ); // get tx_valid, tx_todo, and tx_sts tx_todo = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_todo )); cmd_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_valid )); cmd_sts = hal_remote_l32( XPTR( file_cxy , &socket_ptr->tx_sts )); // reset tx_client in socket descriptor hal_remote_s64( XPTR( file_cxy , &socket_ptr->tx_client ) , XPTR_NULL ); // release socket lock remote_queuelock_release( socket_lock_xp ); // check SEND command completed when TX client thread resumes assert( __FUNCTION__, (((tx_todo == 0) || (cmd_sts != CMD_STS_SUCCESS)) && (cmd_valid == false)), "client thread resumes from SEND / bad state : tx_todo %d / tx_sts %d / tx_valid %d", tx_todo, cmd_sts, cmd_valid ); if( cmd_sts != CMD_STS_SUCCESS ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : reported for SEND / socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif return -1; } else { #if DEBUG_SOCKET_SEND cycle = (uint32_t)hal_get_cycles(); if (DEBUG_SOCKET_SEND < cycle ) printk("\n[%s] thread[%x,%x] SEND success / socket[%x,%d] / bytes %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle ); 
#endif return length; } } // end SEND command ///////////////////////////////////////////////////////////// else // RECV command { #if DEBUG_SOCKET_RECV if (DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] get RECV / length %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, length, cycle ); #endif // check no previous RX command if( hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )) == true ) { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : previous RX command on socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif remote_queuelock_release( socket_lock_xp ); return -1; } // return EOF for a TCP socket not in ESTAB state if( (socket_type == SOCK_STREAM ) && (socket_state != TCP_STATE_ESTAB) ) { // release socket lock remote_queuelock_release( socket_lock_xp ); #if DEBUG_SOCKET_RECV cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] thread[%x,%x] / socket[%x,%d] TCP connection closed / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif return 0; } // build extended pointer on socket rx_buf rx_buf_xp = XPTR( file_cxy , &socket_ptr->rx_buf ); // get socket rx_buf status rx_buf_sts = remote_buf_status( rx_buf_xp ); // register RECV command and deschedule when rx_buf empty if( rx_buf_sts == 0 ) { // registers RX_RECV command in socket descriptor hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_cmd ) , CMD_RX_RECV ); hal_remote_s64( XPTR( file_cxy , &socket_ptr->rx_client ) , client_xp ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->rx_valid ) , true ); // release socket lock remote_queuelock_release( socket_lock_xp ); #if DEBUG_SOCKET_RECV if( DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] for RECV : rx_buf empty => blocks on \n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid ); #endif // client thread blocks itself and deschedules 
thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "blocked in nic_io" ); #if DEBUG_SOCKET_RECV cycle = (uint32_t)hal_get_cycles(); if( DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] thread[%x,%x] socket[%x,%d] for RECV : resumes / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, cycle ); #endif // take socket lock remote_queuelock_acquire( socket_lock_xp ); // get command status, command valid, and rx_buf status cmd_valid = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_valid )); cmd_sts = hal_remote_l32( XPTR( file_cxy , &socket_ptr->rx_sts )); rx_buf_sts = remote_buf_status( rx_buf_xp ); assert( __FUNCTION__, (cmd_valid == false), "client thread resumes from RECV but rx_valid is true" ); if( cmd_sts == CMD_STS_EOF ) // EOF reported by RX server { #if DEBUG_SOCKET_RECV if( DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] EOF received for socket[%x,%d] / thread[%x,%x]\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid ); #endif // release socket lock remote_queuelock_release( socket_lock_xp ); return 0; } else if( cmd_sts != CMD_STS_SUCCESS ) // error reported by RX server { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : rx_server for socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif // release socket lock remote_queuelock_release( socket_lock_xp ); return -1; } else if( rx_buf_sts == 0 ) // annormally empty rx_buf { #if DEBUG_SOCKET_ERROR printk("\n[ERROR] in %s : rx_buf empty for socket[%x,%d] / thread[%x,%x] / cycle %d\n", __FUNCTION__, process->pid, fdid, process->pid, this->trdid, cycle ); #endif // release socket lock remote_queuelock_release( socket_lock_xp ); return -1; } } // number of bytes extracted from rx_buf cannot be larger than u_buf size moved_bytes = ( length < rx_buf_sts ) ? 
length : rx_buf_sts; // move data from kernel rx_buf to user u_buf remote_buf_get_to_user( rx_buf_xp, u_buf, moved_bytes ); #if DEBUG_SOCKET_RECV cycle = (uint32_t)hal_get_cycles(); if (DEBUG_SOCKET_RECV < cycle ) printk("\n[%s] thread[%x,%x] : RECV success / socket[%x,%d] / bytes %d / cycle %d\n", __FUNCTION__, process->pid, this->trdid, process->pid, fdid, moved_bytes, cycle ); #endif // release socket lock remote_queuelock_release( socket_lock_xp ); return moved_bytes; } // end RECV command } // end socket_move_data() /////////////////////////////////// int socket_send( uint32_t fdid, uint8_t * u_buf, uint32_t length ) { return socket_move_data( true, // SEND fdid, u_buf, length ); } // end socket_send() /////////////////////////////////// int socket_recv( uint32_t fdid, uint8_t * u_buf, uint32_t length ) { return socket_move_data( false, // RECV fdid, u_buf, length ); } // end socket_recv() //////////////////////////////////// int socket_sendto( uint32_t fdid, uint8_t * u_buf, uint32_t length, uint32_t remote_ip, uint16_t remote_port ) { printk("\n[ERROR] in %s : this function is not implemented yet\n", __FUNCTION__, fdid, u_buf, length, remote_ip, remote_port ); return -1; } // end socket_sendto() ////////////////////////////////////// int socket_recvfrom( uint32_t fdid, uint8_t * u_buf, uint32_t length, uint32_t remote_ip, uint16_t remote_port ) { printk("\n[ERROR] in %s : this function is not implemented yet\n", __FUNCTION__, fdid, u_buf, length, remote_ip, remote_port ); return -1; } // end socket_recvfrom() //////////////////////////////////////////// void socket_display( xptr_t socket_xp, const char * func_str, const char * string ) { uint32_t cycle = (uint32_t)hal_get_cycles(); socket_t * socket = GET_PTR( socket_xp ); cxy_t cxy = GET_CXY( socket_xp ); pid_t pid = hal_remote_l32( XPTR( cxy , &socket->pid )); fdid_t fdid = hal_remote_l32( XPTR( cxy , &socket->fdid )); uint32_t state = hal_remote_l32( XPTR( cxy , &socket->state )); uint32_t channel = 
hal_remote_l32( XPTR( cxy , &socket->nic_channel )); uint32_t local_addr = hal_remote_l32( XPTR( cxy , &socket->local_addr )); uint32_t local_port = hal_remote_l32( XPTR( cxy , &socket->local_port )); uint32_t remote_addr = hal_remote_l32( XPTR( cxy , &socket->remote_addr )); uint32_t remote_port = hal_remote_l32( XPTR( cxy , &socket->remote_port )); uint32_t tx_valid = hal_remote_l32( XPTR( cxy , &socket->tx_valid )); xptr_t tx_client = hal_remote_l64( XPTR( cxy , &socket->tx_client )); uint32_t tx_cmd = hal_remote_l32( XPTR( cxy , &socket->tx_cmd )); uint32_t tx_sts = hal_remote_l32( XPTR( cxy , &socket->tx_sts )); uint32_t tx_len = hal_remote_l32( XPTR( cxy , &socket->tx_len )); uint32_t tx_todo = hal_remote_l32( XPTR( cxy , &socket->tx_todo )); uint32_t tx_una = hal_remote_l32( XPTR( cxy , &socket->tx_una )); uint32_t tx_nxt = hal_remote_l32( XPTR( cxy , &socket->tx_nxt )); uint32_t tx_wnd = hal_remote_l32( XPTR( cxy , &socket->tx_wnd )); uint32_t tx_ack = hal_remote_l32( XPTR( cxy , &socket->tx_ack )); uint32_t rx_valid = hal_remote_l32( XPTR( cxy , &socket->rx_valid )); xptr_t rx_client = hal_remote_l64( XPTR( cxy , &socket->rx_client )); uint32_t rx_cmd = hal_remote_l32( XPTR( cxy , &socket->rx_cmd )); uint32_t rx_sts = hal_remote_l32( XPTR( cxy , &socket->rx_sts )); uint32_t rx_nxt = hal_remote_l32( XPTR( cxy , &socket->rx_nxt )); uint32_t rx_wnd = hal_remote_l32( XPTR( cxy , &socket->rx_wnd )); uint32_t rx_irs = hal_remote_l32( XPTR( cxy , &socket->rx_irs )); remote_queuelock_t * lock_ptr = &socket->lock; uint32_t taken = hal_remote_l32( XPTR( cxy , &lock_ptr->taken )); thread_t * tx_ptr = GET_PTR( tx_client ); cxy_t tx_cxy = GET_CXY( tx_client ); trdid_t tx_tid = hal_remote_l32( XPTR( tx_cxy , &tx_ptr->trdid )); thread_t * rx_ptr = GET_PTR( rx_client ); cxy_t rx_cxy = GET_CXY( rx_client ); trdid_t rx_tid = hal_remote_l32( XPTR( rx_cxy , &rx_ptr->trdid )); if( string == NULL ) { printk("\n****** socket[%x,%d] / lock %d / in %s / cycle %d *****\n", pid, 
fdid, taken, func_str, cycle ); } else { printk("\n***** socket[%x,%d] / lock %d / in %s %s / cycle %d *****\n", pid, fdid, taken, func_str, string, cycle ); } printk(" - state %s / channel %d / local [%x,%x] / remote[%x,%x]\n" " - tx : valid %d / client [%x,%x] / cmd %s \n" " sts %d / len %x / todo %x / ack %x / una %x / nxt %x / wnd %x\n" " - rx : valid %d / client [%x,%x] / cmd %s\n" " sts %d / nxt %x / wnd %x / irs %x\n", socket_state_str(state), channel, local_addr, local_port, remote_addr, remote_port, tx_valid, pid, tx_tid, socket_cmd_type_str(tx_cmd), tx_sts, tx_len, tx_todo, tx_ack, tx_una, tx_nxt, tx_wnd, rx_valid, pid, rx_tid, socket_cmd_type_str(rx_cmd), rx_sts, rx_nxt, rx_wnd, rx_irs ); } // end socket_display()