/* * dev_nic.c - NIC (Network Controler) generic device API implementation. * * Author Alain Greiner (2016,2017,2018,2019,2020) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include ///////////////////////////////////////////////////////////////////////////////////////// // Extern global variables ///////////////////////////////////////////////////////////////////////////////////////// extern chdev_directory_t chdev_dir; // allocated in kernel_init.c //////////////////////////////////////////////////////////////////////////////////////////// // This static function is used by the dev_nic_rx_handle_tcp() & dev_nic_tx_handle_tcp() // functions to check acceptability of a given sequence number. It returns true when // the argument is contained in a wrap-around window defined by the and // arguments. The window wrap-around when (min > max). //////////////////////////////////////////////////////////////////////////////////////////// // @ seq : [in] value to be checked. // @ min : [in] first base. // @ max : [in] window size. 
//////////////////////////////////////////////////////////////////////////////////////////// static inline bool_t is_in_window( uint32_t seq, uint32_t min, uint32_t max ) { if( max >= min ) // no wrap_around => only one window [min,max] { return( (seq >= min) && (seq <= max) ); } else // window wrap-around => two windows [min,0xFFFFFFFF] and [0,max] { return( (seq <= max) || (seq >= min) ); } } //////////////////////////////////////////////////////////////////////////////////////////// // this static function compute a channel index in range [0,nic_channelx[ from // a remote IP address and remote port. // TODO this function should be provided by the NIC driver. //////////////////////////////////////////////////////////////////////////////////////////// // @ addr : [in] IP address. // @ port : [in] TCP/UDP port. //////////////////////////////////////////////////////////////////////////////////////////// static inline uint32_t dev_nic_channel_index( uint32_t addr, uint16_t port ) { // get number of NIC channels uint32_t nic_channels = LOCAL_CLUSTER->nb_nic_channels; // compute NIC channel index return ( ((addr ) & 0xFF) ^ ((addr > 8 ) & 0xFF) ^ ((addr > 16) & 0xFF) ^ ((addr > 24) & 0xFF) ^ ((port ) & 0xFF) ^ ((port > 8 ) & 0xFF) ) % nic_channels; } //////////////////////////////////////////////////////////////////////////////////////// // This static function computes the checksum for an IP packet header. // The "checksum" field itself is not taken into account for this computation. 
////////////////////////////////////////////////////////////////////////////////////////
// @ buffer      : [in] pointer on IP packet header (20 bytes)
// @ return the checksum value on 16 bits
////////////////////////////////////////////////////////////////////////////////////////
uint16_t dev_nic_ip_checksum( uint8_t * buffer )
{
    uint32_t   i;
    uint32_t   cs  = 0;                       // 32 bits accumulator
    uint16_t * buf = (uint16_t *)buffer;      // header seen as 10 half-words

    // accumulate the 16 bits words, skipping word 5 (the checksum field itself)
    for( i = 0 ; i < 10 ; i++ )
    {
        if( i != 5 ) cs += buf[i];
    }

    // end-around carry : fold the upper 16 bits into the lower 16 bits
    // (two steps are enough, as at most 9 half-words have been accumulated)
    cs = (cs & 0xFFFF) + (cs >> 16);
    cs = (cs & 0xFFFF) + (cs >> 16);

    // one's complement
    return (uint16_t)(~cs & 0xFFFF);
}

////////////////////////////////////////////////////////////////////////////////////////
// This static function computes the checksum for an UDP packet defined by
// the "buffer" and "size" arguments. When "size" is odd, the last byte is
// completed by a zero byte for the computation, without modifying the buffer.
// The 32 bits accumulator supports packets up to 128 Kbytes.
////////////////////////////////////////////////////////////////////////////////////////
// @ buffer      : [in] pointer on UDP packet base.
// @ size        : [in] number of bytes in this packet (including header).
// @ return the checksum value on 16 bits
////////////////////////////////////////////////////////////////////////////////////////
uint16_t dev_nic_udp_checksum( uint8_t  * buffer,
                               uint32_t   size )
{
    uint32_t   i;
    uint32_t   cs  = 0;                       // 32 bits accumulator
    uint16_t * buf = (uint16_t *)buffer;
    uint32_t   max = size >> 1;               // number of complete uint16_t in packet

    // accumulate all complete 16 bits words (the bound is a number of words,
    // not a number of bytes)
    for( i = 0 ; i < max ; i++ ) cs += buf[i];

    // handle the last byte of an odd-sized packet : build the padded word in a
    // local variable, with the same byte order as the words read from memory
    if( size & 1 )
    {
        union { uint8_t bytes[2]; uint16_t word; } last;

        last.bytes[0] = buffer[size - 1];
        last.bytes[1] = 0;
        cs += last.word;
    }

    // end-around carry : fold until the sum fits in 16 bits
    while( cs >> 16 ) cs = (cs & 0xFFFF) + (cs >> 16);

    // one's complement
    return (uint16_t)(~cs & 0xFFFF);
}

////////////////////////////////////////////////////////////////////////////////////////
// This static function computes the checksum for a TCP segment defined by
// the "buffer" and "size" arguments.
// It includes the pseudo header defined by the "src_ip_addr", "dst_ip_addr", "size"
// arguments, and by the PROTOCOL_TCP code.
//////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on TCP segment base. // @ size : [in] number of bytes in this segment (including header). // @ src_ip_addr : [in] source IP address (pseudo header) // @ dst_ip_addr : [in] destination IP address (pseudo header) // @ return the checksum value on 16 bits //////////////////////////////////////////////////////////////////////////////////////// uint16_t dev_nic_tcp_checksum( uint8_t * buffer, uint32_t size, uint32_t src_ip_addr, uint32_t dst_ip_addr ) { uint32_t i; uint32_t carry; uint32_t cs; // 32 bits accumulator uint16_t * buf; uint32_t max; // number of uint16_t in segment // compute max & buf buf = (uint16_t *)buffer; max = size >> 1; // extend buffer[] if required if( size & 1 ) { max++; buffer[size] = 0; } // compute checksum for TCP segment for( i = 0 , cs = 0 ; i < size ; i++ ) cs += buf[i]; // complete checksum for pseudo-header cs += src_ip_addr; cs += dst_ip_addr; cs += PROTOCOL_TCP; cs += size; // handle carry carry = (cs >> 16); if( carry ) { cs += carry; carry = (cs >> 16); if( carry ) cs += carry; } // one's complement return ~cs; } ////////////////////////////////// void dev_nic_init( chdev_t * nic ) { // get "channel" & "is_rx" fields from chdev descriptor uint32_t channel = nic->channel; bool_t is_rx = nic->is_rx; // set chdev name if( is_rx ) snprintf( nic->name , 16 , "nic%d_rx" , channel ); else snprintf( nic->name , 16 , "nic%d_tx" , channel ); // call driver init function hal_drivers_nic_init( nic ); // select a core to execute the NIC server thread lid_t lid = cluster_select_local_core( local_cxy ); // bind the NIC IRQ to the selected core // but does NOT enable it dev_pic_bind_irq( lid , nic ); // create server thread thread_t * new_thread; error_t error; error = thread_kernel_create( &new_thread, THREAD_DEV, &chdev_server_func, nic, lid ); assert( (error == 0) , "cannot create server thread" ); // set "server" field in chdev 
descriptor nic->server = new_thread; // set "chdev" field in thread descriptor new_thread->chdev = nic; // unblock server thread thread_unblock( XPTR( local_cxy , new_thread ) , THREAD_BLOCKED_GLOBAL ); } // end dev_nic_init() ///////////////////////////////////////////////////////////////////////////////////////// // Functions implementing the SOCKET related syscalls ///////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////// int dev_nic_socket( uint32_t domain, uint32_t type ) { uint32_t fdid; socket_t * socket; error_t error; // allocate memory for the file descriptor and for the socket error = socket_create( local_cxy, domain, type, &socket, // unused here &fdid ); if( error ) return -1; return fdid; } //////////////////////////////// int dev_nic_bind( uint32_t fdid, uint32_t addr, uint16_t port ) { vfs_inode_type_t type; socket_t * socket; uint32_t state; thread_t * this = CURRENT_THREAD; process_t * process = this->process; // get pointers on file descriptor xptr_t file_xp = process_fd_get_xptr( process , fdid ); vfs_file_t * file_ptr = GET_PTR( file_xp ); cxy_t file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { printk("\n[ERROR] in %s : undefined fdid %d", __FUNCTION__, fdid ); return -1; } type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( type != INODE_TYPE_SOCK ) { printk("\n[ERROR] in %s : illegal file type %s", __FUNCTION__, vfs_inode_type_str( type ) ); return -1; } state = (type == SOCK_STREAM) ? 
TCP_STATE_BOUND : UDP_STATE_BOUND; // update the socket descriptor hal_remote_s32( XPTR( file_cxy , &socket->local_addr ) , addr ); hal_remote_s32( XPTR( file_cxy , &socket->local_port ) , port ); hal_remote_s32( XPTR( file_cxy , &socket->state ) , state ); return 0; } // end dev_nic_bind() ////////////////////////////////// int dev_nic_listen( uint32_t fdid, uint32_t max_pending ) { xptr_t file_xp; vfs_file_t * file_ptr; cxy_t file_cxy; vfs_inode_type_t file_type; socket_t * socket_ptr; uint32_t socket_type; uint32_t socket_state; thread_t * this = CURRENT_THREAD; process_t * process = this->process; if( max_pending != 0 ) { printk("\n[WARNING] in %s : max_pending argument non supported\n", __FUNCTION__ ); } // get pointers on file descriptor file_xp = process_fd_get_xptr( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { printk("\n[ERROR] in %s : undefined fdid %d", __FUNCTION__, fdid ); return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != INODE_TYPE_SOCK ) { printk("\n[ERROR] in %s : illegal file type %s", __FUNCTION__, vfs_inode_type_str(file_type) ); return -1; } // get socket type and state socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); // check socket type if( socket_type != SOCK_STREAM ) { printk("\n[ERROR] in %s : illegal socket type", __FUNCTION__ ); return -1; } // check socket state if( socket_state != TCP_STATE_BOUND ) { printk("\n[ERROR] in %s : illegal socket state %s", __FUNCTION__, socket_state_str(socket_state) ); return -1; } // update socket.state hal_remote_s32( XPTR( file_cxy , &socket_ptr->state ) , TCP_STATE_LISTEN ); return 0; } // end dev_nic_listen() /////////////////////////////////// int dev_nic_connect( uint32_t fdid, uint32_t 
remote_addr, uint16_t remote_port ) { vfs_inode_type_t file_type; socket_t * socket; uint32_t socket_state; // socket state uint32_t socket_type; // socket type uint32_t local_addr; // local IP address uint32_t local_port; // local port xptr_t tx_server_xp; // extended pointer on TX server thread thread_t * tx_server_ptr; // local pointer on TX server thread thread_t * this = CURRENT_THREAD; process_t * process = this->process; // get pointers on file descriptor xptr_t file_xp = process_fd_get_xptr( process , fdid ); vfs_file_t * file_ptr = GET_PTR( file_xp ); cxy_t file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { printk("\n[ERROR] in %s : undefined fdid %d", __FUNCTION__, fdid ); return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != INODE_TYPE_SOCK ) { printk("\n[ERROR] in %s : illegal file type %s", __FUNCTION__, vfs_inode_type_str( file_type ) ); return -1; } // get relevant socket infos socket_type = hal_remote_l32( XPTR( file_cxy , &socket->type ) ); socket_state = hal_remote_l32( XPTR( file_cxy , &socket->state ) ); local_addr = hal_remote_l32( XPTR( file_cxy , &socket->local_addr ) ); local_port = hal_remote_l32( XPTR( file_cxy , &socket->local_port ) ); if( socket_type == SOCK_DGRAM ) // UDP { if( socket_state != UDP_STATE_BOUND ) { printk("\n[ERROR] in %s : illegal socket statea %s for CONNECT", __FUNCTION__, socket_state_str(socket_state) ); return -1; } } else if( socket_type == SOCK_STREAM ) // TCP { if( socket_state != TCP_STATE_BOUND ) { printk("\n[ERROR] in %s : illegal socket state %s for CONNECT", __FUNCTION__, socket_state_str(socket_state) ); return -1; } } else { printk("\n[ERROR] in %s : illegal socket type %d for CONNECT", __FUNCTION__, socket_type ); return -1; } // compute nic_channel index from remote_addr and remote_port uint32_t nic_channel = dev_nic_channel_index( remote_addr 
, remote_port ); // link new socket to chdev servers socket_link_to_servers( XPTR( file_cxy , socket ), nic_channel ); // update the socket descriptor hal_remote_s32( XPTR( file_cxy , &socket->remote_addr ) , remote_addr ); hal_remote_s32( XPTR( file_cxy , &socket->remote_port ) , remote_port ); hal_remote_s32( XPTR( file_cxy , &socket->nic_channel ) , nic_channel ); // the actual connection mechanism depends on socket type // UDP : client thread directly updates the local socket state // TCP : client thread request TX server thread to start the 3 steps handshake if( socket_type == SOCK_DGRAM ) // UDP { // directly update the local socket state hal_remote_s32( XPTR( file_cxy , &socket->state ) , UDP_STATE_CONNECT ); } else // TCP { // get pointers on NIC_TX[index] chdev xptr_t tx_chdev_xp = chdev_dir.nic_tx[nic_channel]; chdev_t * tx_chdev_ptr = GET_PTR( tx_chdev_xp ); cxy_t tx_chdev_cxy = GET_CXY( tx_chdev_xp ); // get pointers on NIC_TX[channel] server thread tx_server_ptr = hal_remote_lpt( XPTR( tx_chdev_cxy , &tx_chdev_ptr->server )); tx_server_xp = XPTR( tx_chdev_cxy , tx_server_ptr ); // register command arguments in socket descriptor hal_remote_s64( XPTR( file_cxy , &socket->tx_cmd ), SOCKET_TX_CONNECT ); // update the "tx_client" field in socket descriptor hal_remote_s64( XPTR( file_cxy , &socket->tx_client ), XPTR( local_cxy , this ) ); // unblock NIC_TX server thread thread_unblock( tx_server_xp , THREAD_BLOCKED_CLIENT ); // block on THREAD_BLOCKED_IO condition and deschedules thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "blocked in connect" ); // reset the "tx_client" field in socket descriptor hal_remote_s64( XPTR( file_cxy , &socket->tx_client ), XPTR_NULL ); } return 0; } // end dev_nic_connect() //////////////////////////////////// int dev_nic_accept( uint32_t fdid, uint32_t * remote_addr, uint16_t * remote_port ) { xptr_t file_xp; // extended pointer on remote file vfs_file_t * file_ptr; cxy_t file_cxy; vfs_inode_type_t 
file_type; // file descriptor type socket_t * socket; // local pointer on remote waiting socket uint32_t socket_type; // waiting socket type uint32_t socket_state; // waiting socket state uint32_t socket_domain; // waiting socket domain uint32_t socket_local_addr; // waiting socket local IP address uint32_t socket_local_port; // waiting socket local port xptr_t crqq_xp; // extended pointer on socket.crqq queue socket_t * new_socket; // local pointer on new socket uint32_t new_fdid; // new socket file descriptor index sockaddr_t new_sockaddr; // one request in crqq queue uint32_t new_remote_addr; // new socket remote IP address uint32_t new_remote_port; // new socket remote port error_t error; thread_t * this = CURRENT_THREAD; process_t * process = this->process; // get pointers on file descriptor file_xp = process_fd_get_xptr( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); // check file_xp if( file_xp == XPTR_NULL ) { printk("\n[ERROR] in %s : undefined fdid %d", __FUNCTION__, fdid ); return -1; } file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); socket = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != INODE_TYPE_SOCK ) { printk("\n[ERROR] in %s : illegal file type %s / thread[%x,%x]\n", __FUNCTION__, vfs_inode_type_str(file_type), process->pid, this->trdid ); return -1; } // get socket type, domain, state, local_addr and local_port socket_type = hal_remote_l32( XPTR( file_cxy , &socket->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket->state )); socket_domain = hal_remote_l32( XPTR( file_cxy , &socket->domain )); socket_local_addr = hal_remote_l32( XPTR( file_cxy , &socket->local_addr )); socket_local_port = hal_remote_l32( XPTR( file_cxy , &socket->local_port )); // check socket type if( socket_type != SOCK_STREAM ) { printk("\n[ERROR] in %s : illegal socket type / thread[%x,%x]\n", __FUNCTION__, process->pid , this->trdid ); return -1; } // 
check socket state if( socket_state != TCP_STATE_LISTEN ) { printk("\n[ERROR] in %s : illegal socket state %s / thread[%x,%x]\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid ); return -1; } // select a cluster for the new socket cxy_t new_cxy = cluster_random_select(); // allocate memory for the new socket descriptor error = socket_create( new_cxy, socket_domain, socket_type, &new_socket, &new_fdid ); if( error ) { printk("\n[ERROR] in %s : cannot allocate new socket / thread[%x,%x]\n", __FUNCTION__, process->pid, this->trdid ); return -1; } // build extended pointer on socket.crqq crqq_xp = XPTR( file_cxy , &socket->crqq ); // blocks and deschedules if requests queue empty if( remote_buf_status( crqq_xp ) == 0 ) { thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "socket.crqq queue empty"); } // extract first request from the socket.crqq queue remote_buf_get_to_kernel( crqq_xp, (uint8_t *)(&new_sockaddr), sizeof(sockaddr_t) ); new_remote_addr = new_sockaddr.s_addr; new_remote_port = new_sockaddr.s_port; // compute NIC channel index from remote_addr and remote_port uint32_t nic_channel = dev_nic_channel_index( new_remote_addr , new_remote_port ); // update new socket descriptor new_socket->local_addr = hal_remote_l32(XPTR( file_cxy , &socket->local_addr )); new_socket->local_port = hal_remote_l32(XPTR( file_cxy , &socket->local_port )); new_socket->remote_addr = new_remote_addr; new_socket->remote_port = new_remote_port; new_socket->nic_channel = nic_channel; // link new socket to chdev servers socket_link_to_servers( XPTR( new_cxy , new_socket ), nic_channel ); // return success *remote_addr = new_remote_addr; *remote_port = new_remote_port; return new_fdid; } // end dev_nic_accept() //////////////////////////////////////////////////////////////////////////////////////// // This static and blocking function is called by the four functions : // dev_nic_send() / dev_nic_recv() / dev_nic_sendto() / 
// dev_nic_recvfrom().
////////////////////////////////////////////////////////////////////////////////////////
// Implementation note
// The behavior is very different for SEND & RECV :
// - For a SEND, the client thread checks that there is no TX command registered
//   in the socket. It registers the command arguments in the socket descriptor
//   (tx_client, tx_cmd, tx_buf, tx_len). Then the client thread unblocks the
//   TX server thread from the BLOCKED_CLIENT condition, blocks itself on the
//   BLOCKED_IO condition, and deschedules. It is unblocked by the TX server thread
//   when the last byte has been sent (for UDP) or acknowledged (for TCP).
//   When the client thread resumes, it resets the command in socket, and returns.
// - For a RECV, the client thread checks that there is no RX command registered
//   in the socket. It registers itself in socket (rx_client). It checks the status
//   of the receive buffer. If the rx_buf is empty, it blocks on the BLOCKED_IO
//   condition, and deschedules. It is unblocked by the RX server thread when an UDP
//   packet or TCP segment has been written in the rx_buf. When it resumes, it moves
//   the available data from the rx_buf to the user buffer, resets its registration
//   in socket (resets the rx_buf for an UDP socket), and returns.
//////////////////////////////////////////////////////////////////////////////////////// int dev_nic_register_cmd( bool_t is_send, uint32_t fdid, uint8_t * u_buf, uint32_t length, bool_t explicit, uint32_t explicit_addr, uint32_t explicit_port ) { vfs_inode_type_t file_type; // file descriptor type socket_t * socket_ptr; // local pointer on socket descriptor uint32_t socket_state; // current socket state uint32_t socket_type; // socket type (UDP/TCP) uint32_t nic_channel; // NIC channel for this socket xptr_t socket_lock_xp; // extended pointer on socket lock xptr_t file_xp; // extended pointer on file descriptor vfs_file_t * file_ptr; cxy_t file_cxy; xptr_t chdev_xp; // extended pointer on NIC_TX[channel] chdev chdev_t * chdev_ptr; cxy_t chdev_cxy; uint32_t remote_addr; uint32_t remote_port; uint32_t status; // number of bytes in rx_buf int32_t moved_bytes; // total number of moved bytes (fot return) xptr_t server_xp; // extended pointer on NIC_TX / NIC_RX server thread thread_t * server_ptr; // local pointer on NIC_TX / NIC_RX server thread thread_t * this = CURRENT_THREAD; process_t * process = this->process; // get pointers on file descriptor identifying the socket file_xp = process_fd_get_xptr( process , fdid ); file_ptr = GET_PTR( file_xp ); file_cxy = GET_CXY( file_xp ); if( file_xp == XPTR_NULL ) { printk("\n[ERROR] in %s : undefined fdid %d / thread%x,%x]\n", __FUNCTION__, fdid , process->pid, this->trdid ); return -1; } // get file type and socket pointer file_type = hal_remote_l32( XPTR( file_cxy , &file_ptr->type ) ); // get local pointer on socket socket_ptr = hal_remote_lpt( XPTR( file_cxy , &file_ptr->socket ) ); // check file descriptor type if( file_type != INODE_TYPE_SOCK ) { printk("\n[ERROR] in %s : illegal file type %s / fdid %d / thread%x,%x]\n", __FUNCTION__, vfs_inode_type_str(file_type), fdid, process->pid, this->trdid ); return -1; } // build extended pointer on file lock protecting socket socket_lock_xp = XPTR( file_cxy , &file_ptr->lock 
); // take the socket lock remote_rwlock_wr_acquire( socket_lock_xp ); // get socket type, state, and channel socket_type = hal_remote_l32( XPTR( file_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( file_cxy , &socket_ptr->state )); nic_channel = hal_remote_l32( XPTR( file_cxy , &socket_ptr->nic_channel )); // check socket state / type if( socket_type == SOCK_STREAM ) // TCP socket { if( socket_state != TCP_STATE_ESTAB ) { printk("\n[ERROR] in %s : illegal SEND/RECV for state %s / thread%x,%x]\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid ); return -1; } if( explicit ) { // get remote IP address and type from socket descriptor remote_addr = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_addr )); remote_port = hal_remote_l32( XPTR( file_cxy , &socket_ptr->remote_port )); if( (remote_addr != explicit_addr) || (remote_port != explicit_port) ) { printk("\n[ERROR] in %s : wrong expliciy access / thread%x,%x]\n", __FUNCTION__, process->pid, this->trdid ); return -1; } } } else // UDP socket { if( explicit ) { if( socket_state == UDP_STATE_UNBOUND ) { printk("\n[ERROR] in %s : illegal SEND/RECV for state %s / thread%x,%x]\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid ); return -1; } // update remote IP address and port into socket descriptor hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_addr ), explicit_addr ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->remote_port ), explicit_port ); } else { if( socket_state != UDP_STATE_CONNECT ) { printk("\n[ERROR] in %s : illegal SEND/RECV for state %s / thread%x,%x]\n", __FUNCTION__, socket_state_str(socket_state), process->pid, this->trdid ); return -1; } } } /////////////////////////////////////////////////////// if( is_send ) // SEND command { // build extended pointer on socket "tx_client" xptr_t client_xp = XPTR( file_cxy , &socket_ptr->tx_client ); // check no previous SEND command xptr_t client = hal_remote_l64( client_xp ); if( 
client != XPTR_NULL ) // release socket lock and return error { // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // get previous thread cluster & local pointer cxy_t prev_cxy = GET_CXY( client ); thread_t * prev_ptr = GET_PTR( client ); // get previous command type and trdid uint32_t prev_cmd = hal_remote_l32( XPTR( prev_cxy , &prev_ptr->nic_cmd.type )); uint32_t prev_tid = hal_remote_l32( XPTR( prev_cxy , &prev_ptr->trdid )); printk("\n[ERROR] in %s : previous command %s for thread %x / thread%x,%x]\n", __FUNCTION__, socket_cmd_str(prev_cmd), prev_tid, process->pid, this->trdid ); return -1; } // client thread registers in socket descriptor hal_remote_s64( client_xp , XPTR( local_cxy , this ) ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_cmd ) , SOCKET_TX_SEND ); hal_remote_spt( XPTR( file_cxy , &socket_ptr->tx_buf ) , u_buf ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_len ) , length ); hal_remote_s32( XPTR( file_cxy , &socket_ptr->tx_todo ) , length ); // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // get pointers on relevant chdev chdev_xp = chdev_dir.nic_tx[nic_channel]; chdev_ptr = GET_PTR( chdev_xp ); chdev_cxy = GET_CXY( chdev_xp ); // get pointers on NIC_TX[channel] server thread server_ptr = hal_remote_lpt( XPTR( chdev_cxy , &chdev_ptr->server )); server_xp = XPTR( chdev_cxy , server_ptr ); // unblocks the NIC_TX server thread thread_unblock( server_xp , THREAD_BLOCKED_CLIENT ); // client thread blocks itself and deschedules thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "blocked in nic_io" ); // take the socket lock when unblocked remote_rwlock_wr_acquire( socket_lock_xp ); // unlink client thread from socket hal_remote_s64( client_xp , XPTR_NULL ); // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // exit waiting loop and return return length; } // end SEND //////////////////////////////////////////////////////// else // RECV command { // build extended 
pointers on socket "rx_client" xptr_t client_xp = XPTR( file_cxy , &socket_ptr->rx_client ); // check no previous RECV command xptr_t client = hal_remote_l64( client_xp ); if( client != XPTR_NULL ) // release socket lock and return error { // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // get previous thread cluster & local pointer cxy_t prev_cxy = GET_CXY( client ); thread_t * prev_ptr = GET_PTR( client ); // get previous command type and trdid uint32_t prev_cmd = hal_remote_l32( XPTR( prev_cxy , &prev_ptr->nic_cmd.type )); uint32_t prev_tid = hal_remote_l32( XPTR( prev_cxy , &prev_ptr->trdid )); printk("\n[ERROR] in %s : previous command %s for thread %x / thread%x,%x]\n", __FUNCTION__, socket_cmd_str(prev_cmd), prev_tid, process->pid, this->trdid ); return -1; } // build extended pointer on "rx_buf" xptr_t rx_buf_xp = XPTR( file_cxy , &socket_ptr->rx_buf ); // get rx_buf status from socket status = remote_buf_status( rx_buf_xp ); if( status == 0 ) // rx_buf empty => blocks and deschedules { // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // client thread blocks itself and deschedules thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_IO ); sched_yield( "blocked in nic_io" ); // take socket lock remote_rwlock_wr_release( socket_lock_xp ); } // number of moved bytes cannot be larger than u_buf size moved_bytes = ( length < status ) ? 
length : status; // move data from kernel rx_buf to user u_buf remote_buf_get_to_user( rx_buf_xp, u_buf, moved_bytes ); // reset rx_buf for an UDP socket if( socket_type == SOCK_DGRAM ) remote_buf_reset( rx_buf_xp ); // unlink client thread from socket hal_remote_s64( client_xp , XPTR_NULL ); // release socket lock remote_rwlock_wr_release( socket_lock_xp ); // exit waiting loop and return return moved_bytes; } // end SEND } // end dev_nic_register_cmd() /////////////////////////////////// int dev_nic_send( uint32_t fdid, uint8_t * u_buf, uint32_t length ) { #if DEBUG_DEV_NIC_TX thread_t * this = CURRENT_THREAD; process_t * process = this->process; trdid_t trdid = this->trdid; pid_t pid = process->pid; uint32_t cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_TX < cycle ) printk("[%s] thread[%x,%x] enters : fdid %d / buf %x / length %d / cycle %d\n", __FUNCTION__, pid, trdid, fdid, u_buf, length, cycle ); #endif error_t error = dev_nic_register_cmd( true, // SEND fdid, u_buf, length, false, 0, 0 ); // no explicit remote socket #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_TX < cycle ) printk("[%s] thread[%x,%x] exit : fdid %d / cycle %d\n", __FUNCTION__, pid, trdid, cycle ); #endif return error; } // end dev_nic_send() /////////////////////////////////// int dev_nic_recv( uint32_t fdid, uint8_t * u_buf, uint32_t length ) { #if DEBUG_DEV_NIC_RX thread_t * this = CURRENT_THREAD; process_t * process = this->process; trdid_t trdid = this->trdid; pid_t pid = process->pid; uint32_t cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_RX < cycle ) printk("[%s] thread[%x,%x] enters : fdid %d / buf %x / length %d / cycle %d\n", __FUNCTION__, pid, trdid, fdid, u_buf, length, cycle ); #endif error_t error = dev_nic_register_cmd( false, // RECV fdid, u_buf, length, false, 0, 0 ); // no explicit remote socket #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_RX < cycle ) printk("[%s] thread[%x,%x] exit : fdid %d / cycle 
%d\n", __FUNCTION__, pid, trdid, cycle ); #endif return error; } // end dev_nic_recv() ///////////////////////////////////// int dev_nic_sendto( uint32_t fdid, uint8_t * u_buf, uint32_t length, uint32_t remote_addr, uint32_t remote_port ) { #if DEBUG_DEV_NIC_TX thread_t * this = CURRENT_THREAD; process_t * process = this->process; trdid_t trdid = this->trdid; pid_t pid = process->pid; uint32_t cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_TX < cycle ) printk("[%s] thread[%x,%x] enters : fdid %d / buf %x / length %d / cycle %d\n", __FUNCTION__, pid, trdid, fdid, u_buf, length, cycle ); #endif error_t error = dev_nic_register_cmd( true, // SEND fdid, u_buf, length, true, // explicit remote socket remote_addr, remote_port ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_TX < cycle ) printk("[%s] thread[%x,%x] exit : fdid %d / cycle %d\n", __FUNCTION__, pid, trdid, cycle ); #endif return error; } // end dev_nic_sendto() /////////////////////////////////////// int dev_nic_recvfrom( uint32_t fdid, uint8_t * u_buf, uint32_t length, uint32_t remote_addr, uint32_t remote_port ) { #if DEBUG_DEV_NIC_RX thread_t * this = CURRENT_THREAD; process_t * process = this->process; trdid_t trdid = this->trdid; pid_t pid = process->pid; uint32_t cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_RX < cycle ) printk("[%s] thread[%x,%x] enters : fdid %d / buf %x / length %d / cycle %d\n", __FUNCTION__, pid, trdid, fdid, u_buf, length, cycle ); #endif error_t error = dev_nic_register_cmd( false, // RECV fdid, u_buf, length, true, // explicit remote socket remote_addr, remote_port ); #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycle(); if (DEBUG_DEV_NIC_RX < cycle ) printk("[%s] thread[%x,%x] exit : fdid %d / cycle %d\n", __FUNCTION__, pid, trdid, cycle ); #endif return error; } // end dev_nic_recvfrom() /////////////////////////////////////////////////////////////////////////////////////////// // Functions called by the NIC_RX server thread 
/////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the NIC_RX[channel] server thread to register // a send request defined by the argument in the R2T queue specified by // the argument. ///////////////////////////////////////////////////////////////////////////////////////// // @ queue_xp : [in] extended pointer on the R2T qeue descriptor. // @ flags : [in] flags to be set in the TCP segment. ///////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_rx_put_r2t_request( xptr_t queue_xp, uint32_t flags ) { while( 1 ) { error_t error = remote_buf_put_from_kernel( queue_xp, (uint8_t *)(&flags), 1 ); if( error ) sched_yield( "waiting R2T queue" ); else break; } } // end dev_nic_rx_put_r2t_request() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function. // It calls directly the NIC driver (with the READABLE command) and returns the status // of the NIC_RX queue identified by the argument. // in the buffer. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on NIC_TX chdev. // @ readable : [out] zero if queue empty. // @ returns 0 if success / returns -1 if failure in accessing NIC device. 
/////////////////////////////////////////////////////////////////////////////////////////// error_t dev_nic_rx_queue_readable( chdev_t * chdev, uint32_t * readable ) { thread_t * this = CURRENT_THREAD; // initialize NIC_READABLE command in thread descriptor this->nic_cmd.dev_xp = XPTR( local_cxy , chdev ); this->nic_cmd.type = NIC_CMD_READABLE; // call driver to test readable chdev->cmd( XPTR( local_cxy , this ) ); // return status *readable = this->nic_cmd.status; // return error return this->nic_cmd.error; } /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function. // It moves one Ethernet packet from the NIC_RX_QUEUE identified the argument, // to the 2K bytes kernel buffer identified by the argument. The actual // Ethernet packet length is returned in the argument. // It calls directly the NIC driver with the READ command, without registering in the // waiting queue, because only the NIC_RX server thread can access this NIC_RX_QUEUE. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on NIC_TX chdev. // @ buffer : [in] local pointer on destination kernel buffer. // @ length : [out] Ethernet packet size in bytes. // @ returns 0 if success / returns -1 if failure in accessing NIC device. 
/////////////////////////////////////////////////////////////////////////////////////////// error_t dev_nic_rx_move_packet( chdev_t * chdev, uint8_t * k_buf, uint32_t * length ) { thread_t * this = CURRENT_THREAD; #if DEBUG_DEV_NIC_RX uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] enters / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // initialize NIC_READ command in thread descriptor this->nic_cmd.type = NIC_CMD_READ; this->nic_cmd.buffer = k_buf; // call NIC driver chdev->cmd( XPTR( local_cxy , this ) ); // returns packet length *length = this->nic_cmd.length; // check error if( this->nic_cmd.error ) { #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] exit / ERROR in NIC_RX / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif return -1; } else { #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] exit / SUCCESS / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif return 0; } } // end dev_nic_rx_move_packet() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function. // It analyses an Ethernet frame contained in the kernel buffer defined // by the argument, and returns in the argument the length // of the IP packet contained in the Ethernet packet payload. /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on a received Ethernet packet // @ ip_length : [out] length of IP packet (in bytes). // @ return 0 if success / return -1 if illegal packet length. 
/////////////////////////////////////////////////////////////////////////////////////////// static error_t dev_nic_rx_check_eth( uint8_t * buffer, uint32_t * ip_length ) { uint32_t length = ((uint32_t)buffer[12] << 8) | (uint32_t)buffer[13]; *ip_length = length; return 0; } /////////////////////////////////////////////////////////////////////////////////////////// // This static function analyses the IP packet contained in the kernel buffer // defined by the argument, and returns in the , , // and arguments the informations contained in the IP header. // It checks the IP packet length versus the value contained in Ethernet header. // It checks the IP header checksum. /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on the IP packet. // @ expected_length : [in] expected IP packet length (from Ethernet header). // @ ip_src_addr : [out] source IP address. // @ ip_dst_addr : [out] destination IP address. // @ protocol : [out] transport protocol type. // @ return 0 if success / return -1 if illegal packet. 
/////////////////////////////////////////////////////////////////////////////////////////// static error_t dev_nic_rx_check_ip( uint8_t * buffer, uint32_t expected_length, uint32_t * ip_src_addr, uint32_t * ip_dst_addr, uint32_t * trsp_protocol ) { uint32_t length = ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3]; // discard packet if eth_length != ip_length if( length != expected_length ) { #if DEBUG_NIC_DEV thread_t * this = CURRENT_THREAD; printk("\n[%s] thread[%x,%x] enters : length (%d) != expected_length (%d)\n", __FUNCTION__, this->process->pid, this->trdid, length, expected_length ); #endif return -1; } // compute IP header checksum uint32_t received_cs = (uint32_t)dev_nic_ip_checksum( buffer ); // extract IP header checksum uint32_t computed_cs = ((uint32_t)buffer[10] << 8) | ((uint32_t)buffer[11]); // discard packet if bad checksum if( received_cs != computed_cs ) { #if DEBUG_NIC_DEV thread_t * this = CURRENT_THREAD; printk("\n[%s] thread[%x,%x] computed checksum (%d) != received checksum (%d)\n", __FUNCTION__, this->process->pid, this->trdid, computed_cs, received_cs ); #endif return -1; } *ip_src_addr = ((uint32_t)buffer[12] << 24) | ((uint32_t)buffer[13] << 16) | ((uint32_t)buffer[14] << 8) | ((uint32_t)buffer[15] ) ; *ip_dst_addr = ((uint32_t)buffer[16] << 24) | ((uint32_t)buffer[17] << 16) | ((uint32_t)buffer[18] << 8) | ((uint32_t)buffer[19] ) ; *trsp_protocol = (uint32_t)buffer[9]; return 0; } /////////////////////////////////////////////////////////////////////////////////////////// // This static function analyses the UDP packet contained in the kernel buffer // defined by the and arguments. // It checks the UDP checksum, and discard corrupted packets. // It scans the list of sockets attached to the NIC_RX chdev to find a matching socket, // and discard the received packet if no UDP socket found. // Finally, it copies the payload to the socket "rx_buf", as long as the packet payload // is not larger than the rx_buf. 
// It set the "rx_valid" flip-flop, and unblock the client thread when the last expected // byte has been received. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on local NIC_RX chdev descriptor. // @ k_buf : [in] pointer on the UDP packet in local kernel buffer. // @ k_length : [in] number of bytes in buffer (including UDP header). // @ pkt_src_addr : [in] source IP address (from IP packet header). // @ pkt_dst_addr : [in] destination IP address (from IP packet header). /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_rx_handle_udp_packet( chdev_t * chdev, uint8_t * k_buf, uint32_t k_length, uint32_t pkt_src_addr, uint32_t pkt_dst_addr ) { xptr_t root_xp; // extended pointer on attached sockets list root xptr_t lock_xp; // extended pointer on chdev lock xptr_t iter_xp; // iterator on socket list xptr_t socket_xp; // extended pointer on socket descriptor cxy_t socket_cxy; socket_t * socket_ptr; uint32_t socket_type; // socket type uint32_t socket_state; // socket state uint32_t local_addr; // local IP address from socket uint32_t local_port; // local port from socket uint32_t remote_addr; // remote IP address from socket uint32_t remote_port; // remote port from socket bool_t match_socket; // matching socket found uint16_t checksum; // computed checksum uint16_t pkt_checksum; // received checksum xptr_t socket_rbuf_xp; // extended pointer on socket rx_buf xptr_t socket_lock_xp; // extended pointer on socket lock xptr_t socket_client_xp; // extended pointer on socket rx_client field xptr_t client_xp; // extended pointer on client thread descriptor uint32_t payload; // number of bytes in payload uint32_t status; // number of bytes in rx_buf uint32_t space; // number of free slots in rx_buf uint32_t moved_bytes; // number of bytes actually moved to rx_buf // build extended pointers on list of sockets attached to NIC_RX chdev 
root_xp = XPTR( local_cxy , &chdev->wait_root ); lock_xp = XPTR( local_cxy , &chdev->wait_lock ); // compute UDP packet checksum checksum = dev_nic_udp_checksum( k_buf , k_length ); // get checksum from received packet header pkt_checksum = ((uint16_t)k_buf[6] << 8) | (uint16_t)k_buf[7]; // discard corrupted packet if( pkt_checksum != checksum ) return; // get src_port and dst_port from UDP header uint32_t pkt_src_port = ((uint32_t)k_buf[0] << 8) | (uint32_t)k_buf[1]; uint32_t pkt_dst_port = ((uint32_t)k_buf[2] << 8) | (uint32_t)k_buf[3]; // discard unexpected packet if( xlist_is_empty( root_xp ) ) return; // take the tock protecting the sockets list remote_busylock_acquire( lock_xp ); match_socket = false; // scan sockets list to find a match XLIST_FOREACH( root_xp , iter_xp ) { // get socket cluster and local pointer socket_xp = XLIST_ELEMENT( iter_xp , socket_t , rx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // get socket type socket_type = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->state )); // skip TCP socket if( socket_type == SOCK_STREAM ) continue; // get relevant info from socket descriptor local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute matching bool_t local_match = (local_addr == pkt_dst_addr) && (local_port == pkt_dst_port); bool_t remote_match = (remote_addr == pkt_src_addr) && (remote_port == pkt_src_port); if (socket_state == UDP_STATE_CONNECT ) match_socket = local_match && remote_match; else match_socket = local_match; // exit loop when socket found if( match_socket ) break; } // release the lock protecting the sockets list remote_busylock_release( lock_xp ); // discard unexpected 
packet if( match_socket == false ) return; // build extended pointers on various socket fields socket_rbuf_xp = XPTR( socket_cxy , &socket_ptr->rx_buf ); socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); socket_client_xp = XPTR( socket_cxy , &socket_ptr->rx_client ); // take the lock protecting the socket remote_rwlock_wr_acquire( socket_lock_xp ); // get status & space from rx_buf status = remote_buf_status( socket_rbuf_xp ); space = NIC_RX_BUF_SIZE - status; // get client thread client_xp = hal_remote_l64( socket_client_xp ); // get number of bytes in payload payload = k_length - UDP_HEAD_LEN; // compute number of bytes to move : min (space , seg_payload) moved_bytes = ( space < payload ) ? space : payload; // move payload from kernel buffer to socket rx_buf remote_buf_put_from_kernel( socket_rbuf_xp, k_buf + UDP_HEAD_LEN, moved_bytes ); // unblock client thread if registered if( client_xp != XPTR_NULL ) { thread_unblock( client_xp , THREAD_BLOCKED_IO ); } // release the lock protecting the socket remote_rwlock_wr_release( socket_lock_xp ); } // end dev_nic_rx_handle_udp_packet() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function to handle one RX // TCP segment contained in a kernel buffer defined by the & arguments. // It the received segment doesn't match an existing local socket, or is corrupted, // this faulty segment is discarded. /////////////////////////////////////////////////////////////////////////////////////////// // Implementation note: // 1) It checks the TCP checksum, and discard the corrupted segment. // 2) It scans the list of sockets attached to the RX chdev, to find the socket // matching the TCP segment header, and discards the segment if no socket found. // 3) When a socket has been found, it takes the lock protecting the socket state, // because the socket is accessed by both the NIC_TX and NIC_RX server threads. 
// 4) Depending on the socket state, it handle the received segment, including the // SYN, FIN, ACK and RST flags. It updates the socket state when required, moves // data to the rx_buf when possible, and registers requests to the TX server // thread in the R2T queue attached to the socket, to insert control flags in the // TX stream, as required. // 5) Finally, it releases the lock protecting the socke and returns. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on local NIC_RX chdev descriptor. // @ k_buf : [in] pointer on the TCP packet in local kernel buffer. // @ k_length : [in] number of bytes in buffer (including TCP header). // @ seg_src_addr : [in] source IP address (from IP packet header). // @ seg_dst_addr : [in] destination IP address (from IP packet header). /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_rx_handle_tcp_segment( chdev_t * chdev, uint8_t * k_buf, uint32_t k_length, uint32_t seg_src_addr, uint32_t seg_dst_addr ) { xptr_t root_xp; // extended pointer on attached sockets list root xptr_t lock_xp; // extended pointer on chdev lock xptr_t iter_xp; // iterator for these queues bool_t match_socket; // true if socket found xptr_t socket_xp; // extended pointer on matching socket descriptor cxy_t socket_cxy; socket_t * socket_ptr; uint32_t local_addr; // local IP address from socket uint32_t local_port; // local port from socket uint32_t remote_addr; // remote IP address from socket uint32_t remote_port; // remote port from socket uint32_t socket_state; // socket state uint32_t socket_type; // socket type uint32_t socket_tx_nxt; // next byte to send in TX stream uint32_t socket_tx_una; // first unacknowledged byte in TX stream uint32_t socket_rx_nxt; // next expected byte in RX stream uint32_t socket_rx_wnd; // current window value in RX stream xptr_t socket_lock_xp; // extended pointer on lock protecting 
socket state xptr_t socket_rx_buf_xp; // extended pointer on socket rx_buf xptr_t socket_r2tq_xp; // extended pointer on socket r2t queue xptr_t socket_client_xp; // extended pointer on socket rx_client thread uint16_t checksum; // computed TCP segment chechsum // build extended pointer on xlist of all sockets attached to NIC_RX chdev root_xp = XPTR( local_cxy , &chdev->wait_root ); lock_xp = XPTR( local_cxy , &chdev->wait_lock ); // get relevant infos from TCP segment header uint32_t seg_src_port = ((uint32_t)k_buf[0] << 8) | (uint32_t)k_buf[1]; uint32_t seg_dst_port = ((uint32_t)k_buf[2] << 8) | (uint32_t)k_buf[3]; uint32_t seg_seq_num = ((uint32_t)k_buf[4] << 24) | ((uint32_t)k_buf[5] << 16) | ((uint32_t)k_buf[6] << 8) | ((uint32_t)k_buf[7] ); uint32_t seg_ack_num = ((uint32_t)k_buf[8] << 24) | ((uint32_t)k_buf[9] << 16) | ((uint32_t)k_buf[10] << 8) | ((uint32_t)k_buf[11] ); uint8_t seg_hlen = k_buf[12] >> 2; // TCP header length in bytes uint8_t seg_flags = k_buf[13]; bool_t seg_ack_set = ((seg_flags & TCP_FLAG_ACK) != 0); bool_t seg_syn_set = ((seg_flags & TCP_FLAG_SYN) != 0); bool_t seg_fin_set = ((seg_flags & TCP_FLAG_FIN) != 0); bool_t seg_rst_set = ((seg_flags & TCP_FLAG_RST) != 0); uint16_t seg_window = ((uint32_t)k_buf[14] << 8) | (uint32_t)k_buf[15]; uint16_t seg_checksum = ((uint32_t)k_buf[16] << 8) | (uint32_t)k_buf[17]; uint32_t seg_payload = k_length - seg_hlen; // number of bytes in payload // 1. compute TCP checksum checksum = dev_nic_tcp_checksum( k_buf, k_length, seg_src_addr, seg_dst_addr ); // discard segment if corrupted if( seg_checksum != checksum ) return; match_socket = false; // take the lock protecting the list of sockets remote_busylock_acquire( lock_xp ); // 2. 
scan list of sockets to find a matching socket XLIST_FOREACH( root_xp , iter_xp ) { // get socket cluster and local pointer socket_xp = XLIST_ELEMENT( iter_xp , socket_t , rx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // get socket type and state socket_type = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->state )); // skip UDP socket if( socket_type == SOCK_DGRAM ) continue; // get relevant socket infos for matching local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute matching condition // (in LISTEN state, remote_port and remote_addr can be unspecified) if( socket_state == TCP_STATE_LISTEN ) { match_socket = (local_addr == seg_dst_addr) && (local_port == seg_dst_port) ; } else { match_socket = (local_addr == seg_dst_addr) && (local_port == seg_dst_port) && (remote_addr == seg_src_addr) && (remote_port == seg_src_port) ; } // exit loop if matching if( match_socket ) break; } // end loop on sockets // release the lock protecting the list of sockets remote_busylock_release( lock_xp ); // discard segment if no matching socket found if( match_socket == false ) return; // From here the actions depend on both the socket state, // and the received segment flags // - update socket state, // - move data to rx_buf, // - make a R2T request when required // build extended pointers on various socket fields socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); socket_rx_buf_xp = XPTR( socket_cxy , &socket_ptr->rx_buf ); socket_r2tq_xp = XPTR( socket_cxy , &socket_ptr->r2tq ); socket_client_xp = XPTR( socket_cxy , &socket_ptr->rx_client ); // 3. 
take the lock protecting the matching socket remote_rwlock_wr_acquire( socket_lock_xp ); // get relevant socket infos from socket descriptor socket_state = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->state )); socket_rx_nxt = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_nxt )); socket_rx_wnd = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_wnd )); socket_tx_una = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_una )); socket_tx_nxt = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_nxt )); switch( socket_state ) { ////////////////////// case TCP_STATE_LISTEN: { // [1] discard segment if RST flag if( seg_rst_set ) return; // [2] send a RST & discard segment if ACK flag if( seg_ack_set ) { // set socket.tx_nxt to seg_ack_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), seg_ack_num ); // make RST request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST ); // discard segment break; } // [3] handle SYN flag if( seg_syn_set ) { // set socket.rx_nxt to seg_seq_num + 1 hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), seg_seq_num + 1 ); // set socket.tx_nxt to ISS hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), TCP_ISS ); // set socket.rx_irs to seg_seq_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_irs ), seg_seq_num + 1 ); // make SYN.ACK request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_SYN | TCP_FLAG_ACK ); // set socket.tx_nxt to ISS + 1 hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), TCP_ISS + 1 ); // set socket.tx_una to ISS hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_una ), TCP_ISS ); // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_RCVD ); // update socket.remote_addr hal_remote_s32( XPTR( socket_cxy , &socket_ptr->remote_addr ), seg_src_addr ); // update socket.remote_port hal_remote_s32( XPTR( socket_cxy , &socket_ptr->remote_port ), seg_src_port ); } break; } //////////////////////// case 
TCP_STATE_SYN_SENT: { // [1] check ACK flag if( seg_ack_set ) { if( seg_ack_num != TCP_ISS + 1 ) // ACK not acceptable { // discard segment if RST if( seg_rst_set ) break; // set socket.tx_nxt to seg_ack_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), seg_ack_num ); // make an RST request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST ); // discard segment break; } } // [2] check RST flag if( seg_rst_set ) { // TODO signal "error: connection reset" to user // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_BOUND ); // discard segment break; } // [3] handle SYN flag when (no ACK or acceptable ACK, and no RST) if( seg_syn_set ) { // TODO Ne faut-il pas tester seg_seq_num ? if( seg_ack_set ) // received both SYN and ACK { // set socket.rx_nxt to seg_seq_num + 1 hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), seg_seq_num + 1 ); // set socket.tx_una to seg_ack_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_una ), seg_ack_num ); // set socket.rx_irs to seg_seq_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_irs ), seg_seq_num ); // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_ESTAB ); // make an ACK request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK ); } else // received SYN without ACK { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_RCVD ); // set socket.tx_nxt to ISS hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), TCP_ISS ); // make a SYN.ACK request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_SYN | TCP_FLAG_ACK ); } } break; } //////////////////////// case TCP_STATE_SYN_RCVD: case TCP_STATE_ESTAB: case TCP_STATE_FIN_WAIT1: case TCP_STATE_FIN_WAIT2: case TCP_STATE_CLOSE_WAIT: case TCP_STATE_CLOSING: case TCP_STATE_LAST_ACK: case TCP_STATE_TIME_WAIT: { // [1] check sequence number // compute min & max acceptable sequence 
numbers uint32_t seq_min = socket_rx_nxt; uint32_t seq_max = socket_rx_nxt + socket_rx_wnd - 1; // compute sequence number for last byte in segment uint32_t seg_seq_last = seg_seq_num + seg_payload - 1; if( (seg_seq_num != socket_rx_nxt) || // out_of_order (is_in_window( seg_seq_last, seq_min, seq_max ) == false) ) // out_of_window { // discard segment return; } // [2] handle RST flag if( seg_rst_set ) { if( socket_state == TCP_STATE_SYN_RCVD ) { // TODO unblock all clients threads with "reset" responses } else if( (socket_state == TCP_STATE_ESTAB ) || (socket_state == TCP_STATE_FIN_WAIT1 ) || (socket_state == TCP_STATE_FIN_WAIT2 ) || (socket_state == TCP_STATE_CLOSE_WAIT) ) { // TODO all pending send & received commands // must receive "reset" responses // TODO destroy the socket } else // all other states { } // [3] handle security & precedence TODO ... someday // [4] handle SYN flag if( seg_syn_set ) // received SYN { // TODO signal error to user // make an RST request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST ); // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_BOUND ); } // [5] handle ACK flag if( seg_ack_set == false ) { // discard segment when ACK not set break; } else if( socket_state == TCP_STATE_SYN_RCVD ) { if( is_in_window( seg_ack_num , socket_tx_una , socket_tx_nxt ) ) { // update socket.state to ESTAB hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_ESTAB ); } else // unacceptable ACK { // set socket.tx_nxt to seg_ack_num hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), seg_ack_num ); // make an RST request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST ); } } else if( (socket_state == TCP_STATE_ESTAB) || (socket_state == TCP_STATE_FIN_WAIT1) || (socket_state == TCP_STATE_FIN_WAIT1) || (socket_state == TCP_STATE_CLOSE_WAIT) || (socket_state == TCP_STATE_CLOSING) ) { if( is_in_window( seg_ack_num + 1 , socket_tx_una , socket_tx_nxt ) ) 
{ // update socket.tx_una hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_una ), seg_ack_num ); // update socket.tx_wnd hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_wnd ), seg_window ); } else // unacceptable ACK { // discard segment break; } // specific for FIN_WAIT1 if( socket_state == TCP_STATE_FIN_WAIT1 ) { if( seg_fin_set ) { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_FIN_WAIT2 ); } } // specific for CLOSING if( socket_state == TCP_STATE_CLOSING ) { if( seg_ack_set ) { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_TIME_WAIT ); } else { // discard segment break; } } } else if( socket_state == TCP_STATE_LAST_ACK ) { if( seg_ack_set ) { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_TIME_WAIT ); } } // [6] handle URG flag TODO ... someday // [7] Move DATA to rx_buf and unblock client thread if( seg_payload ) { if( (socket_state == TCP_STATE_ESTAB) || (socket_state == TCP_STATE_FIN_WAIT1) || (socket_state == TCP_STATE_FIN_WAIT2) ) { // get number of bytes already stored in rx_buf uint32_t status = remote_buf_status( socket_rx_buf_xp ); // compute empty space in rx_buf uint32_t space = NIC_RX_BUF_SIZE - status; // compute number of bytes to move : min (space , seg_payload) uint32_t nbytes = ( space < seg_payload ) ? 
space : seg_payload; // move payload from k_buf to rx_buf remote_buf_put_from_kernel( socket_rx_buf_xp, k_buf + seg_hlen, nbytes ); // update socket.rx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + nbytes ); // update socket.rx_wnd hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_wnd ), socket_rx_wnd - nbytes ); // make an ACK request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK ); // get extended pointer on rx_client thread xptr_t client_xp = hal_remote_l64( socket_client_xp ); // unblock client thread if( client_xp != XPTR_NULL ) { thread_unblock( client_xp , THREAD_BLOCKED_IO ); } } } // [8] handle FIN flag if( seg_fin_set ) { if( (socket_state == TCP_STATE_UNBOUND) || (socket_state == TCP_STATE_BOUND) || (socket_state == TCP_STATE_LISTEN) || (socket_state == TCP_STATE_SYN_SENT) ) { // discard segment break; } else // all other states { // TODO signal "connection closing" // make an ACK request to R2T queue dev_nic_rx_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK ); // increment socket.rx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + 1 ); if( (socket_state == TCP_STATE_SYN_RCVD) || (socket_state == TCP_STATE_ESTAB) ) { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_TIME_WAIT ); } else if( socket_state == TCP_STATE_FIN_WAIT1 ) { if( seg_ack_set ) { // TODO start "time-wait" timer / turn off others timers // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_TIME_WAIT ); } else { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSING ); } } else if( socket_state == TCP_STATE_FIN_WAIT2 ) { // TODO start "time-wait" timer / turn off other timers // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_TIME_WAIT ); } else if( socket_state == TCP_STATE_TIME_WAIT ) { // TODO restart "time_wait" timer } } } // end if FIN } // 
end case sockets synchronized } // end switch socket state // release the lock protecting socket remote_rwlock_wr_acquire( socket_lock_xp ); } // end socket found } // end dev_nic_rx_handle_tcp_segment() ///////////////////////////////////////// void dev_nic_rx_server( chdev_t * chdev ) { uint8_t k_buf[2048]; // kernel buffer for one ETH/IP/UDP packet uint32_t pkt_src_addr; // packet source IP address uint32_t pkt_dst_addr; // packet destination IP address uint32_t trsp_protocol; // transport protocol (TCP / UDP) uint32_t eth_length; // size of Ethernet packet (bytes) uint32_t ip_length; // size of IP packet in bytes uint32_t nic_queue_readable; // NIC_RX queue non empty when true error_t error; thread_t * this = CURRENT_THREAD; // check chdev direction and type assert( (chdev->func == DEV_FUNC_NIC) && (chdev->is_rx == true) , "illegal chdev type or direction" ); // check thread can yield assert( (this->busylocks == 0), "cannot yield : busylocks = %d\n", this->busylocks ); while( 1 ) { // check NIC_RX_QUEUE readable error = dev_nic_rx_queue_readable( chdev, &nic_queue_readable ); if( error ) { printk("\n[PANIC] in %s : cannot access NIC_TX[%d] queue\n", __FUNCTION__, chdev->channel ); } if( nic_queue_readable ) // NIC_TX_QUEUE non empty { // moves one Ethernet packet to kernel buffer error = dev_nic_rx_move_packet( chdev, k_buf, ð_length ); if( error ) { printk("\n[PANIC] in %s : cannot read the NIC_TX[%d] queue\n", __FUNCTION__, chdev->channel ); } // analyse the ETH header error = dev_nic_rx_check_eth( k_buf, &ip_length ); // discard packet if error reported by Ethernet layer if( error ) continue; // analyse the IP header error = dev_nic_rx_check_ip( k_buf + ETH_HEAD_LEN, ip_length, &pkt_src_addr, &pkt_dst_addr, &trsp_protocol ); // discard packet if error reported by IP layer if( error ) continue; // call relevant transport protocol if( trsp_protocol == PROTOCOL_UDP ) { dev_nic_rx_handle_udp_packet( chdev, k_buf + ETH_HEAD_LEN + IP_HEAD_LEN, ip_length - 
IP_HEAD_LEN, pkt_src_addr, pkt_dst_addr ); } else if ( trsp_protocol == PROTOCOL_TCP) { dev_nic_rx_handle_tcp_segment( chdev, k_buf + ETH_HEAD_LEN + IP_HEAD_LEN, ip_length - IP_HEAD_LEN, pkt_src_addr, pkt_dst_addr ); } } else // block and deschedule if NIC_RX_QUEUE empty { thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_ISR ); sched_yield( "waiting RX client" ); } } // end of while loop } // end dev_nic_rx_server() /////////////////////////////////////////////////////////////////////////////////////////// // Functions used by the NIC_TX server thread /////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// // These static functions are called by the NIC_TX server thread to report the // completion (success or error) of a TX command. // - it print an error message in case of error. // - it updates the "tx_error" field in socket descriptor. // - it unblocks the client thread. 
/////////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket // @ cmd_type : [in] SOCKET_TX_CONNECT / SOCKET_TX_SEND / SOCKET_TX_CLOSE // @ socket_state : [in] current socket state /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_report_error( xptr_t socket_xp, uint32_t cmd_type, uint32_t socket_state ) { printk("\n[ERROR] in %s : command %s in %s state\n", __FUNCTION__, socket_cmd_str(cmd_type), socket_state_str(socket_state) ); // get socket thread cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // set tx_error field in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_error ) , 1 ); // get extended point on client thread xptr_t client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->tx_client )); // unblock the client thread thread_unblock( client_xp , THREAD_BLOCKED_IO ); } //////////////////////////////////////////////////////////// static void dev_nic_tx_report_success( xptr_t socket_xp ) { // get socket thread cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // set tx_error field in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_error ) , 0 ); // get extended point on client thread xptr_t client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->tx_client )); // unblock the client thread thread_unblock( client_xp , THREAD_BLOCKED_IO ); } /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It calls directly the NIC driver (WRITABLE command) and returns the status // of the NIC_TX queue identified by the argument. // in the buffer. 
/////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on NIC_TX chdev. // @ length : [in] packet length in bytes. // @ writable : [out] zero if queue full. // @ returns 0 if success / returns -1 if failure in accessing NIC device. /////////////////////////////////////////////////////////////////////////////////////////// error_t dev_nic_tx_queue_writable( chdev_t * chdev, uint32_t length, uint32_t * writable ) { thread_t * this = CURRENT_THREAD; // initialize READABLE command in thread descriptor this->nic_cmd.dev_xp = XPTR( local_cxy , chdev ); this->nic_cmd.type = NIC_CMD_WRITABLE; this->nic_cmd.length = length; // call driver to test writable chdev->cmd( XPTR( local_cxy , this ) ); // return status *writable = this->nic_cmd.status; // return error return this->nic_cmd.error; } // end dev_nic_tx_queue_writable /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It moves one ETH/IP/UDP packet from the kernel buffer identified by the and // arguments to the NIC_TX_QUEUE identified the argument. // It calls directly the NIC driver, without registering in a waiting queue, because // only this NIC_TX server thread can access this NIC_TX_QUEUE. // 1) It checks NIC_TX_QUEUE status in a while loop, using the NIC_CMD_WRITABLE command. // As long as the queue is not writable, it blocks and deschedules. It is re-activated // by the NIC-TX ISR as soon as the queue changes status. // 2) When the queue is writable, it put the ETH/IP/UDP packet into the NIC_TX_QUEUE, // using the driver NIC_CMD_WRITE command. // Both commands are successively registered in this NIC-TX server thread descriptor // to be passed to the driver. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on NIC_TX chdev. 
// @ buffer : [in] pointer on a local kernel buffer (2K bytes). // @ length : [in] actual Ethernet packet length in bytes. /////////////////////////////////////////////////////////////////////////////////////////// void dev_nic_tx_move_packet( chdev_t * chdev, uint8_t * buffer, uint32_t length ) { error_t error; uint32_t writable; thread_t * this = CURRENT_THREAD; // get extended pointers on server tread and chdev xptr_t thread_xp = XPTR( local_cxy , this ); xptr_t chdev_xp = XPTR( local_cxy , chdev ); // get local pointer on core running this server thead core_t * core = this->core; // check thread can yield assert( (this->busylocks == 0), "cannot yield : busylocks = %d\n", this->busylocks ); #if DEBUG_DEV_NIC_RX uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] enters for packet %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, pkd, cycle ); #endif // check NIC_TX_QUEUE writable while( 1 ) { error = dev_nic_tx_queue_writable( chdev, length, &writable ); if( error ) { printk("\n[PANIC] in %s : cannot access NIC_TX queue\n", __FUNCTION__ ); return; } if( writable == 0 ) // block & deschedule if non writable { // enable NIC-TX IRQ dev_pic_enable_irq( core->lid , chdev_xp ); // block TX server thread thread_block( thread_xp , THREAD_BLOCKED_ISR ); // deschedule TX server thread sched_yield("client blocked on NIC_TX queue full"); // disable NIC-TX IRQ dev_pic_disable_irq( core->lid , chdev_xp ); } else // exit loop if writable { break; } } // initialize WRITE command in server thread descriptor this->nic_cmd.dev_xp = chdev_xp; this->nic_cmd.type = NIC_CMD_WRITE; this->nic_cmd.buffer = buffer; this->nic_cmd.length = length; // call driver to move packet chdev->cmd( thread_xp ); #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] exit for packet %x\n", __FUNCTION__ , this->process->pid, this->trdid , pkd ); #endif return; } // end 
dev_nic_tx_move_packet() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function to build an UDP // header in the kernel buffer defined by the arguement, as specified by the // argument. The argument defines the number of bytes in payload. // It set the "src_port", "dst_port", "total_length" and "checksum" fields in UDP header. // The payload must be previouly loaded in the pernel buffer. /////////////////////////////////////////////////////////////////////////////////////////// // @ k_buf : [in] pointer on first byte of UDP header in kernel buffer. // @ socket_xp : [in] extended pointer on socket. // @ length : [in] number of bytes in payload. /////////////////////////////////////////////////////////////////////////////////////////// void dev_nic_tx_build_udp_header( uint8_t * k_buf, xptr_t socket_xp, uint32_t length ) { uint16_t checksum; // checksum value uint32_t total_length; // total UDP packet length uint32_t local_addr; // local IP address uint32_t remote_addr; // remote IP address uint32_t local_port; // local port uint32_t remote_port; // remote port // get socket cluster an local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // get relevant infos from socket local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute UDP packet total length total_length = length + UDP_HEAD_LEN; // set src_port and dst_port in header k_buf[0] = local_port >> 8; k_buf[1] = local_port; k_buf[2] = remote_port >> 8; k_buf[3] = remote_port; // set packet length in header k_buf[4] = total_length >> 8; k_buf[5] = total_length; // compute UDP packet checksum checksum = 
dev_nic_udp_checksum( k_buf , total_length ); // set checksum k_buf[6] = checksum >> 8; k_buf[7] = checksum; } // end dev_nic_tx_build_udp_header() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds a TCP header in the kernel buffer defined by the argument. // The payload must have been previouly registered in this buffer. // The "local_addr", "local_port", "remote_addr", "remote_port", seq_num", "ack_num", // and "window" fields are obtained from the argument. // The argument defines the number of bytes in payload, and the argument // defines the flags to be set in TCP header. /////////////////////////////////////////////////////////////////////////////////////////// // @ k_buf : [in] pointer on first byte of TCP header in kernel buffer. // @ length : [in] number of bytes in payload. // @ socket_xp : [in] extended pointer on socket. // @ flags : [in] flags to be set in TCP header. 
/////////////////////////////////////////////////////////////////////////////////////////// void dev_nic_tx_build_tcp_header( uint8_t * k_buf, uint32_t length, xptr_t socket_xp, uint8_t flags ) { uint16_t checksum; // global segment checksum uint32_t total_length; // total UDP packet length uint32_t src_addr; // local IP address uint32_t dst_addr; // remote IP address uint16_t src_port; // local port uint16_t dst_port; // remote port uint32_t seq_num; // first byte of segment in TX stream uint32_t ack_num; // next expected byte in RX stream uint16_t window; // window of accepted segments in RX stream // get socket cluster an local pointer socket_t * sock_ptr = GET_PTR( socket_xp ); cxy_t sock_cxy = GET_CXY( socket_xp ); // get relevant infos from socket src_addr = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->local_addr )); dst_addr = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->remote_addr )); src_port = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->local_port )); dst_port = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->remote_port )); seq_num = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->tx_nxt )); ack_num = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->rx_nxt )); window = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->rx_wnd )); // compute TCP segment total length total_length = length + TCP_HEAD_LEN; // set "src_port" and "dst_port" k_buf[0] = src_port >> 8; k_buf[1] = src_port; k_buf[2] = dst_port >> 8; k_buf[3] = dst_port; // set "seq_num" k_buf[4] = seq_num >> 24; k_buf[5] = seq_num >> 16; k_buf[6] = seq_num >> 8; k_buf[7] = seq_num; // set "ack_num" k_buf[8] = ack_num >> 24; k_buf[9] = ack_num >> 16; k_buf[10] = ack_num >> 8; k_buf[11] = ack_num; // set "hlen" k_buf[12] = 5; // set "flags" k_buf[13] = flags & 0x3F; // set "window" k_buf[14] = window >> 8; k_buf[15] = window; // reset "checksum" k_buf[16] = 0; k_buf[17] = 0; // set "urgent_ptr" k_buf[18] = 0; k_buf[19] = 0; // compute TCP segment checksum checksum = dev_nic_tcp_checksum( k_buf, total_length, src_addr, dst_addr 
); // set "checksum" k_buf[16] = checksum >> 8; k_buf[17] = checksum; } // end dev_nic_tx_build_tcp_header() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds the IP header in the 20 first bytes of . /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : pointer on first byte of IP header in kernel buffer // @ src_addr : source IP address. // @ dst_addr : destination IP address. // @ length : number of bytes in IP packet payload. /////////////////////////////////////////////////////////////////////////////////////////// void dev_nic_tx_build_ip_header( uint8_t * buffer, uint32_t src_addr, uint32_t dst_addr, uint16_t length ) { uint16_t hcs; uint16_t total = length + IP_HEAD_LEN; buffer[0] = 0x45; // IPV4 / IHL = 20 bytes buffer[1] = 0; // DSCP / ECN buffer[2] = total >> 8; buffer[3] = total; buffer[4] = 0x40; // Don't Fragment buffer[5] = 0; buffer[6] = 0; buffer[7] = 0; buffer[8] = 0xFF; // TTL buffer[9] = 0x11; // UDP protocol buffer[12] = src_addr >> 24; buffer[13] = src_addr >> 16; buffer[14] = src_addr >> 8; buffer[15] = src_addr; buffer[16] = dst_addr >> 24; buffer[17] = dst_addr >> 16; buffer[18] = dst_addr >> 8; buffer[19] = dst_addr; // compute IP header checksum hcs = dev_nic_ip_checksum( buffer ); // set checksum buffer[10] = hcs >> 8; buffer[11] = hcs; } // end dev_nic_tx_build_ip_header /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds the Ethernet header in the 14 first bytes of . /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : pointer on first byte of Ethernet header in kernel buffer // @ src_mac_54 : two MSB bytes in source MAC address. // @ src_mac_32 : two MED bytes in source MAC address. 
// @ src_mac_10 : two LSB bytes in source MAC address.
// @ dst_mac_54 : two MSB bytes in destination MAC address.
// @ dst_mac_32 : two MED bytes in destination MAC address.
// @ dst_mac_10 : two LSB bytes in destination MAC address.
// @ length     : number of bytes in Ethernet frame payload.
///////////////////////////////////////////////////////////////////////////////////////////
void dev_nic_tx_build_eth_header( uint8_t * buffer,
                                  uint16_t  src_mac_54,
                                  uint16_t  src_mac_32,
                                  uint16_t  src_mac_10,
                                  uint16_t  dst_mac_54,
                                  uint16_t  dst_mac_32,
                                  uint16_t  dst_mac_10,
                                  uint32_t  length )
{
    // the six 16 bits MAC slices, in the order they are laid out in the header :
    // destination address first, then source address
    const uint16_t slices[6] = { dst_mac_54 , dst_mac_32 , dst_mac_10 ,
                                 src_mac_54 , src_mac_32 , src_mac_10 };
    uint32_t       idx;

    // each slice is written most significant byte first
    for( idx = 0 ; idx < 6 ; idx++ )
    {
        buffer[2 * idx]     = (uint8_t)(slices[idx] >> 8);
        buffer[2 * idx + 1] = (uint8_t)(slices[idx]);
    }

    // the two last bytes contain the 16 LSB bits of <length>
    buffer[12] = (uint8_t)(length >> 8);
    buffer[13] = (uint8_t)(length);

}  // end dev_nic_tx_build_eth_header()

///////////////////////////////////////////////////////////////////////////////////////////
// This static function is called by the dev_nic_tx_server() function to handle one
// TX command, or one R2T request, registered in the socket identified by the <socket_xp>
// argument. If there is one valid command, or if the R2T queue is non empty (for a TCP
// socket), it builds an ETH/IP/UDP packet (or a ETH/IP/TCP segment), in the buffer
// defined by the <k_buf> argument, and registers it in the NIC_TX queue defined by the
// <chdev> argument. The supported commands are SOCKET_SEND/SOCKET_CONNECT/SOCKET_CLOSE.
// It unblocks the client thread when the command is completed.
///////////////////////////////////////////////////////////////////////////////////////////
// When there is a packet to send, it makes the following actions:
// 1) it takes the lock protecting the socket state.
// 2) it gets the command arguments from client thread descriptor.
// 3) it build an UDP packet or a TCP segment, depending on both the command type, and // the socket state, updates the socket state, and unblocks the client thread. // 4) it release the lock protecting the socket. // 5) it build the IP header. // 6) it build the ETH header. // 7) it copies the packet in the NIC_TX queue. /////////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on client socket. // @ k_buf : [in] local pointer on kernel buffer (2 Kbytes). // @ chdev : [in] local pointer on NIC_RX chdev. /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_handle_one_cmd( xptr_t socket_xp, uint8_t * k_buf, chdev_t * chdev ) { socket_t * socket_ptr; cxy_t socket_cxy; xptr_t client_xp; // extended pointer on client thread thread_t * client_ptr; cxy_t client_cxy; sock_cmd_t cmd; // NIC command type uint8_t * buf; // pointer on user buffer uint32_t len; // user buffer length uint32_t todo; // number of bytes not yet sent uint32_t socket_type; // socket type (UDP/TCP) uint32_t socket_state; // socket state xptr_t socket_lock_xp; // extended pointer on socket lock xptr_t socket_r2tq_xp; // extended pointer on R2T queue uint32_t src_ip_addr; // source IP address uint32_t dst_ip_addr; // destination IP address uint32_t tx_una; // next byte to be sent uint32_t tx_nxt; // first unacknowledged byte uint32_t nbytes; // number of bytes in UDP/TCP packet payload uint8_t * k_base; // pointer UDP/TCP packet in kernel buffer uint32_t trsp_length; // length of TCP/UDP packet uint8_t r2t_flags; // flags defined by one R2T queue request bool_t do_send; // build & send a packet when true // get socket cluster and local pointer socket_cxy = GET_CXY( socket_xp ); socket_ptr = GET_PTR( socket_xp ); // build extended pointer on socket lock and r2t queue socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); socket_r2tq_xp = XPTR( socket_cxy , 
&socket_ptr->r2tq ); // 1. take lock protecting this socket remote_rwlock_wr_acquire( socket_lock_xp ); // get pointers on TX client thread from socket client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->tx_client )); client_cxy = GET_CXY( client_xp ); client_ptr = GET_PTR( client_xp ); // check valid command if( client_xp != XPTR_NULL ) // valid command found { // 2. get command arguments from socket cmd = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_cmd )); buf = hal_remote_lpt( XPTR(socket_cxy , &socket_ptr->tx_buf )); len = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_len )); todo = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_todo )); // get socket type and state socket_type = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->state )); // 3. UDP : build UDP packet and update UDP socket state if( socket_type == SOCK_DGRAM ) { if( socket_state == UDP_STATE_UNBOUND ) { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } else // BOUND or CONNECT state { if( cmd == SOCKET_TX_SEND ) { // compute payload length nbytes = ( PAYLOAD_MAX_LEN < todo ) ? PAYLOAD_MAX_LEN : todo; // compute UDP packet base in kernel buffer k_base = k_buf + ETH_HEAD_LEN + IP_HEAD_LEN; // move payload to kernel buffer hal_copy_from_uspace( XPTR(local_cxy , k_base + UDP_HEAD_LEN ), buf + (len - todo), nbytes ); // build UDP header dev_nic_tx_build_udp_header( k_base, socket_xp, nbytes ); // update "tx_todo" in socket descriptor hal_remote_s32( XPTR(socket_cxy , socket_ptr->tx_todo), todo - nbytes ); // unblock client thread when SEND command completed if( nbytes == todo ) { dev_nic_tx_report_success( socket_xp ); } do_send = true; } else { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } } // compute transport packet length trsp_length = UDP_HEAD_LEN + nbytes; } // end UDP // 3. 
TCP : build TCP segment and update TCP socket state if( socket_type == SOCK_STREAM ) { // extract one request from TCP socket R2T queue if queue non empty if( remote_buf_status( socket_r2tq_xp ) ) { remote_buf_get_to_kernel( socket_r2tq_xp , &r2t_flags , 1 ); } else { r2t_flags = 0; } ///////////////////////////////////// if( socket_state == TCP_STATE_ESTAB ) // connected TCP socket { if( cmd == SOCKET_TX_SEND ) { // get "tx_nxt", and "tx_una" from socket descriptor tx_nxt = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_nxt )); tx_una = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_una )); // compute actual payload length nbytes = ( PAYLOAD_MAX_LEN < todo ) ? PAYLOAD_MAX_LEN : todo; // compute TCP segment base in kernel buffer k_base = k_buf + ETH_HEAD_LEN + IP_HEAD_LEN; // move payload to kernel buffer hal_copy_from_uspace( XPTR( local_cxy , k_base + TCP_HEAD_LEN ), buf + (len - todo), nbytes ); // build TCP header dev_nic_tx_build_tcp_header( k_base, socket_xp, nbytes, // payload TCP_FLAG_ACK | r2t_flags ); // flags // update "tx_todo" in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_todo ), todo - nbytes ); // update "tx_nxt" in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), tx_nxt + nbytes ); // unblock client thread when SEND command completed if( (todo == 0) && (tx_nxt == tx_una) ) { dev_nic_tx_report_success( socket_xp ); } do_send = true; } else if( cmd == SOCKET_TX_CLOSE ) { // build TCP FIN segment dev_nic_tx_build_tcp_header( k_base, socket_xp, 0, // payload TCP_FLAG_FIN | r2t_flags ); // flags // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_FIN_WAIT1 ); do_send = true; } else // cmd == CONNECT { // report illegal command dev_nic_tx_report_error( socket_xp , cmd , socket_state ); do_send = false; } } ////////////////////////////////////////// else if( socket_state == TCP_STATE_BOUND ) // unconnected TCP socket { if ( cmd == SOCKET_TX_CONNECT ) { // set 
socket.tx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), TCP_ISS ); hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), 0 ); hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_wnd ), NIC_RX_BUF_SIZE); // build TCP SYN segment dev_nic_tx_build_tcp_header( k_base, socket_xp, 0, // payload TCP_FLAG_SYN ); // flags // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_SENT ); do_send = true; } else // cmd == SEND / CLOSE { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } } /////////////////////////////////////////// else if( socket_state == TCP_STATE_LISTEN ) // server wait connect { if( cmd == SOCKET_TX_CONNECT ) { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_SENT ); // set socket.tx_una hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_una ), TCP_ISS ); // set socket.tx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_una ), TCP_ISS + 1 ); // build TCP SYN segment dev_nic_tx_build_tcp_header( k_base, socket_xp, 0, // payload TCP_FLAG_SYN ); // flags do_send = true; } else // cmd == CLOSE / SEND { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } } ///////////////////////////////////////////// else if( socket_state == TCP_STATE_SYN_RCVD ) // socket wait ACK { if( cmd == SOCKET_TX_CLOSE ) { // build TCP FIN segment dev_nic_tx_build_tcp_header( k_base, socket_xp, 0, // payload TCP_FLAG_FIN ); // flags // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_FIN_WAIT1 ); do_send = true; } else // SEND / CONNECT { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } } //////////////////////////////////////////////// else if( socket_state == TCP_STATE_CLOSE_WAIT ) // wait local close() { if( cmd == SOCKET_TX_CLOSE ) { // build TCP FIN segment dev_nic_tx_build_tcp_header( 
k_base, socket_xp, 0, // payload TCP_FLAG_FIN ); // flags // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_LAST_ACK ); do_send = true; } else // SEND / CONNECT { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } } //// else { // report illegal command dev_nic_tx_report_error( socket_xp, cmd, socket_state ); do_send = false; } // compute TCP segment length trsp_length = TCP_HEAD_LEN + nbytes; } } else // no valid command found { if( socket_type == SOCK_DGRAM ) // UDP socket { do_send = false; } else // TCP socket { if( remote_buf_status( socket_r2tq_xp ) == 0 ) // R2T queue empty { do_send = false; } else // pending request in R2T queue { // get one request from R2T queue remote_buf_get_to_kernel( socket_r2tq_xp , &r2t_flags , 1 ); // build TCP header for an empty segment dev_nic_tx_build_tcp_header( k_base, socket_xp, 0, // payload r2t_flags ); // flags do_send = true; } } } // 4. release the lock protecting the socket remote_rwlock_wr_release( socket_lock_xp ); // return if no packet to send if( do_send == false ) return; // 5. build IP header dev_nic_tx_build_ip_header( k_buf + ETH_HEAD_LEN, src_ip_addr, dst_ip_addr, IP_HEAD_LEN + trsp_length ); // 6. build ETH header dev_nic_tx_build_eth_header( k_buf, (uint16_t)SRC_MAC_54, (uint16_t)SRC_MAC_32, (uint16_t)SRC_MAC_10, (uint16_t)DST_MAC_54, (uint16_t)DST_MAC_32, (uint16_t)DST_MAC_10, ETH_HEAD_LEN + IP_HEAD_LEN + trsp_length ); // 7. 
move packet to NIC_TX queue dev_nic_tx_move_packet( chdev, k_buf, ETH_HEAD_LEN + IP_HEAD_LEN + trsp_length ); } // end dev_nic_tx_handle_one_cmd() ///////////////////////////////////////// void dev_nic_tx_server( chdev_t * chdev ) { uint8_t k_buf[NIC_KERNEL_BUF_SIZE]; // buffer for one packet xptr_t root_xp; // extended pointer on clients list root xptr_t lock_xp; // extended pointer on lock protecting this list xptr_t socket_xp; // extended pointer on on client socket socket_t * socket_ptr; cxy_t socket_cxy; xptr_t entry_xp; // extended pointer on socket tx_list entry thread_t * this = CURRENT_THREAD; // check chdev direction and type assert( (chdev->func == DEV_FUNC_NIC) && (chdev->is_rx == false) , "illegal chdev type or direction" ); // check thread can yield assert( (this->busylocks == 0), "cannot yield : busylocks = %d\n", this->busylocks ); // build extended pointer on client sockets lock & root lock_xp = XPTR( local_cxy , &chdev->wait_lock ); root_xp = XPTR( local_cxy , &chdev->wait_root ); while( 1 ) // TX server infinite loop { // take the lock protecting the client sockets queue remote_busylock_acquire( lock_xp ); /////////////// block and deschedule if no clients if( xlist_is_empty( root_xp ) == false ) { // release the lock protecting the TX client sockets queue remote_busylock_release( lock_xp ); // block and deschedule thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_CLIENT ); sched_yield( "waiting client" ); } ////////////// else { // get first client socket socket_xp = XLIST_FIRST( root_xp , socket_t , tx_list ); socket_cxy = GET_CXY( socket_xp ); socket_ptr = GET_PTR( socket_xp ); // build extended pointer on socket xlist_entry entry_xp = XPTR( socket_cxy , &socket_ptr->tx_list ); // remove this socket from the waiting queue xlist_unlink( entry_xp ); // release the lock protecting the client sockets queue remote_busylock_release( lock_xp ); // handle this TX client dev_nic_tx_handle_one_cmd( socket_xp, k_buf, chdev ); // take the lock 
protecting the client sockets queue remote_busylock_acquire( lock_xp ); // add this socket in last position of queue xlist_add_last( root_xp , entry_xp ); // release the lock protecting the client sockets queue remote_busylock_release( lock_xp ); } } // end while } // end dev_nic_tx_server()