/* * dev_nic.c - NIC (Network Controler) generic device API implementation. * * Author Alain Greiner (2016,2017,2018,2019,2020) * * Copyright (c) UPMC Sorbonne Universites * * This file is part of ALMOS-MKH. * * ALMOS-MKH is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2.0 of the License. * * ALMOS-MKH is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with ALMOS-MKH; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include ///////////////////////////////////////////////////////////////////////////////////////// // Extern global variables ///////////////////////////////////////////////////////////////////////////////////////// extern chdev_directory_t chdev_dir; // allocated in kernel_init.c //////////////////////////////////// void dev_nic_init( chdev_t * chdev ) { assert( __FUNCTION__ , (chdev->func == DEV_FUNC_NIC) , "bad func value"); thread_t * new_thread; error_t error; // get "channel" & "is_rx" fields from chdev descriptor uint32_t channel = chdev->channel; bool_t is_rx = chdev->is_rx; // set chdev name if( is_rx ) snprintk( chdev->name , 16 , "nic%d_rx" , channel ); else snprintk( chdev->name , 16 , "nic%d_tx" , channel ); // initialize the root of the listening sockets list xlist_root_init( XPTR( local_cxy , &chdev->ext.nic.root ) ); // initialize the lock protecting this list remote_busylock_init( XPTR( local_cxy , &chdev->ext.nic.lock ), LOCK_LISTEN_SOCKET ); // call driver init function for this chdev 
hal_drivers_nic_init( chdev ); // select a core to execute the NIC server thread lid_t lid = cluster_select_local_core( local_cxy ); // bind the NIC IRQ to the selected core dev_pic_bind_irq( lid , chdev ); // build pointer on server function void * server_func = is_rx ? &dev_nic_rx_server : &dev_nic_tx_server; // create server thread error = thread_kernel_create( &new_thread, THREAD_DEV, server_func, chdev, lid ); if( error ) { printk("\n[PANIC] in %s : cannot create server thread\n", __FUNCTION__ ); return; } // set "server" field in chdev descriptor chdev->server = new_thread; // set "chdev" field in thread descriptor new_thread->chdev = chdev; // unblock server thread thread_unblock( XPTR( local_cxy , new_thread ) , THREAD_BLOCKED_GLOBAL ); #if (DEBUG_DEV_NIC_TX || DEBUG_DEV_NIC_RX) thread_t * this = CURRENT_THREAD; if( is_rx ) printk("\n[%s] thread[%x,%x] initialized NIC_RX[%d] / server %x\n", __FUNCTION__, this->process->pid, this->trdid, channel, new_thread->trdid ); else printk("\n[%s] thread[%x,%x] initialized NIC_TX[%d] / server %x\n", __FUNCTION__, this->process->pid, this->trdid, channel, new_thread->trdid ); #endif } // end dev_nic_init() /////////////////////////////////////////////////////////////////////////////////////////// // Functions directly called by the client threads /////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////// uint32_t dev_nic_get_key( uint32_t addr, uint16_t port ) { thread_t * this = CURRENT_THREAD; // get cluster and local pointer fo the nic_tx[0] chdev xptr_t dev_xp = chdev_dir.nic_tx[0]; chdev_t * dev_ptr = GET_PTR( dev_xp ); cxy_t dev_cxy = GET_CXY( dev_xp ); if( dev_xp == XPTR_NULL ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : nic_tx[0] chdev undefined in chdev_dir of cluster %x\n", __FUNCTION__, local_cxy ); #endif return -1; } // set command arguments in client thread descriptor this->nic_cmd.buffer = (uint8_t *)addr; 
this->nic_cmd.length = (uint32_t)port; this->nic_cmd.dev_xp = dev_xp; this->nic_cmd.type = NIC_CMD_GET_KEY; // get cmd function pointer from nic_tx[0] chdev descriptor dev_cmd_t * cmd = hal_remote_lpt( XPTR( dev_cxy , &dev_ptr->cmd )); // call driver cmd( XPTR( local_cxy , this ) ); // return command status return this->nic_cmd.status; } // end dev_nic_get_key() ////////////////////////////////////////// error_t dev_nic_set_run( uint32_t channel, uint32_t run ) { thread_t * this = CURRENT_THREAD; if( channel >= LOCAL_CLUSTER->nb_nic_channels ) return -1; // get cluster and local pointer fo the nic_tx[channel] chdev xptr_t dev_xp = chdev_dir.nic_tx[channel]; chdev_t * dev_ptr = GET_PTR( dev_xp ); cxy_t dev_cxy = GET_CXY( dev_xp ); if( dev_xp == XPTR_NULL ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : nic_tx[%d] chdev undefined in chdev_dir of cluster %x\n", __FUNCTION__, channel, local_cxy ); #endif return -1; } // set command arguments in client thread descriptor this->nic_cmd.dev_xp = dev_xp; this->nic_cmd.type = NIC_CMD_SET_RUN; this->nic_cmd.length = channel; this->nic_cmd.status = run; // get cmd function pointer from nic_tx[channel] chdev descriptor dev_cmd_t * cmd = hal_remote_lpt( XPTR( dev_cxy , &dev_ptr->cmd )); // call driver cmd( XPTR( local_cxy , this ) ); // return "error" return this->nic_cmd.error; } // end dev_nic_set_run() ////////////////////////////////// error_t dev_nic_get_instru( void ) { thread_t * this = CURRENT_THREAD; // get cluster and local pointer fo the nic_tx[0] chdev xptr_t dev_xp = chdev_dir.nic_tx[0]; chdev_t * dev_ptr = GET_PTR( dev_xp ); cxy_t dev_cxy = GET_CXY( dev_xp ); if( dev_xp == XPTR_NULL ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : nic_tx[0] chdev undefined in chdev_dir of cluster %x\n", __FUNCTION__, local_cxy ); #endif return -1; } // set command arguments in client thread descriptor this->nic_cmd.dev_xp = dev_xp; this->nic_cmd.type = NIC_CMD_GET_INSTRU; // get cmd function pointer from nic_tx[0] chdev 
descriptor dev_cmd_t * cmd = hal_remote_lpt( XPTR( dev_cxy , &dev_ptr->cmd )); // call driver cmd( XPTR( local_cxy , this ) ); // return "error" return this->nic_cmd.error; } // end dev_nic_get_instru() //////////////////////////////////// error_t dev_nic_clear_instru( void ) { thread_t * this = CURRENT_THREAD; // get cluster and local pointer fo the nic_tx[0] chdev xptr_t dev_xp = chdev_dir.nic_tx[0]; chdev_t * dev_ptr = GET_PTR( dev_xp ); cxy_t dev_cxy = GET_CXY( dev_xp ); if( dev_xp == XPTR_NULL ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : nic_tx[0] chdev undefined in chdev_dir of cluster %x\n", __FUNCTION__, local_cxy ); #endif return -1; } // set command arguments in client thread descriptor this->nic_cmd.dev_xp = dev_xp; this->nic_cmd.type = NIC_CMD_GET_INSTRU; // get cmd function pointer from nic_tx[0] chdev descriptor dev_cmd_t * cmd = hal_remote_lpt( XPTR( dev_cxy , &dev_ptr->cmd )); // call driver cmd( XPTR( local_cxy , this ) ); // return "error" return this->nic_cmd.error; } // end dev_nic_clear_instru() //////////////////////////////////////////////////////////////////////////////////////////// // Static functions called by the NIC_RX server & NIC_TX server threads //////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////// // This static function is used by the dev_nic_rx_handle_tcp() function // to check acceptability of a given sequence number. It returns true when // the argument is contained in a wrap-around window defined by the // and arguments. The window wrap-around when (min > max). //////////////////////////////////////////////////////////////////////////////////////////// // @ seq : [in] value to be checked. // @ min : [in] first base. // @ max : [in] window size. 
//////////////////////////////////////////////////////////////////////////////////////////// static inline bool_t is_in_window( uint32_t seq, uint32_t min, uint32_t max ) { if( max >= min ) // no wrap_around => only one window [min,max] { return( (seq >= min) && (seq <= max) ); } else // window wrap-around => two windows [min,0xFFFFFFFF] and [0,max] { return( (seq <= max) || (seq >= min) ); } } //////////////////////////////////////////////////////////////////////////////////////// // This static function computes the checksum for an IP packet header. // The "checksum" field itself is not taken into account for this computation. //////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on IP packet header (20 bytes) // @ return the checksum value on 16 bits //////////////////////////////////////////////////////////////////////////////////////// static uint16_t dev_nic_ip_checksum( uint8_t * buffer ) { uint32_t i; uint32_t cs; // 32 bits accumulator uint16_t * buf; buf = (uint16_t *)buffer; // compute checksum for( i = 0 , cs = 0 ; i < 10 ; i++ ) { if( i != 5 ) cs += buf[i]; } // one's complement return ~cs; } //////////////////////////////////////////////////////////////////////////////////////// // This static function computes the checksum for a TCP segment or an UDP packet, // defined by the and arguments. // It includes the "pseudo header "defined by the , , and // arguments, and by the UDP/TCP protocol code. //////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on buffer containing the TCP segment or UDP packet. // @ length : [in] number of bytes in this packet/segment (including header). // @ src_ip_addr : [in] source IP address (pseudo header). // @ dst_ip_addr : [in] destination IP address (pseudo header). // @ is_tcp : [in] TCP if true / UDP if false (pseudo header). 
// @ return the checksum value on 16 bits //////////////////////////////////////////////////////////////////////////////////////// static uint16_t dev_nic_tcp_udp_checksum( uint8_t * buffer, uint32_t length, uint32_t src_ip_addr, uint32_t dst_ip_addr, bool_t is_tcp ) { uint32_t i; uint32_t carry; uint32_t cs; // 32 bits accumulator uint16_t * buf; uint32_t max; // number of uint16_t in segment/paket // compute max & buf buf = (uint16_t *)buffer; max = length >> 1; // extend buffer[] if required if( length & 1 ) { max++; buffer[length] = 0; } // compute checksum for TCP segment for( i = 0 , cs = 0 ; i < max ; i++ ) cs += buf[i]; // complete checksum for pseudo-header cs += (src_ip_addr & 0xFFFF); cs += (src_ip_addr >> 16 ); cs += (dst_ip_addr & 0xFFFF); cs += (dst_ip_addr >> 16 ); cs += length; cs += (is_tcp ? PROTOCOL_TCP : PROTOCOL_UDP); // handle carry carry = (cs >> 16); if( carry ) { cs += carry; carry = (cs >> 16); if( carry ) cs += carry; } // one's complement return ~cs; } /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the NIC_TX and NIC_RX server threads to unblock // the TX client thread after completion (success or error) of a TX command registered // in a socket identified by the argument. // The argument defines the command success/failure status. // For all commands, it copies the status value in the tx_sts field, and print an error // message on TXT0 in case of failure. 
/////////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket // @ status : [in] command status (see above) /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_unblock_tx_client( xptr_t socket_xp, int32_t status ) { // get socket thread cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); if( (status != CMD_STS_SUCCESS) && (status != CMD_STS_EOF) ) { uint32_t sock_state = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->state )); uint32_t cmd_type = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_cmd )); pid_t pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); fdid_t fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid )); printk("\n[WARNING] reported by %s : socket[%x,%d] / %s / cmd %s / status %s \n", __FUNCTION__, pid, fdid, socket_state_str(sock_state), socket_cmd_type_str(cmd_type), socket_cmd_sts_str(status) ); } // set tx_sts field in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_sts ) , status ); // get extended point on TX client thread xptr_t client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->tx_client )); // unblock the client thread thread_unblock( client_xp , THREAD_BLOCKED_IO ); } // end dev_nic_unblock_tx_client() /////////////////////////////////////////////////////////////////////////////////////////// // Functions called by the NIC_RX server thread /////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the NIC_RX server threads to unblock // the RX client thread after completion (success or error) of an RX command registered // in a socket identified by the argument. // The argument defines the command success/failure status. 
// For all commands, it copies the status value in the rx_sts field, and print an error // message on TXT0 in case of failure. /////////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on socket // @ status : [in] command status (see above) /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_unblock_rx_client( xptr_t socket_xp, int32_t status ) { // get socket thread cluster and local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); if( (status != CMD_STS_SUCCESS) && (status != CMD_STS_EOF) ) { uint32_t sock_state = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->state )); uint32_t cmd_type = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->rx_cmd )); pid_t pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); fdid_t fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid )); printk("\n[WARNING] reported by %s : socket[%x,%d] / %s / cmd %s / status %s\n", __FUNCTION__, pid, fdid, socket_state_str(sock_state), socket_cmd_type_str(cmd_type), socket_cmd_sts_str(status) ); } // set rx_sts field in socket descriptor hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_sts ) , status ); // get extended point on RX client thread xptr_t client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->rx_client )); // unblock the client thread thread_unblock( client_xp , THREAD_BLOCKED_IO ); } // end dev_nic_unblock_rx_client() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function. // It analyses an Ethernet frame contained in the kernel buffer defined // by the argument, and returns in the argument the length // of the IP packet contained in the Ethernet packet payload. 
/////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on a received Ethernet packet // @ ip_length : [out] length of IP packet (in bytes). // @ return 0 if success / return -1 if illegal packet length. /////////////////////////////////////////////////////////////////////////////////////////// static error_t dev_nic_rx_check_eth( uint8_t * buffer, uint32_t * ip_length ) { uint32_t length = ((uint32_t)buffer[12] << 8) | (uint32_t)buffer[13]; *ip_length = length; return 0; } /////////////////////////////////////////////////////////////////////////////////////////// // This static function analyses the IP packet contained in the kernel buffer // defined by the argument, and returns in the , , // and arguments the informations contained in the IP header. // It checks the IP packet length versus the value contained in Ethernet header. // It checks the IP header checksum. /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : [in] pointer on the IP packet. // @ expected_length : [in] expected IP packet length (from Ethernet header). // @ ip_src_addr : [out] source IP address. // @ ip_dst_addr : [out] destination IP address. // @ protocol : [out] transport protocol type. // @ return 0 if success / return -1 if illegal packet. 
/////////////////////////////////////////////////////////////////////////////////////////// static error_t dev_nic_rx_check_ip( uint8_t * buffer, uint32_t expected_length, uint32_t * ip_src_addr, uint32_t * ip_dst_addr, uint32_t * trsp_protocol ) { #if DEBUG_DEV_NIC_RX thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); #endif // get packet length uint32_t length = ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3]; // discard packet if eth_payload_length != ip_length if( length != expected_length ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] failure : length (%d) != expected_length (%d)\n", __FUNCTION__, this->process->pid, this->trdid, length, expected_length ); #endif return -1; } // get transport protocol type uint8_t protocol = buffer[9]; // discard packet if unsupported protocol if( (protocol != PROTOCOL_TCP) && (protocol != PROTOCOL_UDP) ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] failure : unsupported transport protocol (%d)\n", __FUNCTION__, this->process->pid, this->trdid, protocol ); #endif return -1; } // compute IP header checksum uint32_t computed_cs = (uint32_t)dev_nic_ip_checksum( buffer ); // extract IP header checksum uint32_t received_cs = ((uint32_t)buffer[10] << 8) | ((uint32_t)buffer[11]); // discard packet if bad checksum if( received_cs != computed_cs ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] failure : computed checksum (%d) != received checksum (%d)\n", __FUNCTION__, this->process->pid, this->trdid, computed_cs, received_cs ); #endif return -1; } *ip_src_addr = ((uint32_t)buffer[12] << 24) | ((uint32_t)buffer[13] << 16) | ((uint32_t)buffer[14] << 8) | ((uint32_t)buffer[15] ) ; *ip_dst_addr = ((uint32_t)buffer[16] << 24) | ((uint32_t)buffer[17] << 16) | ((uint32_t)buffer[18] << 8) | ((uint32_t)buffer[19] ) ; *trsp_protocol = protocol; return 0; } // end dev_nic_rx_check_ip() 
/////////////////////////////////////////////////////////////////////////////////////////// // This static function analyses the UDP packet contained in the kernel buffer // defined by the and arguments. // It checks the UDP checksum, and discard corrupted packets. // It scans the list of sockets attached to the NIC_RX chdev to find a matching socket, // and discard the received packet if no UDP socket found. // Finally, it copies the payload to the socket "rx_buf", as long as the packet payload // is not larger than the rx_buf. // It set the "rx_valid" flip-flop, and unblock the client thread when the last expected // byte has been received. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on local NIC_RX chdev descriptor. // @ k_buf : [in] pointer on the UDP packet in local kernel buffer. // @ k_length : [in] number of bytes in buffer (including UDP header). // @ pkt_src_addr : [in] source IP address (from IP packet header). // @ pkt_dst_addr : [in] destination IP address (from IP packet header). 
/////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_rx_handle_udp_packet( chdev_t * chdev, uint8_t * k_buf, uint32_t k_length, uint32_t pkt_src_addr, uint32_t pkt_dst_addr ) { xptr_t root_xp; // extended pointer on attached sockets list root xptr_t lock_xp; // extended pointer on chdev lock xptr_t iter_xp; // iterator on socket list xptr_t socket_xp; // extended pointer on socket descriptor cxy_t socket_cxy; socket_t * socket_ptr; uint32_t socket_type; // socket type uint32_t socket_state; // socket state uint32_t local_addr; // local IP address from socket uint32_t local_port; // local port from socket uint32_t remote_addr; // remote IP address from socket uint32_t remote_port; // remote port from socket bool_t match_socket; // matching socket found uint16_t checksum; // computed checksum uint16_t pkt_checksum; // received checksum xptr_t socket_rbuf_xp; // extended pointer on socket rx_buf xptr_t socket_lock_xp; // extended pointer on socket lock xptr_t socket_rx_client; // socket rx_client thread bool_t socket_rx_valid; // socket rx_command valid uint32_t socket_rx_cmd; // socket rx_command type uint32_t payload; // number of bytes in payload uint32_t status; // number of bytes in rx_buf uint32_t space; // number of free slots in rx_buf uint32_t moved_bytes; // number of bytes actually moved to rx_buf #if DEBUG_DEV_NIC_RX || DEBUG_DEV_NIC_ERROR thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_DEV_NIC_RX uint32_t fdid; uint32_t pid; if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] enter / channel %d / plen %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, chdev->channel, k_length, cycle ); if( (DEBUG_DEV_NIC_RX < cycle) && (DEBUG_DEV_NIC_RX & 1)) putb("64 first bytes in k_buf" , k_buf , 64 ); #endif // build extended pointers on list of sockets attached to NIC_RX chdev root_xp = XPTR( local_cxy , &chdev->wait_root ); lock_xp = XPTR( 
local_cxy , &chdev->wait_lock ); // extract checksum from received UDP packet header pkt_checksum = ((uint16_t)k_buf[6] << 8) | (uint16_t)k_buf[7]; // reset checksum field k_buf[6] = 0; k_buf[7] = 0; // compute checksum from received UDP packet checksum = dev_nic_tcp_udp_checksum( k_buf, k_length, pkt_src_addr, pkt_dst_addr, false ); // is_not_tcp // discard corrupted packet if( pkt_checksum != checksum ) { #if DEBUG_DEV_NIC_ERROR printk("\n[WARNING] in %s : thread[%x,%x] discard corrupted packet on channel %d / cycle %d\n" " expected checksum %x / received checksum %x\n", __FUNCTION__, this->process->pid, this->trdid, chdev->channel, cycle, (uint32_t)checksum, (uint32_t)pkt_checksum ); #endif return; } // get src_port and dst_port from UDP header uint32_t pkt_src_port = ((uint32_t)k_buf[0] << 8) | (uint32_t)k_buf[1]; uint32_t pkt_dst_port = ((uint32_t)k_buf[2] << 8) | (uint32_t)k_buf[3]; // take the lock protecting the sockets list remote_busylock_acquire( lock_xp ); match_socket = false; // scan sockets list to find a match XLIST_FOREACH( root_xp , iter_xp ) { // get socket cluster and local pointer socket_xp = XLIST_ELEMENT( iter_xp , socket_t , rx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // get socket type socket_type = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->state )); // skip TCP socket if( socket_type == SOCK_STREAM ) continue; // get relevant info from socket descriptor local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute matching bool_t local_match = (local_addr == pkt_dst_addr) && (local_port == pkt_dst_port); bool_t remote_match = (remote_addr == pkt_src_addr) && (remote_port == 
pkt_src_port); if (socket_state == UDP_STATE_ESTAB ) match_socket = local_match && remote_match; else match_socket = local_match; // exit loop if matching if( match_socket ) { #if DEBUG_DEV_NIC_RX fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid ) ); pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ) ); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] found matching UDP socket[%d,%d] / state %s\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid, socket_state_str(socket_state) ); #endif break; } } // release the lock protecting the sockets list remote_busylock_release( lock_xp ); // discard unexpected packet if( match_socket == false ) { #if DEBUG_DEV_NIC_ERROR printk("\n[WARNING] in %s : thread[%x,%s] discard unexpected packet on channel %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, chdev->channel, cycle ); #endif return; } // build extended pointers on socket.rx_buf and socket.lock socket_rbuf_xp = XPTR( socket_cxy , &socket_ptr->rx_buf ); socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); // take the lock protecting the socket remote_queuelock_acquire( socket_lock_xp ); // get status & space from rx_buf status = remote_buf_status( socket_rbuf_xp ); space = (1 << CONFIG_SOCK_RX_BUF_ORDER) - status; // get socket rx_client, rx_valid and rx_cmd values socket_rx_client = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->rx_client ) ); socket_rx_valid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->rx_valid ) ); socket_rx_cmd = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->rx_cmd ) ); // get number of bytes in payload payload = k_length - UDP_HEAD_LEN; // compute number of bytes to move : min (space , seg_payload) moved_bytes = ( space < payload ) ? 
space : payload; // move payload from kernel buffer to socket rx_buf remote_buf_put_from_kernel( socket_rbuf_xp, k_buf + UDP_HEAD_LEN, moved_bytes ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] for socket[%d,%d] move %d bytes to rx_buf / buf_sts %d\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid, moved_bytes, remote_buf_status(socket_rbuf_xp), moved_bytes ); #endif // signal client thread if pending RECV command if( (socket_rx_valid == true) && (socket_rx_cmd == CMD_RX_RECV) ) { // reset rx_valid hal_remote_s32( XPTR(socket_cxy , &socket_ptr->rx_valid), false ); // report success to RX client thread dev_nic_unblock_rx_client( socket_xp , CMD_STS_SUCCESS ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] for UDP socket[%x,%d] / unblock client thread\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid ); #endif } else { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] for socket[%x,%d] / no client thread\n" " rx_valid %d / rx_cmd %s\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid, socket_rx_valid , socket_cmd_type_str(socket_rx_cmd) ); #endif } // release the lock protecting the socket remote_queuelock_release( socket_lock_xp ); } // end dev_nic_rx_handle_udp_packet() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_rx_server() function to handle one RX // TCP segment contained in a kernel buffer defined by the & arguments. // The and arguments have been extracted from the IP // IP header. The local and remote ports are obtained from the TCP header. // It the received segment doesn't match any connected socket attached to the selected // , or any listening socket waiting connection, or if the segment is corrupted, // the segment is discarded. This function implement the TCP error recovery protocol, // as specified by the RFC. 
Depending on both the socket state, and the segment header: // - it register data in the RX buffer, // - it update the socket state and TCB, // - it register acknolegce requests in the R2T queue, // - it register connection requests in the CRQ queue, /////////////////////////////////////////////////////////////////////////////////////////// // Implementation note: // A "connected" socket is actually a TCP socket already attached to a given NIC_RX[k] // chdev, and can therefore receive a TCP segment on the NIC channel . A "listening" // socket is a TCP socket in the LISTEN state. This function operates in 6 steps: // 1) It checks the TCP checksum, and discard the corrupted segment if corrupted. // 2) It scans the list of sockets attached to the NIC_RX[k] chdev, to find one TCP // socket matching the TCP segment header. // 3) When a matching connected socket is found, it handles the received segment, including // the SYN, FIN, ACK and RST flags. It updates the socket state when required, moves // data to the rx_buf when possible, and return. It takes the lock protecting the socket, // because a connected socket is accessed by both the NIC_TX and NIC_RX server threads. // 4) If no matching connected socket has been found, it scans the list of listening // sockets to find a matching listening socket. // 5) When a matching listening socket is found, it simply registers a new connection // request in the listening socket CRQ queue, when the SYN flag is set, and insert // a SYN-ACK request in the socket R2T queue. // 6) It discards the packet if no connected or listening socket has been found. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on local NIC_RX chdev descriptor. // @ k_buf : [in] pointer on the TCP packet in local kernel buffer. // @ k_length : [in] number of bytes in buffer (including TCP header). // @ seg_remote_addr : [in] remote IP address (from IP packet header). 
// @ seg_local_addr : [in] local IP address (from IP packet header). /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_rx_handle_tcp_segment( chdev_t * chdev, uint8_t * k_buf, uint32_t k_length, uint32_t seg_remote_addr, uint32_t seg_local_addr ) { xptr_t root_xp; // extended pointer on connected sockets list root xptr_t lock_xp; // extended pointer on chdev lock xptr_t iter_xp; // iterator for these queues bool_t attached_match; // true if one attached socket match bool_t listening_match; // true if one listening socket match xptr_t socket_xp; // extended pointer on matching socket cxy_t socket_cxy; // cluster identifier of matching socket socket_t * socket_ptr; // local pointer on matching socket uint32_t socket_local_addr; // local IP address from socket uint32_t socket_local_port; // local port from socket uint32_t socket_remote_addr; // remote IP address from socket uint32_t socket_remote_port; // remote port from socket uint32_t socket_state; // socket state uint32_t socket_type; // socket type bool_t socket_tx_valid; // TX command valid uint32_t socket_tx_cmd; // TX command type uint32_t socket_tx_nxt; // next byte to send in TX stream uint32_t socket_tx_una; // first unacknowledged byte in TX stream uint32_t socket_tx_len; // number of bytes in tx_buf uint32_t socket_tx_ack; // number of acknowledged bytes in tx_buf bool_t socket_rx_valid; // RX command valid uint32_t socket_rx_cmd; // TX command type uint32_t socket_rx_nxt; // next expected byte in RX stream uint32_t socket_rx_wnd; // current window value in RX stream uint32_t socket_rx_irs; // initial sequence index in RX stream xptr_t socket_lock_xp; // extended pointer on lock protecting socket state xptr_t socket_rx_buf_xp; // extended pointer on socket rx_buf xptr_t socket_r2tq_xp; // extended pointer on socket R2T queue xptr_t socket_crqq_xp; // extended pointer on socket CRQ queue uint16_t checksum; // computed TCP segment chechsum 
error_t error; // get relevant infos from TCP segment header uint32_t seg_remote_port = ((uint32_t)k_buf[0] << 8) | (uint32_t)k_buf[1]; uint32_t seg_local_port = ((uint32_t)k_buf[2] << 8) | (uint32_t)k_buf[3]; uint32_t seg_seq_num = ((uint32_t)k_buf[4] << 24) | ((uint32_t)k_buf[5] << 16) | ((uint32_t)k_buf[6] << 8) | ((uint32_t)k_buf[7] ); uint32_t seg_ack_num = ((uint32_t)k_buf[8] << 24) | ((uint32_t)k_buf[9] << 16) | ((uint32_t)k_buf[10] << 8) | ((uint32_t)k_buf[11] ); uint8_t seg_hlen = k_buf[12] << 2; // TCP header length in bytes uint8_t seg_flags = k_buf[13]; bool_t seg_ack_set = ((seg_flags & TCP_FLAG_ACK) != 0); bool_t seg_syn_set = ((seg_flags & TCP_FLAG_SYN) != 0); bool_t seg_fin_set = ((seg_flags & TCP_FLAG_FIN) != 0); bool_t seg_rst_set = ((seg_flags & TCP_FLAG_RST) != 0); uint16_t seg_window = ((uint32_t)k_buf[14] << 8) | (uint32_t)k_buf[15]; uint16_t seg_checksum = ((uint32_t)k_buf[16] << 8) | (uint32_t)k_buf[17]; uint32_t seg_data_len = k_length - seg_hlen; // number of bytes in payload uint32_t seg_data_dup; // number of duplicated bytes in payload uint32_t seg_data_new; // number of new bytes in payload #if DEBUG_DEV_NIC_RX || DEBUG_DEV_NIC_ERROR uint32_t fdid; pid_t pid; thread_t * this = CURRENT_THREAD; uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] enters / tcp_length %d / tcp_flags %x / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, k_length, seg_flags , cycle ); #endif // reset checksum field k_buf[16] = 0; k_buf[17] = 0; // compute TCP checksum checksum = dev_nic_tcp_udp_checksum( k_buf, k_length, seg_remote_addr, seg_local_addr, true ); // is_tcp // discard segment if corrupted if( seg_checksum != checksum ) { #if DEBUG_DEV_NIC_ERROR printk("\n[WARNING] in %s : thread[%x,%x] / checksum failure on channel %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, chdev->channel, cycle ); #endif return; } // build extended pointer on xlist 
of sockets attached to NIC_RX chdev root_xp = XPTR( local_cxy , &chdev->wait_root ); lock_xp = XPTR( local_cxy , &chdev->wait_lock ); attached_match = false; // take the lock protecting the list of attached sockets remote_busylock_acquire( lock_xp ); // scan list of attached sockets to find a matching TCP socket XLIST_FOREACH( root_xp , iter_xp ) { // get socket cluster and local pointer socket_xp = XLIST_ELEMENT( iter_xp , socket_t , rx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // get socket type and state socket_type = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->state )); // skip UDP socket if( socket_type == SOCK_DGRAM ) continue; // get relevant socket infos for matching socket_local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); socket_remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); socket_local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); socket_remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute matching condition for a connected socket attached_match = (socket_local_addr == seg_local_addr) && (socket_local_port == seg_local_port) && (socket_remote_addr == seg_remote_addr) && (socket_remote_port == seg_remote_port) ; // exit loop if matching if( attached_match ) { #if DEBUG_DEV_NIC_RX || DEBUG_DEV_NIC_ERROR fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid ) ); pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ) ); #endif #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] matching attached TCP socket[%d,%d] / state %s\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif break; } } // end loop on attached sockets // release the lock protecting the list of attached sockets remote_busylock_release( lock_xp ); // handle TCP segment for an attached socket if( attached_match ) { // The actions depend on 
both the socket state and the received segment flags : // - update socket state, // - move data to rx_buf, // - register a request in R2T queue when required // build extended pointers on various socket fields socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); socket_rx_buf_xp = XPTR( socket_cxy , &socket_ptr->rx_buf ); socket_r2tq_xp = XPTR( socket_cxy , &socket_ptr->r2tq ); // take the lock protecting the matching socket remote_queuelock_acquire( socket_lock_xp ); // get relevant socket infos from socket descriptor socket_tx_valid = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_valid )); socket_tx_cmd = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_cmd )); socket_tx_nxt = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_nxt )); socket_tx_una = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_una )); socket_tx_ack = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_ack )); socket_tx_len = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->tx_len )); socket_rx_valid = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_valid )); socket_rx_cmd = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_cmd )); socket_rx_nxt = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_nxt )); socket_rx_wnd = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_wnd )); socket_rx_irs = hal_remote_l32(XPTR( socket_cxy , &socket_ptr->rx_irs )); // handle the received segment, depending on the matching socket state switch( socket_state ) { //////////////////////// case TCP_STATE_SYN_SENT: // TCP client waiting for SYN-ACK { // [1] & [2] check ACK and RST if( seg_ack_set ) { bool_t ack_ok = (seg_ack_num == (CONFIG_SOCK_ISS_CLIENT + 1) ); if( seg_rst_set && ack_ok ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s RST received from remote TCP => close\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // report RST to local TCP client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_RST ); // update socket state 
hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_BOUND ); break; } if( seg_rst_set && (ack_ok == false) ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s RST but expect ack_num %x != rcvd %x => discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), CONFIG_SOCK_ISS_CLIENT + 1, seg_ack_num ); #endif break; } if( (seg_rst_set == false) && (ack_ok == false) ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s expected ack_num %x != rcvd %x => send RST\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), CONFIG_SOCK_ISS_CLIENT + 1, seg_ack_num ); #endif // send RST to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST, chdev->channel ); break; } } // [3] handle security & precedence TODO ... someday // [4] handle SYN-ACK if( seg_syn_set && seg_ack_set ) // received SYN and ACK => report success { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : received expected SYN-ACK\n", __FUNCTION__, pid, fdid , socket_state_str(socket_state) ); #endif // set socket.tx_una hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_una), seg_ack_num ); // set socket.rx_irs hal_remote_s32( XPTR(socket_cxy , &socket_ptr->rx_irs), seg_seq_num ); // set socket.rx_nxt hal_remote_s32( XPTR(socket_cxy , &socket_ptr->rx_nxt), seg_seq_num + 1 ); // update socket.state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_ESTAB ); // make an ACK request to R2T queue socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, chdev->channel ); // report succes to local TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } else // SYN without ACK => TCP client becomes a TCP server { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : received SYN without ACK => send a SYN_ACK\n", __FUNCTION__, pid, fdid , socket_state_str(socket_state) ); #endif // update socket.state hal_remote_s32( 
XPTR(socket_cxy,&socket_ptr->state), TCP_STATE_SYN_RCVD ); // set socket.tx_nxt hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_nxt), CONFIG_SOCK_ISS_SERVER ); // set socket.rx_nxt to seg_seq_num + 1 hal_remote_s32( XPTR(socket_cxy,&socket_ptr->rx_nxt), seg_seq_num + 1 ); // send SYN.ACK to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_SYN | TCP_FLAG_ACK, chdev->channel ); } break; } // end state SYN_SENT //////////////////////// all "connected" states case TCP_STATE_SYN_RCVD: case TCP_STATE_ESTAB: case TCP_STATE_FIN_WAIT1: case TCP_STATE_FIN_WAIT2: case TCP_STATE_CLOSE_WAIT: case TCP_STATE_CLOSING: case TCP_STATE_LAST_ACK: case TCP_STATE_TIME_WAIT: { // [1] check SEQ_NUM // - we accept duplicate segments (i.e. seq_num < rx_next) // - we don't accept out of order segment (i.e. seq_num_num > rx_next) // => seq_num must be in window [rx_nxt - rx_win , rx_nxt] bool_t seq_ok = is_in_window( seg_seq_num, (socket_rx_nxt - socket_rx_wnd), socket_rx_nxt ); if( seq_ok == false ) // SEQ_NUM not acceptable { if( seg_rst_set ) { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s expect seq_num %x != rcvd %x and RST => discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), CONFIG_SOCK_ISS_CLIENT + 1, seg_seq_num ); #endif break; } else // no RST { // send ACK to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, chdev->channel ); #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s expect seq_num %x != rcvd %x => ACK and discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), CONFIG_SOCK_ISS_CLIENT + 1, seg_seq_num ); #endif break; } } else // SEQ_NUM acceptable { // compute number of new bytes & number of duplicated bytes if( seg_seq_num != socket_rx_nxt ) // duplicate segment { seg_data_dup = socket_rx_nxt - seg_seq_num; seg_data_new = (seg_data_len > seg_data_dup) ? 
(seg_data_len - seg_data_dup) : 0; } else // expected segment { seg_data_dup = 0; seg_data_new = seg_data_len; } #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s seq_num %x / rx_nxt %x / len %d / new %d / dup %d\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), seg_seq_num, socket_rx_nxt, seg_data_len, seg_data_new, seg_data_dup ); #endif } // [2] handle RST flag (depending on socket state) if( seg_rst_set ) { if( socket_state == TCP_STATE_SYN_RCVD ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s RST received from remote TCP => report to user\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // report RST to local TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_RST ); // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_BOUND ); break; } else if( (socket_state == TCP_STATE_ESTAB ) || (socket_state == TCP_STATE_FIN_WAIT1 ) || (socket_state == TCP_STATE_FIN_WAIT2 ) || (socket_state == TCP_STATE_CLOSE_WAIT) ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s / received RST flag\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // report RST to local TX client thread if( socket_tx_valid ) dev_nic_unblock_tx_client( socket_xp, CMD_STS_RST ); // report RST to local RX client thread if( socket_rx_valid ) dev_nic_unblock_rx_client( socket_xp, CMD_STS_RST ); // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_BOUND ); break; } else // states CLOSING / LAST_ACK / TIME_WAIT { // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_BOUND ); break; } } // [3] handle security & precedence TODO ... 
someday // [4] check SYN if( seg_syn_set ) // received SYN => send RST to remote { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s socket[%x,%d] %s : received SYN flag => send RST-ACK\n", __FUNCTION__, pid, fdid , socket_state_str(socket_state) ); #endif // send RST & ACK to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST | TCP_FLAG_ACK, chdev->channel ); // report RST to local TX client thread if( socket_tx_valid ) dev_nic_unblock_tx_client( socket_xp, CMD_STS_RST ); // report RST to local RX client thread if( socket_rx_valid ) dev_nic_unblock_rx_client( socket_xp, CMD_STS_RST ); // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_BOUND ); break; } // [5] handle ACK (depending on socket state) if( seg_ack_set == false ) // missing ACK => discard segment { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s / no ACK in segment => discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif break; } // compute acceptable ACK bool_t ack_ok = is_in_window( seg_ack_num, socket_tx_una, socket_tx_nxt ); if( socket_state == TCP_STATE_SYN_RCVD ) { if( ack_ok ) // acceptable ACK { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : received expected ACK => update socket\n", __FUNCTION__, pid, fdid , socket_state_str(socket_state) ); #endif // set socket.tx_una hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_una), seg_ack_num ); // update socket.state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_ESTAB ); // report success to local TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } else // send RST to remote { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s / ACK %x not in [%x,%x] => discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), seg_ack_num, socket_tx_una, socket_tx_nxt ); #endif // send RST & ACK to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST | 
TCP_FLAG_ACK, chdev->channel ); break; } } else if( (socket_state == TCP_STATE_ESTAB) || (socket_state == TCP_STATE_FIN_WAIT1) || (socket_state == TCP_STATE_FIN_WAIT2) || (socket_state == TCP_STATE_FIN_WAIT2) || (socket_state == TCP_STATE_CLOSE_WAIT) || (socket_state == TCP_STATE_CLOSING) ) { if( ack_ok ) // acceptable ack { // compute number of acknowledged bytes uint32_t ack_bytes = seg_ack_num - socket_tx_una; if( ack_bytes ) // handle acknowledged bytes { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %d bytes acknowledged => update socket\n", __FUNCTION__, pid, fdid, ack_bytes ); #endif // update socket.tx_una, socket.tx_ack, and socket.tx_wnd fields hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_una), seg_ack_num ); hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_ack), socket_tx_ack + ack_bytes ); hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_wnd), seg_window ); // unblock the TX client thread if last byte acknowledged if( (socket_tx_ack + ack_bytes) == socket_tx_len ) { // report success to TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : last ack => unblock TX client thread\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif } } if( socket_state == TCP_STATE_FIN_WAIT1 ) { // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_FIN_WAIT2 ); } if( socket_state == TCP_STATE_FIN_WAIT2 ) { // TODO } else if( socket_state == TCP_STATE_CLOSING ) { // update socket state hal_remote_s32( XPTR(socket_cxy , &socket_ptr->state), TCP_STATE_TIME_WAIT ); } else if( socket_state == TCP_STATE_CLOSING ) { // TODO } } else // unacceptable ACK => discard segment { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s / ACK %x not in [%x,%x] => discard\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), seg_ack_num, socket_tx_una, socket_tx_nxt ); #endif 
break; } } else if( socket_state == TCP_STATE_LAST_ACK ) { // TODO } else if( socket_state == TCP_STATE_TIME_WAIT ) { // TODO } // [6] handle URG flag TODO ... someday // [7] handle received data : update socket state, // move data to rx_buf, register ACK request to R2T queue, // unblock the RX client thread in case of pending RX_RECV command if((socket_state == TCP_STATE_ESTAB) || (socket_state == TCP_STATE_FIN_WAIT1) || (socket_state == TCP_STATE_FIN_WAIT2) ) { // register new bytes if requested if( seg_data_new ) { // get number of bytes already stored in rx_buf uint32_t status = remote_buf_status( socket_rx_buf_xp ); // compute space in rx_buf and actual number of acceptable bytes // when (space < seg_data_new) the last new bytes are discarded uint32_t space = (1 << CONFIG_SOCK_RX_BUF_ORDER) - status; uint32_t rcv_bytes = (space < seg_data_new) ? space : seg_data_new; // move new bytes from k_buf to rx_buf remote_buf_put_from_kernel( socket_rx_buf_xp, k_buf + seg_hlen + seg_data_dup, rcv_bytes ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : move %d bytes to rx_buf\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state), rcv_bytes ); #endif // update socket.rx_nxt and socket_rx_wnd fields hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + rcv_bytes ); hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_wnd ), socket_rx_wnd - rcv_bytes ); // unblock RX client if required if( (socket_rx_valid == true) && (socket_rx_cmd == CMD_RX_RECV) ) { // reset rx_valid hal_remote_s32( XPTR(socket_cxy,&socket_ptr->rx_valid), false ); // report success to RX client thread dev_nic_unblock_rx_client( socket_xp , CMD_STS_SUCCESS ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : last data => unblock RX client thread\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif } } // make an ACK request to remote socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, 
chdev->channel ); } // end payload handling // [8] handle FIN flag depending on socket state if( (socket_state == TCP_STATE_SYN_RCVD) || (socket_state == TCP_STATE_ESTAB ) ) { if( seg_fin_set ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : FIN-ACK => goes CLOSE_WAIT\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // update socket.rx_nxt when FIN received hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + 1 ); // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSE_WAIT ); // send ACK to remote TCP socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, chdev->channel ); // check pending RX_RECV command if( (socket_rx_valid == true) && (socket_rx_cmd == CMD_RX_RECV) ) { // reset rx_valid hal_remote_s32( XPTR(socket_cxy,&socket_ptr->rx_valid), false ); // report FIN to RX client thread dev_nic_unblock_rx_client( socket_xp , CMD_STS_EOF ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : unblock RX client waiting on RECV\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif } } } else if( socket_state == TCP_STATE_FIN_WAIT1 ) { if( seg_fin_set ) { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : FIN-ACK => goes CLOSING\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // update socket.rx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + 1 ); // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSING ); // send ACK request to remote socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, chdev->channel ); } else // received ACK only { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : only ACK => goes FIN_WAIT2\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // update socket state hal_remote_s32( XPTR( socket_cxy , 
&socket_ptr->state ), TCP_STATE_FIN_WAIT2 ); } } else if( socket_state == TCP_STATE_FIN_WAIT2 ) { if( seg_fin_set ) // received ACK & FIN { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] socket[%x,%d] %s : FIN-ACK => goes CLOSED / unblock client\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // update socket.rx_nxt when FIN received hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_nxt ), socket_rx_nxt + 1 ); // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSED ); // make an ACK request to R2T queue socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_ACK, chdev->channel ); // report success to TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } } else if( socket_state == TCP_STATE_CLOSING ) // received ACK { // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSED ); // report success to TX client thread dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } else if( socket_state == TCP_STATE_CLOSE_WAIT ) { // do nothing } else if( socket_state == TCP_STATE_LAST_ACK ) { // update socket.state when ACK received hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_CLOSED ); // unblock TX client thead for success dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } } // end case connected states } // end switch socket state // release the lock protecting socket state remote_queuelock_release( socket_lock_xp ); return; } // end if attached socket // 4. 
scan the list of listening sockets listening_match = false; // get pointers on NIC_RX[0] chdev xptr_t rx0_chdev_xp = chdev_dir.nic_rx[0]; chdev_t * rx0_chdev_ptr = GET_PTR( rx0_chdev_xp ); cxy_t rx0_chdev_cxy = GET_CXY( rx0_chdev_xp ); // build extended pointers on list of listening sockets xptr_t rx0_root_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.root ); xptr_t rx0_lock_xp = XPTR( rx0_chdev_cxy , &rx0_chdev_ptr->ext.nic.lock ); // take the lock protecting the list of listening sockets remote_busylock_acquire( rx0_lock_xp ); // scan the xlist of listening socket XLIST_FOREACH( rx0_root_xp , iter_xp ) { // get socket cluster and local pointer socket_xp = XLIST_ELEMENT( iter_xp , socket_t , rx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // get relevant socket type and state socket_type = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->state )); // check socket type and state assert( __FUNCTION__, (socket_type == SOCK_STREAM ) , "illegal socket type" ); assert( __FUNCTION__, (socket_state == TCP_STATE_LISTEN ) , "illegal socket state" ); // get relevant socket infos for matching socket_local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); socket_local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); // compute matching condition for a listening socket listening_match = (socket_local_addr == seg_local_addr) && (socket_local_port == seg_local_port); // exit loop if matching if( listening_match ) { #if DEBUG_DEV_NIC_RX || DEBUG_DEV_NIC_ERROR fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid ) ); pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid ) ); #endif #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] matching listening socket[%d,%d] / state %s\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif break; } } // end loop on listening sockets // release the lock 
protecting the list of listening sockets remote_busylock_release( rx0_lock_xp ); // 5. handle TCP segment for a matching listening socket if( listening_match ) { // The actions depend on the received segment flags // - discard segment for RST or ACK, // - for SYN, register the connect request in listening socket CRQ queue, // and unblock the client thread in case of pending RX_ACCEPT command. // [1] check RST if( seg_rst_set ) // discard segment { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s / received RST => discard segment\n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif return; } // [2] check ACK if( seg_ack_set ) // send RST to remote { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : socket[%x,%d] %s received ACK => send RST & discard \n", __FUNCTION__, pid, fdid, socket_state_str(socket_state) ); #endif // make an RST request to R2T queue socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST, chdev->channel ); return; } // [3] handle security & precedence TODO ... 
someday // handle SYN == CONNECT request if( seg_syn_set ) { // build extended pointers on various listening socket fields socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); socket_crqq_xp = XPTR( socket_cxy , &socket_ptr->crqq ); socket_r2tq_xp = XPTR( socket_cxy , &socket_ptr->r2tq ); // take the lock protecting the matching socket remote_queuelock_acquire( socket_lock_xp ); // try to register request into CRQ queue error = socket_put_crq_request( socket_crqq_xp, seg_remote_addr, seg_remote_port, seg_seq_num, seg_window ); if ( error ) // CRQ full { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : listening socket[%x,%d] %s receive SYN but CRQ full => send RST\n", __FUNCTION__, pid, fdid ); #endif // make an RST request to R2T queue socket_put_r2t_request( socket_r2tq_xp, TCP_FLAG_RST, chdev->channel ); } else // register request in listening socket CRQ { #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) if( cycle > DEBUG_DEV_NIC_RX ) printk("\n[%s] listening socket[%x,%d] register request in CRQ\n", __FUNCTION__, pid, fdid ); #endif bool_t rx_valid = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->rx_valid)); uint32_t rx_cmd = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->rx_cmd)); // check pending ACCEPT command if( rx_valid && (rx_cmd == CMD_RX_ACCEPT) ) { // reset rx_valid hal_remote_s32( XPTR( socket_cxy , &socket_ptr->rx_valid ), false ); // report success to RX client thread, that will // create a new socket and request a SYN-ACK to TX server thread dev_nic_unblock_rx_client( socket_xp , CMD_STS_SUCCESS ); #if DEBUG_DEV_NIC_RX if( DEBUG_DEV_NIC_RX < cycle ) if( cycle > DEBUG_DEV_NIC_RX ) printk("\n[%s] listening socket[%x,%d] unblock RX client thread\n", __FUNCTION__, fdid ); #endif } } // end register request in CRQ // release the lock protecting the matching socket remote_queuelock_release( socket_lock_xp ); } // end if SYN return; } // end if listening_match // 6. 
no attached socket found and no listening socket found => discard segment #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : thread[%x,%d] / unexpected TCP segment => discard / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, chdev->channel, cycle ); #endif } // end dev_nic_rx_handle_tcp_segment() ///////////////////////////////////////// void dev_nic_rx_server( chdev_t * chdev ) { uint8_t k_buf[2048]; // kernel buffer for one ETH/IP/UDP packet uint32_t pkt_src_addr; // packet source IP address uint32_t pkt_dst_addr; // packet destination IP address uint32_t trsp_protocol; // transport protocol (TCP / UDP) uint32_t eth_length; // size of Ethernet packet (bytes) uint32_t ip_length; // size of IP packet in bytes error_t error; thread_t * this = CURRENT_THREAD; // check thread can yield thread_assert_can_yield( this , __FUNCTION__ ); // check chdev direction and type assert( __FUNCTION__, (chdev->func == DEV_FUNC_NIC) && (chdev->is_rx == true) , "illegal chdev type or direction" ); #if DEBUG_DEV_NIC_RX || DEBUG_DEV_NIC_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); #endif #if DEBUG_DEV_NIC_RX if( cycle > DEBUG_DEV_NIC_RX ) printk("\n[%s] thread[%x,%x] starts / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // avoid warning ip_length = 0; error = 0; // get extended pointers on server tread and chdev xptr_t thread_xp = XPTR( local_cxy , this ); xptr_t chdev_xp = XPTR( local_cxy , chdev ); while( 1 ) { // call NIC driver to move one packet from NIC_RX queue to kernel buffer this->nic_cmd.dev_xp = chdev_xp; this->nic_cmd.type = NIC_CMD_READ; this->nic_cmd.buffer = k_buf; chdev->cmd( XPTR( local_cxy , this ) ); // get packet length eth_length = this->nic_cmd.status; // check success if( eth_length == 0 ) // queue empty => block and deschedule { #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] NIC_RX_QUEUE empty => blocks on / cycle %d\n", __FUNCTION__, 
this->process->pid, this->trdid, cycle ); #endif // enable NIC_RX IRQ dev_pic_enable_irq( this->core->lid , chdev_xp ); // block and deschedule thread_block( thread_xp , THREAD_BLOCKED_ISR ); sched_yield("client blocked on NIC_TX queue full"); // disable NIC-RX IRQ dev_pic_disable_irq( this->core->lid , chdev_xp ); #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] resumes / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // check possible error reported by NIC ISR if( this->nic_cmd.error ) { printk("\n[PANIC] in %s : %s DMA engine cannot access RX_QUEUE / cycle %d\n", __FUNCTION__, chdev->name , (uint32_t)hal_get_cycles() ); } } else // success => handle packet { #if DEBUG_DEV_NIC_RX cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) #endif // analyse the ETH header error = dev_nic_rx_check_eth( k_buf, &ip_length ); // discard packet if error reported by Ethernet layer if( error ) { #if DEBUG_DEV_NIC_ERROR printk("\n[WARNING] in %s : thread[%x,%x] discard ETH packet / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif continue; } #if (DEBUG_DEV_NIC_RX & 1) cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[%s] thread[%x,%x] successfully checked ETH packet / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // analyse the IP header error = dev_nic_rx_check_ip( k_buf + ETH_HEAD_LEN, ip_length, &pkt_src_addr, &pkt_dst_addr, &trsp_protocol ); // discard packet if error reported by IP layer if( error ) { #if DEBUG_DEV_NIC_ERROR printk("\n[WARNING] in %s : thread[%x,%x] discard IP packet / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif continue; } #if (DEBUG_DEV_NIC_RX & 1 ) cycle = (uint32_t)hal_get_cycles(); if( (DEBUG_DEV_NIC_RX < cycle) && (trsp_protocol == PROTOCOL_UDP) ) printk("\n[%s] thread[%x,%x] successfully checked UDP packet / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, cycle ); if( (DEBUG_DEV_NIC_RX < cycle) && (trsp_protocol == PROTOCOL_TCP) ) printk("\n[%s] thread[%x,%x] successfully checked TCP segment / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // call relevant transport protocol if( trsp_protocol == PROTOCOL_UDP ) { dev_nic_rx_handle_udp_packet( chdev, k_buf + ETH_HEAD_LEN + IP_HEAD_LEN, ip_length - IP_HEAD_LEN, pkt_src_addr, pkt_dst_addr ); } else if ( trsp_protocol == PROTOCOL_TCP) { dev_nic_rx_handle_tcp_segment( chdev, k_buf + ETH_HEAD_LEN + IP_HEAD_LEN, ip_length - IP_HEAD_LEN, pkt_src_addr, pkt_dst_addr ); } else // discard packet if unsupported transport protocol { #if DEBUG_DEV_NIC_ERROR cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_RX < cycle ) printk("\n[WARNING] in %s : thread[%x,%x] unsupported transport protocol %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, trsp_protocol, cycle ); #endif continue; } } // end else success } // end of while loop } // end dev_nic_rx_server() /////////////////////////////////////////////////////////////////////////////////////////// // Functions used by the NIC_TX server thread /////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_send_packet() function. // It moves one ETH/IP/UDP packet from the kernel buffer identified by the and // arguments to the NIC_TX_QUEUE identified the argument. // It blocks and deschedules on the BLOCKED_ISR condition if the queue is full. /////////////////////////////////////////////////////////////////////////////////////////// // @ chdev : [in] local pointer on NIC_TX chdev. // @ k_buf : [in] pointer on a local kernel buffer (2K bytes). // @ length : [in] actual Ethernet packet length in bytes. 
/////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_move_packet( chdev_t * chdev, uint8_t * k_buf, uint32_t length ) { thread_t * this = CURRENT_THREAD; // get extended pointers on server tread and chdev xptr_t thread_xp = XPTR( local_cxy , this ); xptr_t chdev_xp = XPTR( local_cxy , chdev ); // check thread can yield thread_assert_can_yield( this , __FUNCTION__ ); #if (DEBUG_DEV_NIC_TX & 1) uint32_t cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_TX < cycle ) printk("\n[%s] thread[%x,%x] enter / buf %x / length %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, k_buf, length, cycle ); #endif // initialize WRITE command in server thread descriptor this->nic_cmd.dev_xp = chdev_xp; this->nic_cmd.type = NIC_CMD_WRITE; this->nic_cmd.buffer = k_buf; this->nic_cmd.length = length; this->nic_cmd.error = 0; while( 1 ) { // call driver to move TX packet chdev->cmd( thread_xp ); // exit while if success if( this->nic_cmd.status == length ) // exit while & return if success { #if (DEBUG_DEV_NIC_TX & 1) cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_TX < cycle ) printk("\n[%s] thread[%x,%x] exit SUCCESS / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif break; } else // block and deschedule if queue full { #if (DEBUG_DEV_NIC_TX & 1) cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_TX < cycle ) printk("\n[%s] thread[%x,%x] NIC_TX_QUEUE full => blocks on ISR / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // enable NIC_TX IRQ dev_pic_enable_irq( this->core->lid , chdev_xp ); // TX server thread blocks and deschedules thread_block( thread_xp , THREAD_BLOCKED_ISR ); sched_yield("client blocked on NIC_TX queue full"); // disable NIC-TX IRQ dev_pic_disable_irq( this->core->lid , chdev_xp ); #if (DEBUG_DEV_NIC_TX & 1) cycle = (uint32_t)hal_get_cycles(); if( DEBUG_DEV_NIC_TX < cycle ) printk("\n[%s] thread[%x,%x] resumes / cycle %d\n", 
__FUNCTION__, this->process->pid, this->trdid, cycle ); #endif } } } // end dev_nic_tx_move_packet() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function to build an UDP // header in the kernel buffer defined by the arguement, as specified by the // argument. The argument defines the number of bytes in payload. // It set the "src_port", "dst_port", "total_length" and "checksum" fields in UDP header. // The payload must be previouly loaded in the kernel buffer. /////////////////////////////////////////////////////////////////////////////////////////// // @ k_buf : [in] pointer on first byte of UDP header in kernel buffer. // @ socket_xp : [in] extended pointer on socket. // @ length : [in] number of bytes in payload. /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_build_udp_header( uint8_t * k_buf, xptr_t socket_xp, uint32_t length ) { uint16_t checksum; // checksum value uint32_t total_length; // total UDP packet length uint32_t local_addr; // local IP address uint32_t remote_addr; // remote IP address uint32_t local_port; // local port uint32_t remote_port; // remote port // get socket cluster an local pointer socket_t * socket_ptr = GET_PTR( socket_xp ); cxy_t socket_cxy = GET_CXY( socket_xp ); // get relevant infos from socket local_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_addr )); remote_addr = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_addr )); local_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->local_port )); remote_port = hal_remote_l32(XPTR(socket_cxy , &socket_ptr->remote_port )); // compute UDP packet total length total_length = length + UDP_HEAD_LEN; // set src_port and dst_port in header k_buf[0] = local_port >> 8; k_buf[1] = local_port; k_buf[2] = remote_port >> 8; k_buf[3] = remote_port; // reset checksum k_buf[6] = 0; k_buf[7] = 0; // set 
packet length in header k_buf[4] = total_length >> 8; k_buf[5] = total_length; // compute UDP packet checksum checksum = dev_nic_tcp_udp_checksum( k_buf, total_length, local_addr, remote_addr, false ); // is_not_tcp // set checksum k_buf[6] = checksum >> 8; k_buf[7] = checksum; } // end dev_nic_tx_build_udp_header() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds a TCP header in the kernel buffer defined by the argument. // The payload must have been previouly registered in this buffer (for checksum). // The "local_addr", "local_port", "remote_addr", "remote_port", seq_num", "ack_num", // and "window" fields are obtained from the argument. // The argument defines the number of bytes in payload, and the argument // defines the flags to be set in TCP header. /////////////////////////////////////////////////////////////////////////////////////////// // @ k_buf : [in] pointer on first byte of TCP header in kernel buffer. // @ socket_xp : [in] extended pointer on socket. // @ length : [in] number of bytes in payload. // @ flags : [in] flags to be set in TCP header. 
/////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_build_tcp_header( uint8_t * k_buf, xptr_t socket_xp, uint32_t length, uint8_t flags ) { uint16_t checksum; // global segment checksum uint32_t total_length; // total UDP packet length uint32_t src_addr; // local IP address uint32_t dst_addr; // remote IP address uint16_t src_port; // local port uint16_t dst_port; // remote port uint32_t seq_num; // first byte of segment in TX stream uint32_t ack_num; // next expected byte in RX stream uint32_t window; // window of accepted segments in RX stream // get socket cluster an local pointer socket_t * sock_ptr = GET_PTR( socket_xp ); cxy_t sock_cxy = GET_CXY( socket_xp ); // get relevant infos from socket src_addr = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->local_addr )); dst_addr = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->remote_addr )); src_port = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->local_port )); dst_port = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->remote_port )); seq_num = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->tx_nxt )); ack_num = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->rx_nxt )); window = hal_remote_l32(XPTR( sock_cxy , &sock_ptr->rx_wnd )); // compute TCP segment total length total_length = length + TCP_HEAD_LEN; // set "src_port" and "dst_port" k_buf[0] = src_port >> 8; k_buf[1] = src_port; k_buf[2] = dst_port >> 8; k_buf[3] = dst_port; // set "seq_num" k_buf[4] = seq_num >> 24; k_buf[5] = seq_num >> 16; k_buf[6] = seq_num >> 8; k_buf[7] = seq_num; // set "ack_num" k_buf[8] = ack_num >> 24; k_buf[9] = ack_num >> 16; k_buf[10] = ack_num >> 8; k_buf[11] = ack_num; // set "hlen" k_buf[12] = 5; // set "flags" k_buf[13] = flags & 0x3F; // set "window" k_buf[14] = window >> 8; k_buf[15] = window; // reset "checksum" k_buf[16] = 0; k_buf[17] = 0; // set "urgent_ptr" k_buf[18] = 0; k_buf[19] = 0; // compute TCP segment checksum checksum = dev_nic_tcp_udp_checksum( k_buf, total_length, 
src_addr, dst_addr, true ); // is_tcp // set "checksum" k_buf[16] = checksum >> 8; k_buf[17] = checksum; } // end dev_nic_tx_build_tcp_header() /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds the IP header in the 20 first bytes of . /////////////////////////////////////////////////////////////////////////////////////////// // @ buffer : pointer on first byte of IP header in kernel buffer // @ src_addr : source IP address. // @ dst_addr : destination IP address. // @ length : number of bytes in IP packet payload. /////////////////////////////////////////////////////////////////////////////////////////// static void dev_nic_tx_build_ip_header( uint8_t * buffer, uint32_t src_addr, uint32_t dst_addr, uint8_t protocol, uint16_t length ) { uint16_t hcs; uint16_t total = length + IP_HEAD_LEN; buffer[0] = 0x45; // IPV4 / IHL = 20 bytes buffer[1] = 0; // DSCP / ECN buffer[2] = total >> 8; buffer[3] = total; buffer[4] = 0x40; // Don't Fragment buffer[5] = 0; buffer[6] = 0; buffer[7] = 0; buffer[8] = 0xFF; // TTL buffer[9] = protocol; // transport protocol buffer[12] = src_addr >> 24; buffer[13] = src_addr >> 16; buffer[14] = src_addr >> 8; buffer[15] = src_addr; buffer[16] = dst_addr >> 24; buffer[17] = dst_addr >> 16; buffer[18] = dst_addr >> 8; buffer[19] = dst_addr; // compute IP header checksum hcs = dev_nic_ip_checksum( buffer ); // set checksum buffer[10] = hcs >> 8; buffer[11] = hcs; } // end dev_nic_tx_build_ip_header /////////////////////////////////////////////////////////////////////////////////////////// // This static function is called by the dev_nic_tx_server() function. // It builds the Ethernet header in the 14 first bytes of . 
///////////////////////////////////////////////////////////////////////////////////////////
// @ buffer     : pointer on first byte of Ethernet header in kernel buffer
// @ src_mac_5  : source MAC address byte, written in buffer[6].
// @ src_mac_4  : source MAC address byte, written in buffer[7].
// @ src_mac_3  : source MAC address byte, written in buffer[8].
// @ src_mac_2  : source MAC address byte, written in buffer[9].
// @ src_mac_1  : source MAC address byte, written in buffer[10].
// @ src_mac_0  : source MAC address byte, written in buffer[11].
// @ dst_mac_5  : destination MAC address byte, written in buffer[0].
// @ dst_mac_4  : destination MAC address byte, written in buffer[1].
// @ dst_mac_3  : destination MAC address byte, written in buffer[2].
// @ dst_mac_2  : destination MAC address byte, written in buffer[3].
// @ dst_mac_1  : destination MAC address byte, written in buffer[4].
// @ dst_mac_0  : destination MAC address byte, written in buffer[5].
// @ length     : number of bytes in Ethernet frame payload (16 lower bits are used).
///////////////////////////////////////////////////////////////////////////////////////////
static void dev_nic_tx_build_eth_header( uint8_t * buffer,
                                         uint8_t   src_mac_5,
                                         uint8_t   src_mac_4,
                                         uint8_t   src_mac_3,
                                         uint8_t   src_mac_2,
                                         uint8_t   src_mac_1,
                                         uint8_t   src_mac_0,
                                         uint8_t   dst_mac_5,
                                         uint8_t   dst_mac_4,
                                         uint8_t   dst_mac_3,
                                         uint8_t   dst_mac_2,
                                         uint8_t   dst_mac_1,
                                         uint8_t   dst_mac_0,
                                         uint32_t  length )
{
    // write cursor, moving from the first to the last byte of the header
    uint8_t * cursor = buffer;

    // destination MAC address comes first in the frame
    *cursor++ = dst_mac_5;
    *cursor++ = dst_mac_4;
    *cursor++ = dst_mac_3;
    *cursor++ = dst_mac_2;
    *cursor++ = dst_mac_1;
    *cursor++ = dst_mac_0;

    // source MAC address follows
    *cursor++ = src_mac_5;
    *cursor++ = src_mac_4;
    *cursor++ = src_mac_3;
    *cursor++ = src_mac_2;
    *cursor++ = src_mac_1;
    *cursor++ = src_mac_0;

    // payload length, most significant byte first
    *cursor++ = length >> 8;
    *cursor   = length;

}  // end dev_nic_tx_build_eth_header()

///////////////////////////////////////////////////////////////////////////////////////////
// This static function implements the TX side of the UDP and TCP protocols.
// It is called by the dev_nic_tx_server() function to handle one TX command,
// or one R2T request, for the socket identified by the <socket_xp> argument.
// It builds an ETH/IP/UDP packet or ETH/IP/TCP segment, in the 2 Kbytes kernel buffer,
// defined by the <k_buf> argument from informations found in socket descriptor.
// It returns a command status code (defined in the ksocket.h file), and returns in the
// <total_length> argument the actual packet length.
// It updates the "socket.state", "socket.tx_nxt", "socket.r2tq", "socket.crqq", // "socket.todo" fields as required by the command type, but it does NOT reset // the "socket.tx_valid" field and does NOT unblock the client thread. // It does NOt take the socket lock, that is taken by the dev_nic_server(). /////////////////////////////////////////////////////////////////////////////////////////// // To build a packet, it makes the following actions: // 1) it get the command arguments from socket descriptor. // 2) it build an UDP packet or a TCP segment, and update socket state. // 3) it build the IP header. // 4) it build the ETH header. /////////////////////////////////////////////////////////////////////////////////////////// // @ socket_xp : [in] extended pointer on client socket. // @ k_buf : [in] local pointer on kernel buffer (2 Kbytes). // @ total_length : [out] total number of bytes written in k_buf. // @ return command status. /////////////////////////////////////////////////////////////////////////////////////////// static socket_cmd_sts_t dev_nic_tx_build_packet( xptr_t socket_xp, uint8_t * k_buf, uint32_t * total_length ) { socket_t * socket_ptr; cxy_t socket_cxy; xptr_t client_xp; // extended pointer on client thread bool_t cmd_valid; // valid user command bool_t r2t_valid; // valid R2T queue request uint32_t cmd_type; // NIC command type uint8_t * tx_buf; // local pointer on socket buffer for payload uint32_t len; // tx_buf length (bytes) uint32_t todo; // number of bytes not yet sent uint32_t socket_type; // socket type (UDP/TCP) uint32_t socket_state; // socket state xptr_t socket_r2tq_xp; // extended pointer on R2T queue uint32_t src_ip_addr; // source IP address uint32_t dst_ip_addr; // destination IP address uint32_t tx_nxt; // next sequence number in TX stream uint32_t nbytes; // number of bytes in UDP/TCP packet payload uint8_t * k_trsp_base; // pointer on UDP/TCP packet in kernel buffer uint32_t trsp_length; // length of TCP/UDP packet uint8_t 
trsp_protocol; // transport protocol type (UDP/TCP) uint8_t r2t_flags; // flags defined by one R2T queue request // get socket cluster and local pointer socket_cxy = GET_CXY( socket_xp ); socket_ptr = GET_PTR( socket_xp ); #if DEBUG_DEV_NIC_TX || DEBUG_DEV_NIC_ERROR uint32_t cycle = (uint32_t)hal_get_cycles(); uint32_t socket_fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid )); uint32_t socket_pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); #endif // build extended pointer on socket r2t queue socket_r2tq_xp = XPTR( socket_cxy , &socket_ptr->r2tq ); // get cmd_valid & t2t_valid from socket descriptor cmd_valid = (bool_t)hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_valid )); r2t_valid = (bool_t)remote_buf_status( XPTR( socket_cxy , &socket_ptr->r2tq )); #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] enter for socket[%x,%d] : cmd_val %d / r2t_val %d / cycle %d\n", __FUNCTION__, socket_pid, socket_fdid, cmd_valid, r2t_valid, cycle ); #endif // 1. 
get relevant socket infos socket_type = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->type )); socket_state = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->state )); src_ip_addr = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->local_addr )); dst_ip_addr = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->remote_addr )); // compute UDP/TCP packet base in local kernel buffer k_trsp_base = k_buf + ETH_HEAD_LEN + IP_HEAD_LEN; // default value trsp_length = 0; if( cmd_valid ) // handle TX command depending on type { // get command arguments from socket cmd_type = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_cmd )); tx_buf = hal_remote_lpt( XPTR( socket_cxy , &socket_ptr->tx_buf )); len = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_len )); todo = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_todo )); client_xp = hal_remote_l64( XPTR( socket_cxy , &socket_ptr->tx_client )); #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] / %s / command %s \n", __FUNCTION__, socket_pid, socket_fdid, socket_cmd_type_str(cmd_type),socket_state_str(socket_state) ); #endif ////////////////////////////////////////////////////////// // 2. UDP : build UDP packet and update UDP socket state if( socket_type == SOCK_DGRAM ) { trsp_protocol = PROTOCOL_UDP; if( socket_state != UDP_STATE_ESTAB ) { return CMD_STS_BADSTATE; } else if( cmd_type == CMD_TX_SEND ) { // compute payload length nbytes = ( CONFIG_SOCK_PAYLOAD_MAX < todo ) ? 
CONFIG_SOCK_PAYLOAD_MAX : todo; // move payload from remote socket tx_buf to local kernel buffer hal_remote_memcpy( XPTR( local_cxy , k_trsp_base + UDP_HEAD_LEN ), XPTR( socket_cxy , tx_buf + (len - todo) ), nbytes ); // build UDP header dev_nic_tx_build_udp_header( k_trsp_base, socket_xp, nbytes ); // update "tx_todo" in socket descriptor hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_todo), todo - nbytes ); // set UDP packet length trsp_length = UDP_HEAD_LEN + nbytes; #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] UDP packet build / %d bytes\n", __FUNCTION__, socket_pid, socket_fdid, nbytes ); #endif } else // CONNECT, ACCEPT, or CLOSE commands are illegal for UDP { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : bad state %s for socket[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), socket_pid, socket_fdid, cycle ); #endif return CMD_STS_BADCMD; } } // end UDP /////////////////////////////////////////////////////////// // 2. TCP : build TCP segment and update TCP socket state else if( socket_type == SOCK_STREAM ) { trsp_protocol = PROTOCOL_TCP; // handle R2T request / initialize r2t_flags if( r2t_valid ) { // build extended pointers on r2t queue socket_r2tq_xp = XPTR( socket_cxy , &socket_ptr->r2tq ); // get one request from R2T queue, and update R2T queue socket_get_r2t_request( socket_r2tq_xp , &r2t_flags ); } else { r2t_flags = 0; } //////////////////////////////// if( cmd_type == CMD_TX_CONNECT ) // always executed by a TCP client { if( (socket_state == TCP_STATE_BOUND) || (socket_state == TCP_STATE_LISTEN) ) // send a SYN segment { // initialises socket tx_nxt, and rx_wnd hal_remote_s32(XPTR(socket_cxy , &socket_ptr->tx_nxt), CONFIG_SOCK_ISS_CLIENT ); hal_remote_s32(XPTR(socket_cxy , &socket_ptr->rx_wnd), CONFIG_SOCK_MAX_WINDOW ); // build TCP SYN segment dev_nic_tx_build_tcp_header( k_trsp_base, socket_xp, 0, // length TCP_FLAG_SYN ); // set TCP packet length trsp_length = TCP_HEAD_LEN; // update 
socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_SENT ); // update socket.tx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), CONFIG_SOCK_ISS_CLIENT + 1 ); #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] %s / CONNECT / TCP SYN build\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state) ); #endif } else // report error for all other socket states { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : bad state %s socket[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), socket_pid, socket_fdid, cycle ); #endif return CMD_STS_BADSTATE; } } //////////////////////////////////// else if( cmd_type == CMD_TX_ACCEPT ) // always executed by a TCP server { if( socket_state == TCP_STATE_SYN_RCVD ) // send a SYN-ACK segment { // initialize socket tx_nxt, and rx_wnd hal_remote_s32(XPTR(socket_cxy , &socket_ptr->tx_nxt), CONFIG_SOCK_ISS_SERVER ); hal_remote_s32(XPTR(socket_cxy , &socket_ptr->rx_wnd), (1 << CONFIG_SOCK_RX_BUF_ORDER) ); // build TCP ACK-SYN segment dev_nic_tx_build_tcp_header( k_trsp_base, socket_xp, 0, // length TCP_FLAG_SYN | TCP_FLAG_ACK ); // set TCP packet length trsp_length = TCP_HEAD_LEN; // update socket.state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), TCP_STATE_SYN_RCVD ); // update socket.tx_nxt hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_nxt ), CONFIG_SOCK_ISS_SERVER + 1 ); #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] %s / ACCEPT / SYN-ACK build\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state) ); #endif } else // report error in all other socket states { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : bad state %s for socket[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), socket_pid, socket_fdid, cycle ); #endif return CMD_STS_BADSTATE; } } /////////////////////////////////// else if( cmd_type == CMD_TX_CLOSE ) { if( (socket_state == 
TCP_STATE_SYN_RCVD ) || (socket_state == TCP_STATE_ESTAB ) || (socket_state == TCP_STATE_CLOSE_WAIT) ) // send a FIN-ACK segment { // get "tx_nxt" from socket descriptor tx_nxt = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_nxt )); // compute next state uint32_t state = (socket_state == TCP_STATE_CLOSE_WAIT) ? TCP_STATE_LAST_ACK : TCP_STATE_FIN_WAIT1; // update socket state hal_remote_s32( XPTR( socket_cxy , &socket_ptr->state ), state ); // build TCP FIN segment dev_nic_tx_build_tcp_header( k_trsp_base, socket_xp, 0, // length TCP_FLAG_FIN | TCP_FLAG_ACK ); // update "tx_nxt" in socket descriptor hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_nxt), tx_nxt + 1 ); // set TCP packet length trsp_length = TCP_HEAD_LEN; #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] %s / CLOSE / FIN-ACK build\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state) ); #endif } else // all other states => signal error { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : bad state %s for socket[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), socket_pid, socket_fdid, cycle ); #endif return CMD_STS_BADSTATE; } } ////////////////////////////////// else if( cmd_type == CMD_TX_SEND ) { if( (socket_state == TCP_STATE_ESTAB ) || (socket_state == TCP_STATE_CLOSE_WAIT) ) { // get "tx_nxt" from socket descriptor tx_nxt = hal_remote_l32( XPTR(socket_cxy , &socket_ptr->tx_nxt )); // compute actual payload length nbytes = ( CONFIG_SOCK_PAYLOAD_MAX < todo ) ? 
CONFIG_SOCK_PAYLOAD_MAX : todo; // move payload from remote tx_buf to local kernel buffer hal_remote_memcpy( XPTR( local_cxy , k_trsp_base + TCP_HEAD_LEN ), XPTR( socket_cxy , tx_buf + (len - todo) ), nbytes ); // build TCP header dev_nic_tx_build_tcp_header( k_trsp_base, socket_xp, nbytes, // payload TCP_FLAG_ACK | r2t_flags ); // flags // update "tx_todo" in socket descriptor hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_todo), todo - nbytes ); // update "tx_nxt" in socket descriptor hal_remote_s32( XPTR(socket_cxy , &socket_ptr->tx_nxt), tx_nxt + nbytes ); // set TCP packet length trsp_length = TCP_HEAD_LEN + nbytes; #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] %s / SEND / %d bytes\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state), nbytes ); #endif } else // all other socket states { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : bad state %s for socket[%x,%x] / cycle %d\n", __FUNCTION__, socket_state_str(socket_state), socket_pid, socket_fdid, cycle ); #endif return CMD_STS_BADSTATE; } } /////////////////////////////////// else // undefined TX command type { #if DEBUG_DEV_NIC_ERROR printk("\n[ERROR] in %s : undefined command type for socket[%x,%x] %s / cycle %d\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state), cycle ); #endif return CMD_STS_BADCMD; } } // end TCP } else // no valid TX command => handle R2T request only { assert( __FUNCTION__ , (socket_type == SOCK_STREAM) , "don't use R2T queue for UDP" ); // get one request from R2T queue socket_get_r2t_request( socket_r2tq_xp , &r2t_flags ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] socket[%x,%d] %s / send only flags %x / no data\n", __FUNCTION__, socket_pid, socket_fdid, socket_state_str(socket_state), r2t_flags ); #endif // build TCP header dev_nic_tx_build_tcp_header( k_trsp_base, socket_xp, 0, // no payload r2t_flags ); // flags // set protocol 
trsp_protocol = PROTOCOL_TCP; // set TCP packet length trsp_length = TCP_HEAD_LEN; } // 3. build IP header dev_nic_tx_build_ip_header( k_buf + ETH_HEAD_LEN, src_ip_addr, dst_ip_addr, trsp_protocol, trsp_length ); // 4. build ETH header dev_nic_tx_build_eth_header( k_buf, (uint8_t)DST_MAC_5, (uint8_t)DST_MAC_4, (uint8_t)DST_MAC_3, (uint8_t)DST_MAC_2, (uint8_t)DST_MAC_1, (uint8_t)DST_MAC_0, (uint8_t)SRC_MAC_5, (uint8_t)SRC_MAC_4, (uint8_t)SRC_MAC_3, (uint8_t)SRC_MAC_2, (uint8_t)SRC_MAC_1, (uint8_t)SRC_MAC_0, IP_HEAD_LEN + trsp_length ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] exit for socket[%x,%d] / packet build / cycle %d\n", __FUNCTION__, socket_pid, socket_fdid, cycle ); #endif // return success and total packet length *total_length = ETH_HEAD_LEN + IP_HEAD_LEN + trsp_length; return CMD_STS_SUCCESS; } // end dev_nic_tx_build_packet() ///////////////////////////////////////// void dev_nic_tx_server( chdev_t * chdev ) { uint8_t k_buf[CONFIG_SOCK_PKT_BUF_SIZE]; // buffer for one packet xptr_t queue_lock_xp; // extended pointer on lock for sockets list xptr_t root_xp; // extended pointer on sockets list root xptr_t iter_xp; // iterator for loop on sockets list xptr_t list_xp; // extended pointer on socket tx_list field xptr_t socket_xp; // extended pointer on found socket socket_t * socket_ptr; // local pointer on found socket cxy_t socket_cxy; // found socket cluster identifier xptr_t socket_lock_xp; // extented pointer on found socket lock bool_t cmd_valid; // TX command valid in socket descriptor bool_t r2t_valid; // valid R2T request in socket descriptor uint32_t sock_type; // socket type socket_cmd_sts_t cmd_sts; // value returned by dev_nic_tx_build_packet() socket_cmd_type_t tx_cmd; // socket TX command type uint32_t tx_todo; // socket number of bytes not sent yet uint32_t total_length; // length of the ETH/IP/TCP packet (bytes) bool_t found; // one active socket found thread_t * this = 
CURRENT_THREAD; #if DEBUG_DEV_NIC_TX uint32_t cycle = (uint32_t)hal_get_cycles(); uint32_t pid; uint32_t fdid; if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] starts / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // check chdev direction and type assert( __FUNCTION__, (chdev->func == DEV_FUNC_NIC) && (chdev->is_rx == false) , "illegal chdev type or direction" ); // build extended pointers on client sockets queue lock queue_lock_xp = XPTR( local_cxy , &chdev->wait_lock ); // build extended pointers on client sockets queue root and first item root_xp = XPTR( local_cxy , &chdev->wait_root ); while( 1 ) // TX server infinite loop { // take the lock protecting the client sockets queue remote_busylock_acquire( queue_lock_xp ); found = false; // scan registered sockets to find one active socket // with a round robin priority between the registered sockets if( xlist_is_empty( root_xp ) == false ) { XLIST_FOREACH( root_xp , iter_xp ) { // get client socket cluster and pointers socket_xp = XLIST_ELEMENT( iter_xp , socket_t , tx_list ); socket_ptr = GET_PTR( socket_xp ); socket_cxy = GET_CXY( socket_xp ); // build extended pointer on socket tx_list field list_xp = XPTR( socket_cxy , &socket_ptr->tx_list ); // get cmd_valid & r2t_valid from socket descriptor cmd_valid = (bool_t)hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_valid )); // get r2t_valid from socket descriptor r2t_valid = (bool_t)remote_buf_status( XPTR( socket_cxy , &socket_ptr->r2tq )); if( cmd_valid || r2t_valid ) // active => move socket, and exit loop { // move selected socket to last position for round-robin xlist_unlink( list_xp ); xlist_add_last( root_xp , list_xp ); // exit loop found = true; break; } } // end loop on sockets } // release the lock protecting the client sockets queue remote_busylock_release( queue_lock_xp ); if( found == false ) // block & deschedule if no active socket { #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > 
DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] no active socket => blocks on / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif // block and deschedule thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_CLIENT ); sched_yield( "waiting client" ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] resumes / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, cycle ); #endif } else // handle active socket request { // avoid warning total_length = 0; // build extended pointer on socket lock socket_lock_xp = XPTR( socket_cxy , &socket_ptr->lock ); // take socket lock remote_queuelock_acquire( socket_lock_xp ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); pid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->pid )); fdid = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->fdid )); #endif #if DEBUG_DEV_NIC_TX if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] select socket[%x,%d] / cmd_val %d / r2t_val %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid, cmd_valid, r2t_valid, cycle ); #endif // build one UDP packet / TCP segment cmd_sts = dev_nic_tx_build_packet( socket_xp, k_buf, &total_length ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) printk("\n[%s] thread[%x,%x] for socket[%x,%x] build packet / %d bytes / sts %d / cycle %d\n", __FUNCTION__, this->process->pid, this->trdid, pid, fdid, total_length, cmd_sts, cycle ); #endif // release socket lock remote_queuelock_release( socket_lock_xp ); if( cmd_sts == CMD_STS_SUCCESS ) // move packet to TX queue { // move packet to NIC_TX queue dev_nic_tx_move_packet( chdev, k_buf, total_length ); #if DEBUG_DEV_NIC_TX cycle = (uint32_t)hal_get_cycles(); if( cycle > DEBUG_DEV_NIC_TX ) dev_nic_packet_display( pid, fdid, cycle, k_buf ); #endif // get socket.type, socket.tx_cmd and socket.tx_todo values tx_cmd = hal_remote_l32( XPTR( socket_cxy 
, &socket_ptr->tx_cmd )); tx_todo = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->tx_todo )); sock_type = hal_remote_l32( XPTR( socket_cxy , &socket_ptr->type )); // client signaling depends on command type and socket type if( (tx_cmd == CMD_TX_SEND) && (tx_todo == 0) ) { // reset tx_valid for both UDP and TCP hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_valid), false ); // unblock client thread for UDP only if(sock_type == SOCK_DGRAM) dev_nic_unblock_tx_client( socket_xp , CMD_STS_SUCCESS ); } else // type is CONNECT / ACCEPT / CLOSE { // reset tx_valid hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_valid), false ); } } else // signal error to client thread { // reset tx_valid hal_remote_s32( XPTR( socket_cxy , &socket_ptr->tx_valid), false ); // unblock tx_client thread dev_nic_unblock_tx_client( socket_xp , cmd_sts ); } } // end active socket handling } // end infinite while loop } // end dev_nic_tx_server() ////////////////////////////////////////////////// void dev_nic_packet_display( pid_t socket_pid, uint32_t socket_fdid, uint32_t cycle, uint8_t * buf ) { // get ETH header fields uint64_t eth_dst_mac = ((uint64_t)buf[5] << 40) | ((uint64_t)buf[4] << 32) | ((uint64_t)buf[3] << 24) | ((uint64_t)buf[2] << 16) | ((uint64_t)buf[1] << 8) | ((uint64_t)buf[0] ) ; uint64_t eth_src_mac = ((uint64_t)buf[11] << 40) | ((uint64_t)buf[10] << 32) | ((uint64_t)buf[9] << 24) | ((uint64_t)buf[8] << 16) | ((uint64_t)buf[7] << 8) | ((uint64_t)buf[6] ) ; uint16_t eth_length = ((uint16_t)buf[12] << 8) | ((uint16_t)buf[13] ) ; // get IP header fields uint8_t ip_version = buf[14]; uint8_t ip_tos = buf[15]; uint16_t ip_length = ((uint16_t)buf[16] << 8) | ((uint16_t)buf[17] ) ; uint16_t ip_ident = ((uint16_t)buf[18] << 8) | ((uint16_t)buf[19] ) ; uint16_t ip_offset = ((uint16_t)buf[20] << 8) | ((uint16_t)buf[21] ) ; uint8_t ip_ttl = buf[22]; uint8_t ip_protocol = buf[23]; uint16_t ip_checksum = ((uint16_t)buf[24] << 8) | ((uint16_t)buf[25] ) ; uint32_t ip_src_addr = 
((uint32_t)buf[26] << 24) | ((uint32_t)buf[27] << 16) | ((uint32_t)buf[28] << 8) | ((uint32_t)buf[29] ) ; uint32_t ip_dst_addr = ((uint32_t)buf[30] << 24) | ((uint32_t)buf[31] << 16) | ((uint32_t)buf[32] << 8) | ((uint32_t)buf[33] ) ; // get pointers on TXT0 chdev xptr_t txt0_xp = chdev_dir.txt_tx[0]; cxy_t txt0_cxy = GET_CXY( txt0_xp ); chdev_t * txt0_ptr = GET_PTR( txt0_xp ); // get extended pointer on remote TXT0 chdev lock xptr_t lock_xp = XPTR( txt0_cxy , &txt0_ptr->wait_lock ); // get TXT0 lock remote_busylock_acquire( lock_xp ); nolock_printk("\n***** packet sent by NIC_TX server for socket[%x,%d] / cycle %d\n", socket_pid, socket_fdid, cycle ); nolock_printk(" ETH header\n"); nolock_printk(" - dst_mac [6] = %l\n" , eth_dst_mac ); nolock_printk(" - src_mac [6] = %l\n" , eth_src_mac ); nolock_printk(" - length [2] = %d\n" , (uint32_t)eth_length ); nolock_printk(" IP header\n"); nolock_printk(" - version [1] = %x\n" , (uint32_t)ip_version ); nolock_printk(" - tos [1] = %x\n" , (uint32_t)ip_tos ); nolock_printk(" - length [2] = %d\n" , (uint32_t)ip_length ); nolock_printk(" - ident [2] = %x\n" , (uint32_t)ip_ident ); nolock_printk(" - offset [2] = %x\n" , (uint32_t)ip_offset ); nolock_printk(" - ttl [1] = %x\n" , (uint32_t)ip_ttl ); nolock_printk(" - protocol [1] = %x\n" , (uint32_t)ip_protocol ); nolock_printk(" - checksum [2] = %x\n" , (uint32_t)ip_checksum ); nolock_printk(" - src_addr [4] = %x\n" , (uint32_t)ip_src_addr ); nolock_printk(" - dst_addr [4] = %x\n" , (uint32_t)ip_dst_addr ); // get UDP / TCP fields if ( ip_protocol == PROTOCOL_UDP ) { uint16_t udp_src_port = ((uint16_t)buf[34] << 8) | ((uint16_t)buf[35] ) ; uint16_t udp_dst_port = ((uint16_t)buf[36] << 8) | ((uint16_t)buf[37] ) ; nolock_printk(" UDP header\n"); nolock_printk(" - src_port [2] = %d\n" , (uint32_t)udp_src_port ); nolock_printk(" - dst_port [2] = %d\n" , (uint32_t)udp_dst_port ); } else if( ip_protocol == PROTOCOL_TCP ) { uint16_t tcp_src_port = ((uint16_t)buf[34] << 8) | 
((uint16_t)buf[35] ) ; uint16_t tcp_dst_port = ((uint16_t)buf[36] << 8) | ((uint16_t)buf[37] ) ; uint32_t tcp_seq_num = ((uint32_t)buf[38] << 24) | ((uint32_t)buf[39] << 16) | ((uint32_t)buf[40] << 8) | ((uint32_t)buf[41] ) ; uint32_t tcp_ack_num = ((uint32_t)buf[42] << 24) | ((uint32_t)buf[43] << 16) | ((uint32_t)buf[44] << 8) | ((uint32_t)buf[45] ) ; uint8_t tcp_hlen = buf[46]; uint8_t tcp_flags = buf[47]; uint16_t tcp_window = ((uint16_t)buf[48] << 8) | ((uint16_t)buf[49] ) ; uint16_t tcp_checksum = ((uint16_t)buf[50] << 8) | ((uint16_t)buf[51] ) ; uint16_t tcp_urgent = ((uint16_t)buf[52] << 8) | ((uint16_t)buf[53] ) ; nolock_printk(" TCP header\n"); nolock_printk(" - src_port [2] = %x\n" , (uint32_t)tcp_src_port ); nolock_printk(" - dst_port [2] = %x\n" , (uint32_t)tcp_dst_port ); nolock_printk(" - seq_num [4] = %x\n" , (uint32_t)tcp_seq_num ); nolock_printk(" - ack_num [4] = %x\n" , (uint32_t)tcp_ack_num ); nolock_printk(" - hlen [1] = %d\n" , (uint32_t)tcp_hlen ); nolock_printk(" - flags [1] = %x\n" , (uint32_t)tcp_flags ); nolock_printk(" - window [2] = %x\n" , (uint32_t)tcp_window ); nolock_printk(" - checksum [2] = %x\n" , (uint32_t)tcp_checksum ); nolock_printk(" - urgent [2] = %x\n" , (uint32_t)tcp_urgent ); } else { nolock_printk("!!!!! undefined transport protocol !!!!!\n"); } // release TXT0 lock remote_busylock_release( lock_xp ); } // end dev_nic_packet_display()