You are on page 1 of 23

TCP (Transmission Control Protocol)

chapter 11 TCP: Data structures and input processing


TCP provides a reliable, flow-controlled, end-to-end stream service between two machines of arbitrary processing speed, using the unreliable IP mechanism for communication. It must handle data that is delayed, duplicated, lost, out of order, or corrupted.
Three processes: input process, output process, timer management.

each active connection maintains one TCB


TCB contents:
- SA/DA IP addresses and SA/DA port numbers
- round-trip time estimate
- data sent or received
- sequence, ACK and retransmission information

Software Architecture
Queue (XINU port)

AP process AP process

tcpinp() tcpinp() process process


tcps_iport

tcpout() tcpout() process process

tcptimer() tcptimer() process process


Event Signal

Operating System IPC facilities


ip_iport

tcp_in() tcp_in()
arp_in() ip_in() arp_in() ip_in()

tcpsend() tcpsend() ipsend() ipsend()


udp_in() udp_in() icmp_in() icmp_in()

Timer Timer slowtimer() slowtimer() process process ipftimer ipftimer

ipproc() ipproc() process process netwrite() netwrite() write() local_out() write() local_out()

NIC Driver NIC Driver ISR ISR

arptimer arptimer

rttimer rttimer

TCB data structure


struct tcb { short tcb_state; /* TCP state short tcb_ostate; /* output state short tcb_type; /* TCP type (SERVER, CLIENT) int tcb_mutex; /* tcb mutual exclusion short tcb_code; /* TCP code for next packet short tcb_flags; /* various TCB state flags short tcb_error; /* return error for user side IPaddr tcb_rip; u_short IPaddr tcb_lip; u_short struct netif /* remote IP address tcb_rport; /* remote TCP port /* local IP address tcb_lport; /* local TCP port *tcb_pni; /* pointer to our interface */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */ */

tcpseq tcb_suna; /* send unacked tcpseq tcb_snext; /* send next tcpseq tcb_slast; /* sequence of FIN, if TCBF_SNDFIN u_longtcb_swindow; /* send window size (octets) tcpseq tcb_lwseq; /* sequence of last window update tcpseq tcb_lwack; /* ack seq of last window update */ u_int tcb_cwnd; /* congestion window size (octets) u_int tcb_ssthresh; /* slow start threshold (octets) u_int tcb_smss; /* send max segment size (octets) tcpseq tcb_iss; /* initial send sequence

TCB data structure


int int int int int int tcb_srt; /* smoothed Round Trip Time tcb_rtde;/* Round Trip deviation estimator */ tcb_persist; /* persist timeout value tcb_keep; /* keepalive timeout value tcb_rexmt; /* retransmit timeout value tcb_rexmtcount; /* number of rexmts sent tcb_rnext; tcb_rupseq; tcb_supseq; /* receive next /* receive urgent pointer /* send urgent pointer */ */ */ */ */ */ */ */ */ */ */ */

tcpseq tcpseq tcpseq

int tcb_lqsize; int tcb_listenq; struct tcb *tcb_pptcb; int tcb_ocsem; int tcb_dvnum;

/* listen queue size (SERVERs) /* listen queue port (SERVERs) /* pointer to parent TCB (for ACCEPT) /* open/close semaphore */ /* TCP slave pseudo device number

TCB data structure


int tcb_ssema;/* send semaphore u_char*tcb_sndbuf; /* send buffer u_int tcb_sbstart; /* start of valid data u_int tcb_sbcount; /* data character count u_int tcb_sbsize;/* send buffer size (bytes) int tcb_rsema;/* receive semaphore u_char*tcb_rcvbuf; /* receive buffer (circular) u_int tcb_rbstart; /* start of valid data u_int tcb_rbcount; /* data character count u_int tcb_rbsize;/* receive buffer size (bytes) u_int tcb_rmss; /* receive max segment size tcpseq tcb_cwin; /* seq of currently advertised window int tcb_rsegq; /* segment fragment queue tcpseq tcb_finseq;/* FIN sequence number, or 0 tcpseq tcb_pushseq; /* PUSH sequence number, or 0 }; /* TCP fragment structure */ struct tcpfrag { tcpseq tf_seq; int tf_len; }; */ */ */ */ */ */ */ */ */ */ */ */ */ */ */

Packet Format

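 0                      15 16                     31
+-------------------------+-------------------------+
|       source port       |    destination port     |
+-------------------------+-------------------------+
|                  sequence number                  |
+---------------------------------------------------+
|               acknowledgement number              |
+--------+----------+-----------+-------------------+
| header | reserved | U A P R S F |   window size   |
|  len   |          | R C S S Y I |                 |
|        |          | G K H T N N |                 |
+--------+----------+-----------+-------------------+
|        checksum         |     urgent pointer      |
+-------------------------+-------------------------+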

TCP header data structure


/* tcp packet format */ struct tcp { unsigned short unsigned short tcpseq tcpseq unsigned char unsigned char unsigned short unsigned short unsigned short unsigned char }; /* TCP Control Bits */ #define #define #define #define #define #define #define #define #define TCPF_URG 0x20 TCPF_ACK 0x10 TCPF_PSH 0x08 TCPF_RST 0x04 TCPF_SYN 0x02 TCPF_FIN 0x01 TCPMHLEN TCPHOFFSET TCP_HLEN(ptcp) /* urgent pointer is valid */ /* acknowledgement field is valid */ /* this segment requests a push /* reset the connection /* synchronize sequence numbers /* sender has reached end of its stream 20 0x50

tcp_sport; /* source port tcp_dport; /* destination port */ tcp_seq; /* sequence */ tcp_ack; /* acknowledged sequence */ tcp_offset; tcp_code; /* control flags */ tcp_window; /* window advertisement tcp_cksum; /* check sum tcp_urgptr; /* urgent pointer */ tcp_data[1];

*/

*/ */

*/ */ */ */ */

/* minimum TCP header length /* tcp_offset value for TCPMHLEN */ (((ptcp)->tcp_offset & 0xf0)>>2)

/* tcp_in.c - tcp_in */ /*-----------------------------------------------------------------------* tcp_in - deliver an inbound TCP packet to the TCP process *-----------------------------------------------------------------------*/ int tcp_in(struct netif *pni, struct ep *pep) { /* drop instead of blocking on psend */ TcpInSegs++; if (pcount(tcps_iport) >= TCPQLEN) { freebuf(pep); return SYSERR; } psend(tcps_iport, (int)pep); return OK; } ISA/PCI
Network interface rx Ether-frame

AP process AP process
socket interface

tcpinp() tcpinp() process process IPC


pep tcps-iport

OS
tcp_in() tcp_in() ip_in() ip_in()

Data Structures
Receive buffer (circular)

tcbtab[Ntcp]
tcb_rcvbuf tcb_sndbuf tcb_pni Send buffer

nif[Net.nif]

Network interface Table

TCP Control Block

Receiving
ptcb->tcb_rcvbuf ptcb->tcb_rbstart ptcb->tcb_rnext ptcb->tcb_cwin

ptcb->tcb_rbcount ptcb->tcb_rbsize

ptcb->tcb_rmss

Sending
ptcb->tcb_sndbuf ptcb->tcb_sbstart ptcb->tcb_snext

ptcb->tcb_sbcount ptcb->tcb_suna

ptcb->tcb_cwnd ptcb->tcb_swindow

ptcb->tcb_sbsize

ptcb->tcb_iss Initial send sequence

ptcb->tcb_smss Send max segment size (octets)

Input Processing (tcpinp)


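tcpinp reads segments from the XINU port (tcps_iport). For each segment taken from tcps_iport:
1. tcpcksum(): check the TCP checksum; drop the segment if it is wrong.
2. tcpnet2h(): convert the header fields to host byte order.
3. tcpdemux(): is there a TCB for this segment? If not, send a RESET with tcpreset().
4. tcpok(): is the segment inside the Rx window? If not, send an ACK with tcpackit().
5. tcpopts(), then tcpswitch[](): process the TCP packet according to the TCB's state and determine the next state.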

/*-----------------------------------------------------------------------* tcpinp - handle TCP segment coming in from IP *------------------------------------------------------------------------ */ PROCESS tcpinp(void) { struct ep *pep; struct ip *pip; struct tcp *ptcp; struct tcb *ptcb; tcps_iport = pcreate(TCPQLEN); signal(Net.sema); while (TRUE) { pep = (struct ep *)preceive(tcps_iport); if ((int)pep == SYSERR) break; pip = (struct ip *)pep->ep_data; if (tcpcksum(pep, pip->ip_len - IP_HLEN(pip))) { ++TcpInErrs; freebuf(pep); continue; } ptcp = (struct tcp *)pip->ip_data; tcpnet2h(ptcp); /* convert all fields to host order */ pep->ep_order |= EPO_TCP;

ptcb = tcpdemux(pep); if (ptcb == 0) { ++TcpInErrs; tcpreset(pep); freebuf(pep); continue; } if (!tcpok(ptcb, pep)) tcpackit(ptcb, pep); else { tcpopts(ptcb, pep); tcpswitch[ptcb->tcb_state](ptcb, pep); } if (ptcb->tcb_state != TCPS_FREE) signal(ptcb->tcb_mutex); freebuf(pep); } }

unsigned short tcpcksum(struct ep *pep, unsigned len) { struct ip *pip = (struct ip *)pep->ep_data; struct tcp *ptcp = (struct tcp *)pip->ip_data; unsigned short *sptr; unsigned long tcksum; unsigned i; tcksum = 0; sptr = (unsigned short *) &pip->ip_src; 0 31 for (i=0; i<IP_ALEN; ++i) tcksum += *sptr++; SA IP sptr = (unsigned short *)ptcp; DA IP tcksum += hs2net(IPT_TCP + len); zero p-type TCP Len if (len % 2) { ((char *)ptcp)[len] = 0; /* pad */ len += 1; /* for the following division */ } len >>= 1; /* convert to length in shorts */ for (i=0; i<len; ++i) tcksum += *sptr++; tcksum = (tcksum >> 16) + (tcksum & 0xffff); tcksum += (tcksum >> 16); return (short)(~tcksum & 0xffff); }

tcpdemux()
(tcbn == Ntcp) TCB? (tcbn >= 0) SYN? tcbn = lstcbn tcbtab TCB? NULL TCB tcbn = -1

pepEthernet tcbtab tcbtab ? (tcbn < Ntcp) / LISTEN TCB (lstcbn) tcbn++

TCB

TCB

struct tcb *tcpdemux(struct ep *pep) { struct ip *pip = (struct ip *)pep->ep_data; struct tcp *ptcp = (struct tcp *)pip->ip_data; struct tcb *ptcb; if client connection int tcbn, lstcbn; SA/DA portIP wait(tcps_tmutex); if server side for (tcbn=0, lstcbn = -1; tcbn<Ntcp; ++tcbn) { DAportIP if (tcbtab[tcbn].tcb_state == TCPS_FREE) continue; if (ptcp->tcp_dport == tcbtab[tcbn].tcb_lport && ptcp->tcp_sport == tcbtab[tcbn].tcb_rport && pip->ip_src == tcbtab[tcbn].tcb_rip && pip->ip_dst == tcbtab[tcbn].tcb_lip) { break; } if (tcbtab[tcbn].tcb_state == TCPS_LISTEN && // SERVER Side ptcp->tcp_dport == tcbtab[tcbn].tcb_lport) lstcbn = tcbn; } if (tcbn >= Ntcp) if (ptcp->tcp_code & TCPF_SYN) //clientserver tcbn = lstcbn; //, serveractive else tcbn = -1; signal(tcps_tmutex); if (tcbn < 0) return 0; wait(tcbtab[tcbn].tcb_mutex); if (tcbtab[tcbn].tcb_state == TCPS_FREE) return 0; /* OOPS! Lost it... */ return &tcbtab[tcbn]; }

tcpok(): check whether the packet lies within the receive window


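1. SYN or FIN set => each one occupies a sequence number, so seglen++.
2. The segment starts inside the window and seglen = 0 => return OK.
3. The segment is checked against the window but window = 0 => the data is stripped (only the headers are kept) before returning.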
ptcb->tcb_rnext wlast

ptcb->tcb_rbcount ptcb->tcb_rbsize

rwindow

seqlen ptcp->tcp_seq slast

wlast, slast, ptcb->tcb_rnext : sequence #

Bool tcpok(struct tcb *ptcb, struct ep *pep) { struct ip *pip = (struct ip *)pep->ep_data; struct tcp *ptcp = (struct tcp *) pip->ip_data; int seglen, rwindow; tcpseq wlast, slast, sup; Bool rv; if (ptcb->tcb_state < TCPS_SYNRCVD) // return TRUE; seglen = pip->ip->len - IP-HLEN(pip) - TCP-HLEN(ptcp); /* add SYN and FIN, SYNFINSequence number */ if (ptcp->tcp_code & TCPF_SYN) ++seglen; if (ptcp->tcp_code & TCPF_FIN) ++seglen; rwindow = ptcb->tcb_rbsize - ptcb->tcb_rbcount; if (rwindow == 0 && seglen == 0) //windwos = 0 packet length = 0 return ptcp->tcp-seq == ptcb->tcb_rnext; wlast = ptcb->tcb-rnext + rwindow - 1; rv = (ptcp->tcp-seq - ptcb->tcb-rnext) >= 0 //segmentwindow && (ptcp->tcp-seq - wlast) <= 0; if (seglen == 0) return rv; slast = ptcp->tcp_seq + seglen - 1; rv |= (slast - ptcb->tcb_rnext) >= 0 && (slast - wlast) <= 0; //segmentwindow /* If no window, strip data but keep ACK, RST and URG, */ if (rwindow == 0) pip->ip_len = IP_HLEN(pip) + TCP_HLEN(ptcp); return rv;

(*tcpswitch[...])()
TCPS_FREE -> ioerr()
TCPS_CLOSED -> tcpclosed()
TCPS_LISTEN -> tcplisten()
TCPS_SYNSENT -> tcpsynsent()
TCPS_SYNRCVD -> tcpsynrcvd()
TCPS_ESTABLISHED -> tcpestablished()
TCPS_FINWAIT1 -> tcpfin1()
TCPS_FINWAIT2 -> tcpfin2()
TCPS_CLOSEWAIT -> tcpclosewait()
TCPS_LASTACK -> tcplastack()
TCPS_CLOSING -> tcpclosing()
TCPS_TIMEWAIT -> tcptimewait()

*tcperror[] = { "no error", "connection reset", /* TCPE_RESET */ "connection refused", /* TCPE_REFUSED */ "not enough buffer space", /* TCPE_TOOBIG */ "connection timed out", /* TCPE_TIMEDOUT */ "urgent data pending", /* TCPE_URGENTMODE */ "end of urgent data", /* TCPE_NORMALMODE */ }; /* SEGMENT ARRIVES state processing */ typedef int (tcpin_t)(struct tcb *, struct ep *); tcpin_t tcpclosed, tcplisten, tcpsynsent, tcpsynrcvd, tcpestablished, tcpfin1, tcpfin2, tcpclosewait, tcpclosing, tcplastack, tcptimewait; tcpin_t *tcpswitch[NTCPSTATES] = { 0, /* TCPS_FREE tcpclosed, /* TCPS_CLOSED */ tcplisten, /* TCPS_LISTEN */ tcpsynsent, /* TCPS_SYNSENT */ tcpsynrcvd, /* TCPS_SYNRCVD */ tcpestablished, /* TCPS_ESTABLISHED tcpfin1, /* TCPS_FINWAIT1 */ tcpfin2, /* TCPS_FINWAIT2 */ tcpclosewait, /* TCPS_CLOSEWAIT */ tcplastack, /* TCPS_LASTACK */ tcpclosing, /* TCPS_CLOSING */ tcptimewait, /* TCPS_TIMEWAIT */ };

char

*/

*/

/* Output event processing */
typedef int (tcpout_t)(int tcbnum, int event);

tcpout_t tcpidle, tcppersist, tcpxmit, tcprexmt;

/* Output handlers in the order of the TCPO_* labels */
tcpout_t *tcposwitch[NTCPOSTATES] = {
    tcpidle,            /* TCPO_IDLE    */
    tcppersist,         /* TCPO_PERSIST */
    tcpxmit,            /* TCPO_XMIT    */
    tcprexmt,           /* TCPO_REXMT   */
};

You might also like