1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 */ 8 9 #if defined(KERNEL) || defined(_KERNEL) 10 # undef KERNEL 11 # undef _KERNEL 12 # define KERNEL 1 13 # define _KERNEL 1 14 #endif 15 #include <sys/errno.h> 16 #include <sys/types.h> 17 #include <sys/param.h> 18 #include <sys/file.h> 19 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 20 defined(_KERNEL) 21 # include "opt_ipfilter_log.h" 22 #endif 23 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 24 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 25 #include "opt_inet6.h" 26 #endif 27 #if !defined(_KERNEL) && !defined(__KERNEL__) 28 # include <stdio.h> 29 # include <stdlib.h> 30 # include <string.h> 31 # define _KERNEL 32 # ifdef __OpenBSD__ 33 struct file; 34 # endif 35 # include <sys/uio.h> 36 # undef _KERNEL 37 #endif 38 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 39 # include <sys/filio.h> 40 # include <sys/fcntl.h> 41 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 42 # include "opt_ipfilter.h" 43 # endif 44 #else 45 # include <sys/ioctl.h> 46 #endif 47 #include <sys/time.h> 48 #if !defined(linux) 49 # include <sys/protosw.h> 50 #endif 51 #include <sys/socket.h> 52 #if defined(_KERNEL) 53 # include <sys/systm.h> 54 # if !defined(__SVR4) && !defined(__svr4__) 55 # include <sys/mbuf.h> 56 # endif 57 #endif 58 #if defined(__SVR4) || defined(__svr4__) 59 # include <sys/filio.h> 60 # include <sys/byteorder.h> 61 # ifdef _KERNEL 62 # include <sys/dditypes.h> 63 # endif 64 # include <sys/stream.h> 65 # include <sys/kmem.h> 66 #endif 67 68 #include <net/if.h> 69 #ifdef sun 70 # include <net/af.h> 71 #endif 72 #include <net/route.h> 73 #include <netinet/in.h> 74 #include <netinet/in_systm.h> 75 #include <netinet/ip.h> 76 #include <netinet/tcp.h> 77 #if !defined(linux) 78 # include <netinet/ip_var.h> 79 
#endif 80 #if !defined(__hpux) && !defined(linux) 81 # include <netinet/tcp_fsm.h> 82 #endif 83 #include <netinet/udp.h> 84 #include <netinet/ip_icmp.h> 85 #include "netinet/ip_compat.h" 86 #include <netinet/tcpip.h> 87 #include "netinet/ip_fil.h" 88 #include "netinet/ip_nat.h" 89 #include "netinet/ip_frag.h" 90 #include "netinet/ip_state.h" 91 #include "netinet/ip_proxy.h" 92 #include "netinet/ipf_stack.h" 93 #ifdef IPFILTER_SYNC 94 #include "netinet/ip_sync.h" 95 #endif 96 #ifdef IPFILTER_SCAN 97 #include "netinet/ip_scan.h" 98 #endif 99 #ifdef USE_INET6 100 #include <netinet/icmp6.h> 101 #endif 102 #if (__FreeBSD_version >= 300000) 103 # include <sys/malloc.h> 104 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 105 # include <sys/libkern.h> 106 # include <sys/systm.h> 107 # endif 108 #endif 109 /* END OF INCLUDES */ 110 111 112 #if !defined(lint) 113 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 114 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 115 #endif 116 117 #ifdef USE_INET6 118 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 119 #endif 120 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 121 i6addr_t *, tcphdr_t *, u_32_t)); 122 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 123 static int fr_state_flush __P((int, int, ipf_stack_t *)); 124 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 125 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 126 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 127 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 128 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 129 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 131 static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); 132 static void fr_checknewisn __P((fr_info_t 
*, ipstate_t *)); 133 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 134 135 int fr_stputent __P((caddr_t, ipf_stack_t *)); 136 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 137 138 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 139 #define FIVE_DAYS (5 * ONE_DAY) 140 #define DOUBLE_HASH(x, ifs) \ 141 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 142 143 144 /* ------------------------------------------------------------------------ */ 145 /* Function: fr_stateinit */ 146 /* Returns: int - 0 == success, -1 == failure */ 147 /* Parameters: ifs - ipf stack instance */ 148 /* */ 149 /* Initialise all the global variables used within the state code. */ 150 /* This action also includes initiailising locks. */ 151 /* ------------------------------------------------------------------------ */ 152 int fr_stateinit(ifs) 153 ipf_stack_t *ifs; 154 { 155 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 156 struct timeval tv; 157 #endif 158 int i; 159 160 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 161 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 162 if (ifs->ifs_ips_table == NULL) 163 return -1; 164 bzero((char *)ifs->ifs_ips_table, 165 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 166 167 KMALLOCS(ifs->ifs_ips_seed, u_long *, 168 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 169 if (ifs->ifs_ips_seed == NULL) 170 return -2; 171 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 172 tv.tv_sec = 0; 173 GETKTIME(&tv); 174 #endif 175 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 176 /* 177 * XXX - ips_seed[X] should be a random number of sorts. 
178 */ 179 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 180 ifs->ifs_ips_seed[i] = ipf_random(); 181 #else 182 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 183 ifs->ifs_fr_statesize; 184 ifs->ifs_ips_seed[i] += tv.tv_sec; 185 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 186 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 187 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 188 #endif 189 } 190 191 /* fill icmp reply type table */ 192 for (i = 0; i <= ICMP_MAXTYPE; i++) 193 icmpreplytype4[i] = -1; 194 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 195 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 196 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 197 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 198 #ifdef USE_INET6 199 /* fill icmp reply type table */ 200 for (i = 0; i <= ICMP6_MAXTYPE; i++) 201 icmpreplytype6[i] = -1; 202 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 203 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 204 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 205 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 206 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 207 #endif 208 209 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 210 ifs->ifs_fr_statesize * sizeof(u_long)); 211 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 212 return -1; 213 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 214 ifs->ifs_fr_statesize * sizeof(u_long)); 215 216 if (ifs->ifs_fr_state_maxbucket == 0) { 217 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 218 ifs->ifs_fr_state_maxbucket++; 219 ifs->ifs_fr_state_maxbucket *= 2; 220 } 221 222 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 223 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 224 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 225 ifs->ifs_ips_udptq.ifq_ref = 1; 226 ifs->ifs_ips_udptq.ifq_head = NULL; 227 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 228 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 229 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 230 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 231 ifs->ifs_ips_udpacktq.ifq_ref = 1; 232 ifs->ifs_ips_udpacktq.ifq_head = NULL; 233 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 234 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 235 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 236 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 237 ifs->ifs_ips_icmptq.ifq_ref = 1; 238 ifs->ifs_ips_icmptq.ifq_head = NULL; 239 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 240 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 241 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 242 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 243 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 244 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 245 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 246 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 247 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 248 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 249 ifs->ifs_ips_iptq.ifq_ref = 1; 250 ifs->ifs_ips_iptq.ifq_head = NULL; 251 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 252 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 253 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 254 /* entry's ttl in deletetq is just 1 tick */ 255 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 256 ifs->ifs_ips_deletetq.ifq_ref = 1; 257 ifs->ifs_ips_deletetq.ifq_head = NULL; 258 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 259 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 260 ifs->ifs_ips_deletetq.ifq_next = NULL; 261 262 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 263 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 264 ifs->ifs_fr_state_init = 1; 265 266 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 267 return 0; 268 } 269 270 271 /* ------------------------------------------------------------------------ */ 272 /* Function: fr_stateunload */ 273 /* Returns: Nil */ 274 /* Parameters: ifs - ipf stack instance */ 275 /* */ 276 /* Release and destroy any resources acquired or initialised so that */ 277 /* IPFilter can be unloaded or re-initialised. */ 278 /* ------------------------------------------------------------------------ */ 279 void fr_stateunload(ifs) 280 ipf_stack_t *ifs; 281 { 282 ipftq_t *ifq, *ifqnext; 283 ipstate_t *is; 284 285 while ((is = ifs->ifs_ips_list) != NULL) 286 (void) fr_delstate(is, 0, ifs); 287 288 /* 289 * Proxy timeout queues are not cleaned here because although they 290 * exist on the state list, appr_unload is called after fr_stateunload 291 * and the proxies actually are responsible for them being created. 292 * Should the proxy timeouts have their own list? There's no real 293 * justification as this is the only complicationA 294 */ 295 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 296 ifqnext = ifq->ifq_next; 297 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 298 (fr_deletetimeoutqueue(ifq) == 0)) 299 fr_freetimeoutqueue(ifq, ifs); 300 } 301 302 ifs->ifs_ips_stats.iss_inuse = 0; 303 ifs->ifs_ips_num = 0; 304 305 if (ifs->ifs_fr_state_init == 1) { 306 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 307 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 308 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 309 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 310 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 313 } 314 315 if (ifs->ifs_ips_table != NULL) { 316 KFREES(ifs->ifs_ips_table, 317 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 318 ifs->ifs_ips_table = NULL; 319 } 320 321 if (ifs->ifs_ips_seed != NULL) { 322 KFREES(ifs->ifs_ips_seed, 323 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 324 
ifs->ifs_ips_seed = NULL; 325 } 326 327 if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 328 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 329 ifs->ifs_fr_statesize * sizeof(u_long)); 330 ifs->ifs_ips_stats.iss_bucketlen = NULL; 331 } 332 333 if (ifs->ifs_fr_state_maxbucket_reset == 1) 334 ifs->ifs_fr_state_maxbucket = 0; 335 336 if (ifs->ifs_fr_state_init == 1) { 337 ifs->ifs_fr_state_init = 0; 338 RW_DESTROY(&ifs->ifs_ipf_state); 339 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 340 } 341 } 342 343 344 /* ------------------------------------------------------------------------ */ 345 /* Function: fr_statetstats */ 346 /* Returns: ips_state_t* - pointer to state stats structure */ 347 /* Parameters: Nil */ 348 /* */ 349 /* Put all the current numbers and pointers into a single struct and return */ 350 /* a pointer to it. */ 351 /* ------------------------------------------------------------------------ */ 352 static ips_stat_t *fr_statetstats(ifs) 353 ipf_stack_t *ifs; 354 { 355 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 356 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 357 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 358 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 359 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 360 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 361 return &ifs->ifs_ips_stats; 362 } 363 364 /* ------------------------------------------------------------------------ */ 365 /* Function: fr_state_remove */ 366 /* Returns: int - 0 == success, != 0 == failure */ 367 /* Parameters: data(I) - pointer to state structure to delete from table */ 368 /* ifs - ipf stack instance */ 369 /* */ 370 /* Search for a state structure that matches the one passed, according to */ 371 /* the IP addresses and other protocol specific information. 
*/ 372 /* ------------------------------------------------------------------------ */ 373 static int fr_state_remove(data, ifs) 374 caddr_t data; 375 ipf_stack_t *ifs; 376 { 377 ipstate_t *sp, st; 378 int error; 379 380 sp = &st; 381 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 382 if (error) 383 return EFAULT; 384 385 WRITE_ENTER(&ifs->ifs_ipf_state); 386 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 387 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 388 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 389 sizeof(st.is_src)) && 390 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 391 sizeof(st.is_dst)) && 392 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 393 sizeof(st.is_ps))) { 394 (void) fr_delstate(sp, ISL_REMOVE, ifs); 395 RWLOCK_EXIT(&ifs->ifs_ipf_state); 396 return 0; 397 } 398 RWLOCK_EXIT(&ifs->ifs_ipf_state); 399 return ESRCH; 400 } 401 402 403 /* ------------------------------------------------------------------------ */ 404 /* Function: fr_state_ioctl */ 405 /* Returns: int - 0 == success, != 0 == failure */ 406 /* Parameters: data(I) - pointer to ioctl data */ 407 /* cmd(I) - ioctl command integer */ 408 /* mode(I) - file mode bits used with open */ 409 /* uid(I) - uid of caller */ 410 /* ctx(I) - pointer to give the uid context */ 411 /* ifs - ipf stack instance */ 412 /* */ 413 /* Processes an ioctl call made to operate on the IP Filter state device. */ 414 /* ------------------------------------------------------------------------ */ 415 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 416 caddr_t data; 417 ioctlcmd_t cmd; 418 int mode, uid; 419 void *ctx; 420 ipf_stack_t *ifs; 421 { 422 int arg, ret, error = 0; 423 424 switch (cmd) 425 { 426 /* 427 * Delete an entry from the state table. 
428 */ 429 case SIOCDELST : 430 error = fr_state_remove(data, ifs); 431 break; 432 /* 433 * Flush the state table 434 */ 435 case SIOCIPFFL : 436 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 437 if (error != 0) { 438 error = EFAULT; 439 } else { 440 if (VALID_TABLE_FLUSH_OPT(arg)) { 441 WRITE_ENTER(&ifs->ifs_ipf_state); 442 ret = fr_state_flush(arg, 4, ifs); 443 RWLOCK_EXIT(&ifs->ifs_ipf_state); 444 error = BCOPYOUT((char *)&ret, data, 445 sizeof(ret)); 446 if (error != 0) 447 return EFAULT; 448 } else { 449 error = EINVAL; 450 } 451 } 452 break; 453 454 #ifdef USE_INET6 455 case SIOCIPFL6 : 456 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 457 if (error != 0) { 458 error = EFAULT; 459 } else { 460 if (VALID_TABLE_FLUSH_OPT(arg)) { 461 WRITE_ENTER(&ifs->ifs_ipf_state); 462 ret = fr_state_flush(arg, 6, ifs); 463 RWLOCK_EXIT(&ifs->ifs_ipf_state); 464 error = BCOPYOUT((char *)&ret, data, 465 sizeof(ret)); 466 if (error != 0) 467 return EFAULT; 468 } else { 469 error = EINVAL; 470 } 471 } 472 break; 473 #endif 474 #ifdef IPFILTER_LOG 475 /* 476 * Flush the state log. 477 */ 478 case SIOCIPFFB : 479 if (!(mode & FWRITE)) 480 error = EPERM; 481 else { 482 int tmp; 483 484 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 485 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 486 if (error != 0) 487 error = EFAULT; 488 } 489 break; 490 /* 491 * Turn logging of state information on/off. 492 */ 493 case SIOCSETLG : 494 if (!(mode & FWRITE)) { 495 error = EPERM; 496 } else { 497 error = BCOPYIN((char *)data, 498 (char *)&ifs->ifs_ipstate_logging, 499 sizeof(ifs->ifs_ipstate_logging)); 500 if (error != 0) 501 error = EFAULT; 502 } 503 break; 504 /* 505 * Return the current state of logging. 506 */ 507 case SIOCGETLG : 508 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 509 (char *)data, 510 sizeof(ifs->ifs_ipstate_logging)); 511 if (error != 0) 512 error = EFAULT; 513 break; 514 /* 515 * Return the number of bytes currently waiting to be read. 
516 */ 517 case FIONREAD : 518 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 519 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 520 if (error != 0) 521 error = EFAULT; 522 break; 523 #endif 524 /* 525 * Get the current state statistics. 526 */ 527 case SIOCGETFS : 528 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 529 break; 530 /* 531 * Lock/Unlock the state table. (Locking prevents any changes, which 532 * means no packets match). 533 */ 534 case SIOCSTLCK : 535 if (!(mode & FWRITE)) { 536 error = EPERM; 537 } else { 538 error = fr_lock(data, &ifs->ifs_fr_state_lock); 539 } 540 break; 541 /* 542 * Add an entry to the current state table. 543 */ 544 case SIOCSTPUT : 545 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 546 error = EACCES; 547 break; 548 } 549 error = fr_stputent(data, ifs); 550 break; 551 /* 552 * Get a state table entry. 553 */ 554 case SIOCSTGET : 555 if (!ifs->ifs_fr_state_lock) { 556 error = EACCES; 557 break; 558 } 559 error = fr_stgetent(data, ifs); 560 break; 561 562 case SIOCGENITER : 563 { 564 ipftoken_t *token; 565 ipfgeniter_t iter; 566 567 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 568 if (error != 0) 569 break; 570 571 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 572 if (token != NULL) 573 error = fr_stateiter(token, &iter, ifs); 574 else 575 error = ESRCH; 576 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 577 break; 578 } 579 580 case SIOCIPFDELTOK : 581 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 582 if (error != 0) { 583 error = EFAULT; 584 } else { 585 error = ipf_deltoken(arg, uid, ctx, ifs); 586 } 587 break; 588 589 default : 590 error = EINVAL; 591 break; 592 } 593 return error; 594 } 595 596 597 /* ------------------------------------------------------------------------ */ 598 /* Function: fr_stgetent */ 599 /* Returns: int - 0 == success, != 0 == failure */ 600 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 601 /* */ 602 /* Copy out state 
information from the kernel to a user space process. If */ 603 /* there is a filter rule associated with the state entry, copy that out */ 604 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 605 /* the struct passed in and if not null and not found in the list of current*/ 606 /* state entries, the retrieval fails. */ 607 /* ------------------------------------------------------------------------ */ 608 int fr_stgetent(data, ifs) 609 caddr_t data; 610 ipf_stack_t *ifs; 611 { 612 ipstate_t *is, *isn; 613 ipstate_save_t ips; 614 int error; 615 616 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 617 if (error) 618 return EFAULT; 619 620 isn = ips.ips_next; 621 if (isn == NULL) { 622 isn = ifs->ifs_ips_list; 623 if (isn == NULL) { 624 if (ips.ips_next == NULL) 625 return ENOENT; 626 return 0; 627 } 628 } else { 629 /* 630 * Make sure the pointer we're copying from exists in the 631 * current list of entries. Security precaution to prevent 632 * copying of random kernel data. 633 */ 634 for (is = ifs->ifs_ips_list; is; is = is->is_next) 635 if (is == isn) 636 break; 637 if (!is) 638 return ESRCH; 639 } 640 ips.ips_next = isn->is_next; 641 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 642 ips.ips_rule = isn->is_rule; 643 if (isn->is_rule != NULL) 644 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 645 sizeof(ips.ips_fr)); 646 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 647 if (error) 648 return EFAULT; 649 return 0; 650 } 651 652 653 /* ------------------------------------------------------------------------ */ 654 /* Function: fr_stputent */ 655 /* Returns: int - 0 == success, != 0 == failure */ 656 /* Parameters: data(I) - pointer to state information struct */ 657 /* ifs - ipf stack instance */ 658 /* */ 659 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 660 /* the state table. If the state info. 
includes a pointer to a filter rule */ 661 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 662 /* output. */ 663 /* ------------------------------------------------------------------------ */ 664 int fr_stputent(data, ifs) 665 caddr_t data; 666 ipf_stack_t *ifs; 667 { 668 ipstate_t *is, *isn; 669 ipstate_save_t ips; 670 int error, i; 671 frentry_t *fr; 672 char *name; 673 674 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 675 if (error) 676 return EFAULT; 677 678 /* 679 * Trigger automatic call to fr_state_flush() if the 680 * table has reached capacity specified by hi watermark. 681 */ 682 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 683 ifs->ifs_fr_state_doflush = 1; 684 685 /* 686 * If automatic flushing did not do its job, and the table 687 * has filled up, don't try to create a new entry. 688 */ 689 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 690 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 691 return ENOMEM; 692 } 693 694 KMALLOC(isn, ipstate_t *); 695 if (isn == NULL) 696 return ENOMEM; 697 698 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 699 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 700 isn->is_sti.tqe_pnext = NULL; 701 isn->is_sti.tqe_next = NULL; 702 isn->is_sti.tqe_ifq = NULL; 703 isn->is_sti.tqe_parent = isn; 704 isn->is_ifp[0] = NULL; 705 isn->is_ifp[1] = NULL; 706 isn->is_ifp[2] = NULL; 707 isn->is_ifp[3] = NULL; 708 isn->is_sync = NULL; 709 fr = ips.ips_rule; 710 711 if (fr == NULL) { 712 READ_ENTER(&ifs->ifs_ipf_state); 713 fr_stinsert(isn, 0, ifs); 714 MUTEX_EXIT(&isn->is_lock); 715 RWLOCK_EXIT(&ifs->ifs_ipf_state); 716 return 0; 717 } 718 719 if (isn->is_flags & SI_NEWFR) { 720 KMALLOC(fr, frentry_t *); 721 if (fr == NULL) { 722 KFREE(isn); 723 return ENOMEM; 724 } 725 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 726 isn->is_rule = fr; 727 ips.ips_is.is_rule = fr; 728 MUTEX_NUKE(&fr->fr_lock); 729 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 730 731 /* 732 * Look 
up all the interface names in the rule. 733 */ 734 for (i = 0; i < 4; i++) { 735 name = fr->fr_ifnames[i]; 736 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 737 name = isn->is_ifname[i]; 738 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 739 } 740 741 fr->fr_ref = 0; 742 fr->fr_dsize = 0; 743 fr->fr_data = NULL; 744 fr->fr_type = FR_T_NONE; 745 746 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 747 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 748 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 749 750 /* 751 * send a copy back to userland of what we ended up 752 * to allow for verification. 753 */ 754 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 755 if (error) { 756 KFREE(isn); 757 MUTEX_DESTROY(&fr->fr_lock); 758 KFREE(fr); 759 return EFAULT; 760 } 761 READ_ENTER(&ifs->ifs_ipf_state); 762 fr_stinsert(isn, 0, ifs); 763 MUTEX_EXIT(&isn->is_lock); 764 RWLOCK_EXIT(&ifs->ifs_ipf_state); 765 766 } else { 767 READ_ENTER(&ifs->ifs_ipf_state); 768 for (is = ifs->ifs_ips_list; is; is = is->is_next) 769 if (is->is_rule == fr) { 770 fr_stinsert(isn, 0, ifs); 771 MUTEX_EXIT(&isn->is_lock); 772 break; 773 } 774 775 if (is == NULL) { 776 KFREE(isn); 777 isn = NULL; 778 } 779 RWLOCK_EXIT(&ifs->ifs_ipf_state); 780 781 return (isn == NULL) ? ESRCH : 0; 782 } 783 784 return 0; 785 } 786 787 788 /* ------------------------------------------------------------------------ */ 789 /* Function: fr_stinsert */ 790 /* Returns: Nil */ 791 /* Parameters: is(I) - pointer to state structure */ 792 /* rev(I) - flag indicating forward/reverse direction of packet */ 793 /* */ 794 /* Inserts a state structure into the hash table (for lookups) and the list */ 795 /* of state entries (for enumeration). Resolves all of the interface names */ 796 /* to pointers and adjusts running stats for the hash table as appropriate. */ 797 /* */ 798 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 799 /* Exits with is_lock initialised and held. 
*/ 800 /* ------------------------------------------------------------------------ */ 801 void fr_stinsert(is, rev, ifs) 802 ipstate_t *is; 803 int rev; 804 ipf_stack_t *ifs; 805 { 806 frentry_t *fr; 807 u_int hv; 808 int i; 809 810 MUTEX_INIT(&is->is_lock, "ipf state entry"); 811 812 fr = is->is_rule; 813 if (fr != NULL) { 814 MUTEX_ENTER(&fr->fr_lock); 815 fr->fr_ref++; 816 fr->fr_statecnt++; 817 MUTEX_EXIT(&fr->fr_lock); 818 } 819 820 /* 821 * Look up all the interface names in the state entry. 822 */ 823 for (i = 0; i < 4; i++) { 824 if (is->is_ifp[i] != NULL) 825 continue; 826 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 827 } 828 829 /* 830 * If we could trust is_hv, then the modulous would not be needed, but 831 * when running with IPFILTER_SYNC, this stops bad values. 832 */ 833 hv = is->is_hv % ifs->ifs_fr_statesize; 834 is->is_hv = hv; 835 836 /* 837 * We need to get both of these locks...the first because it is 838 * possible that once the insert is complete another packet might 839 * come along, match the entry and want to update it. 840 */ 841 MUTEX_ENTER(&is->is_lock); 842 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 843 844 /* 845 * add into list table. 
846 */ 847 if (ifs->ifs_ips_list != NULL) 848 ifs->ifs_ips_list->is_pnext = &is->is_next; 849 is->is_pnext = &ifs->ifs_ips_list; 850 is->is_next = ifs->ifs_ips_list; 851 ifs->ifs_ips_list = is; 852 853 if (ifs->ifs_ips_table[hv] != NULL) 854 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 855 else 856 ifs->ifs_ips_stats.iss_inuse++; 857 is->is_phnext = ifs->ifs_ips_table + hv; 858 is->is_hnext = ifs->ifs_ips_table[hv]; 859 ifs->ifs_ips_table[hv] = is; 860 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 861 ifs->ifs_ips_num++; 862 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 863 864 fr_setstatequeue(is, rev, ifs); 865 } 866 867 /* ------------------------------------------------------------------------ */ 868 /* Function: fr_match_ipv4addrs */ 869 /* Returns: int - 2 strong match (same addresses, same direction) */ 870 /* 1 weak match (same address, opposite direction) */ 871 /* 0 no match */ 872 /* */ 873 /* Function matches IPv4 addresses. */ 874 /* ------------------------------------------------------------------------ */ 875 static int fr_match_ipv4addrs(is1, is2) 876 ipstate_t *is1; 877 ipstate_t *is2; 878 { 879 int rv; 880 881 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 882 rv = 2; 883 else if (is1->is_saddr == is2->is_daddr && 884 is1->is_daddr == is2->is_saddr) 885 rv = 1; 886 else 887 rv = 0; 888 889 return (rv); 890 } 891 892 /* ------------------------------------------------------------------------ */ 893 /* Function: fr_match_ipv6addrs */ 894 /* Returns: int - 2 strong match (same addresses, same direction) */ 895 /* 1 weak match (same addresses, opposite direction) */ 896 /* 0 no match */ 897 /* */ 898 /* Function matches IPv6 addresses. 
*/ 899 /* ------------------------------------------------------------------------ */ 900 static int fr_match_ipv6addrs(is1, is2) 901 ipstate_t *is1; 902 ipstate_t *is2; 903 { 904 int rv; 905 906 if (IP6_EQ(&is1->is_src, &is2->is_src) && 907 IP6_EQ(&is1->is_dst, &is2->is_dst)) 908 rv = 2; 909 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 910 IP6_EQ(&is1->is_dst, &is2->is_src)) { 911 rv = 1; 912 } 913 else 914 rv = 0; 915 916 return (rv); 917 } 918 /* ------------------------------------------------------------------------ */ 919 /* Function: fr_match_addresses */ 920 /* Returns: int - 2 strong match (same addresses, same direction) */ 921 /* 1 weak match (same address, opposite directions) */ 922 /* 0 no match */ 923 /* Parameters: is1, is2 pointers to states we are checking */ 924 /* */ 925 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 926 /* and IPv6 address format. */ 927 /* ------------------------------------------------------------------------ */ 928 static int fr_match_addresses(is1, is2) 929 ipstate_t *is1; 930 ipstate_t *is2; 931 { 932 int rv; 933 934 if (is1->is_v == 4) { 935 rv = fr_match_ipv4addrs(is1, is2); 936 } else { 937 rv = fr_match_ipv6addrs(is1, is2); 938 } 939 940 return (rv); 941 } 942 943 /* ------------------------------------------------------------------------ */ 944 /* Function: fr_match_ppairs */ 945 /* Returns: int - 2 strong match (same ports, same direction) */ 946 /* 1 weak match (same ports, different direction) */ 947 /* 0 no match */ 948 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 949 /* */ 950 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 951 /* src, dst port, which belong to session (state entry). 
*/ 952 /* ------------------------------------------------------------------------ */ 953 static int fr_match_ppairs(ppairs1, ppairs2) 954 port_pair_t *ppairs1; 955 port_pair_t *ppairs2; 956 { 957 int rv; 958 959 if (ppairs1->pp_sport == ppairs2->pp_sport && 960 ppairs1->pp_dport == ppairs2->pp_dport) 961 rv = 2; 962 else if (ppairs1->pp_sport == ppairs2->pp_dport && 963 ppairs1->pp_dport == ppairs2->pp_sport) 964 rv = 1; 965 else 966 rv = 0; 967 968 return (rv); 969 } 970 971 /* ------------------------------------------------------------------------ */ 972 /* Function: fr_match_l4_hdr */ 973 /* Returns: int - 0 no match, */ 974 /* 1 weak match (same ports, different directions) */ 975 /* 2 strong match (same ports, same direction) */ 976 /* Parameters is1, is2 - states we want to match */ 977 /* */ 978 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 979 /* GRE protocol). */ 980 /* ------------------------------------------------------------------------ */ 981 static int fr_match_l4_hdr(is1, is2) 982 ipstate_t *is1; 983 ipstate_t *is2; 984 { 985 int rv = 0; 986 port_pair_t pp1; 987 port_pair_t pp2; 988 989 if (is1->is_p != is2->is_p) 990 return (0); 991 992 switch (is1->is_p) { 993 case IPPROTO_TCP: 994 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 995 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 996 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 997 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 998 rv = fr_match_ppairs(&pp1, &pp2); 999 break; 1000 case IPPROTO_UDP: 1001 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1002 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1003 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1004 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1005 rv = fr_match_ppairs(&pp1, &pp2); 1006 break; 1007 case IPPROTO_GRE: 1008 /* greinfo_t can be also interprted as port pair */ 1009 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1010 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1011 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1012 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1013 rv = fr_match_ppairs(&pp1, &pp2); 1014 break; 1015 case IPPROTO_ICMP: 1016 case IPPROTO_ICMPV6: 1017 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1018 rv = 1; 1019 else 1020 rv = 0; 1021 break; 1022 default: 1023 rv = 0; 1024 } 1025 1026 return (rv); 1027 } 1028 1029 /* ------------------------------------------------------------------------ */ 1030 /* Function: fr_matchstates */ 1031 /* Returns: int - nonzero match, zero no match */ 1032 /* Parameters is1, is2 - states we want to match */ 1033 /* */ 1034 /* The state entries are equal (identical match) if they belong to the same */ 1035 /* session. Any time new state entry is being added the fr_addstate() */ 1036 /* function creates temporal state entry from the data it gets from IP and */ 1037 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1038 /* which is also stored within the state entry. We should keep in mind the */ 1039 /* information about packet direction is spread accross L3 (addresses) and */ 1040 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1041 /* - no match (match(is1, is2) == 0)) */ 1042 /* - weak match same addresses (ports), but different */ 1043 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1044 /* - strong match same addresses (ports) and same directions */ 1045 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1046 /* */ 1047 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1048 /* and functions, which are used to compare ports (L4 header) data. We say */ 1049 /* the is1 and is2 are same (identical) if there is a match */ 1050 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1051 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1052 /* Such requirement deals with case as follows: */ 1053 /* suppose there are two connections between hosts A, B. 
Connection 1:   */
/*              a.a.a.a:12345 <=> b.b.b.b:54321                             */
/*      Connection 2:                                                       */
/*              a.a.a.a:54321 <=> b.b.b.b:12345                             */
/* since we've introduced match levels into our fr_matchstates(), we are    */
/* able to identify, which packets belong to connection 1 and which belong  */
/* to connection 2.     Assume there are two entries is1, is2. is1 has been */
/* created from a con. 1 packet, which travelled from A to B:               */
/*      a.a.a.a:12345 -> b.b.b.b:54321                                      */
/* while is2 has been created from a packet which belongs to con. 2 and is  */
/* also coming from A to B:                                                 */
/*      a.a.a.a:54321 -> b.b.b.b:12345                                      */
/* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while                    */
/* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are     */
/* different the state entries are not identical -> no match as a final     */
/* result.                                                                  */
/*                                                                          */
/* In the code below the L3 comparison referred to as fr_match_l3_hdr() is  */
/* performed by fr_match_addresses().  Before either header is compared,    */
/* the bytes from is_pass up to (not including) is_ps must be identical in  */
/* both entries, otherwise the result is "no match".                        */
/* ------------------------------------------------------------------------ */
static int fr_matchstates(is1, is2)
ipstate_t *is1;
ipstate_t *is2;
{
	int	rv;
	int	amatch;
	int	pmatch;

	/*
	 * Bytewise comparison of everything stored between is_pass and
	 * is_ps; the L4 data held in is_ps is compared separately below.
	 */
	if (bcmp(&is1->is_pass, &is2->is_pass,
		offsetof(struct ipstate, is_ps) -
		offsetof(struct ipstate, is_pass)) == 0) {

		pmatch = fr_match_l4_hdr(is1, is2);
		amatch = fr_match_addresses(is1, is2);
		/*
		 * If addresses match (amatch != 0), then 'match levels'
		 * must be same for matching entries. If amatch and pmatch
		 * have different values (different match levels), then
		 * is1 and is2 belong to different sessions.
		 */
		rv = (amatch != 0) && (amatch == pmatch);
	}
	else
		rv = 0;

	return (rv);
}

/* ------------------------------------------------------------------------ */
/* Function:    fr_addstate                                                 */
/* Returns:     ipstate_t* - NULL == failure, else pointer to new state     */
/* Parameters:  fin(I)    - pointer to packet information                   */
/*              stsave(O) - pointer to place to save pointer to created     */
/*                          state structure.                                */
/*              flags(I)  - flags to use when creating the structure        */
/*                                                                          */
/* Creates a new IP state structure from the packet information collected.  */
/* Inserts it into the state table and appends to the bottom of the active  */
/* list.  If the capacity of the table has reached the maximum allowed then */
/* the call will fail and a flush is scheduled for the next timeout call.   */
/*                                                                          */
/* NULL is also returned when state is locked, when the packet is flagged   */
/* short/bad/fragment-body or already has state, for a TCP RST, for ICMP    */
/* types that are not listed in the switch below, when an identical entry   */
/* already exists, when the hash bucket is full and when allocation fails.  */
/* ------------------------------------------------------------------------ */
ipstate_t *fr_addstate(fin, stsave, flags)
fr_info_t *fin;
ipstate_t **stsave;
u_int flags;
{
	ipstate_t *is, ips;
	struct icmp *ic;
	u_int pass, hv;
	frentry_t *fr;
	tcphdr_t *tcp;
	grehdr_t *gre;
	void *ifp;
	int out;
	ipf_stack_t *ifs = fin->fin_ifs;

	if (ifs->ifs_fr_state_lock ||
	    (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)))
		return NULL;

	/* An out-of-window packet may only create state if it carries SYN. */
	if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN))
		return NULL;

	/*
	 * Trigger automatic call to fr_state_flush() if the
	 * table has reached capacity specified by hi watermark.
	 */
	if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi)
		ifs->ifs_fr_state_doflush = 1;

	/*
	 * If the max number of state entries has been reached, and there is no
	 * limit on the state count for the rule, then do not continue.  In the
	 * case where a limit exists, it's ok allow the entries to be created as
	 * long as specified limit itself has not been reached.
	 *
	 * Note that because the lock isn't held on fr, it is possible to exceed
	 * the specified size of the table.  However, the cost of this is being
	 * ignored here; as the number by which it can go over is a product of
	 * the number of simultaneous threads that could be executing in here.
	 * So, a limit of 100 won't result in 200, but could result in 101 or 102.
	 *
	 * Also note that, since the automatic flush should have been triggered
	 * well before we reach the maximum number of state table entries, the
	 * likelihood of reaching the max (and thus exceeding it) is minimal.
	 */
	fr = fin->fin_fr;
	if (fr != NULL) {
		if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) &&
		    (fr->fr_statemax == 0)) {
			ATOMIC_INCL(ifs->ifs_ips_stats.iss_max);
			return NULL;
		}
		if ((fr->fr_statemax != 0) &&
		    (fr->fr_statecnt >= fr->fr_statemax)) {
			ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref);
			ifs->ifs_fr_state_doflush = 1;
			return NULL;
		}
	}

	/*
	 * The new entry is first built in the on-stack template 'ips' (via
	 * 'is'); it is only copied to allocated memory once all checks have
	 * passed.
	 */
	ic = NULL;
	tcp = NULL;
	out = fin->fin_out;
	is = &ips;
	bzero((char *)is, sizeof(*is));

	if (fr == NULL) {
		pass = ifs->ifs_fr_flags;
		is->is_tag = FR_NOLOGTAG;
	} else {
		pass = fr->fr_flags;
	}

	is->is_die = 1 + ifs->ifs_fr_ticks;
	/*
	 * We want to check everything that is a property of this packet,
	 * but we don't (automatically) care about it's fragment status as
	 * this may change.
	 */
	is->is_pass = pass;
	is->is_v = fin->fin_v;
	is->is_opt[0] = fin->fin_optmsk;
	is->is_optmsk[0] = 0xffffffff;
	/*
	 * The reverse direction option mask will be set in fr_matchsrcdst(),
	 * when we will see the first packet from the peer. We will leave it
	 * as zero for now.
	 */
	is->is_optmsk[1] = 0x0;

	if (is->is_v == 6) {
		is->is_opt[0] &= ~0x8;
		is->is_optmsk[0] &= ~0x8;
	}
	is->is_sec = fin->fin_secmsk;
	is->is_secmsk = 0xffff;
	is->is_auth = fin->fin_auth;
	is->is_authmsk = 0xffff;

	/*
	 * Copy and calculate...
	 * 'hv' accumulates the hash input: protocol, addresses and, further
	 * down, the protocol specific identifiers (ports, ICMP id).
	 */
	hv = (is->is_p = fin->fin_fi.fi_p);
	is->is_src = fin->fin_fi.fi_src;
	hv += is->is_saddr;
	is->is_dst = fin->fin_fi.fi_dst;
	hv += is->is_daddr;
#ifdef USE_INET6
	if (fin->fin_v == 6) {
		/*
		 * For ICMPv6, we check to see if the destination address is
		 * a multicast address.  If it is, do not include it in the
		 * calculation of the hash because the correct reply will come
		 * back from a real address, not a multicast address.
		 */
		if ((is->is_p == IPPROTO_ICMPV6) &&
		    IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
			/*
			 * So you can do keep state with neighbour discovery.
			 *
			 * Here we could use the address from the neighbour
			 * solicit message to put in the state structure and
			 * we could use that without a wildcard flag too...
			 *
			 * NOTE(review): is_flags is assigned wholesale from
			 * 'flags & IS_INHERITED' after the protocol switch
			 * below, so the bit set here only survives if the
			 * caller passed it in 'flags' - confirm whether this
			 * was meant to be 'flags |= SI_W_DADDR'.
			 */
			is->is_flags |= SI_W_DADDR;
			hv -= is->is_daddr;
		} else {
			hv += is->is_dst.i6[1];
			hv += is->is_dst.i6[2];
			hv += is->is_dst.i6[3];
		}
		hv += is->is_src.i6[1];
		hv += is->is_src.i6[2];
		hv += is->is_src.i6[3];
	}
#endif
	/*
	 * IPv4 multicast/broadcast: the multicast/broadcast side of the
	 * packet is wildcarded (and taken back out of the hash) and the
	 * entry is marked SI_CLONE.
	 */
	if ((fin->fin_v == 4) &&
	    (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
		if (fin->fin_out == 0) {
			flags |= SI_W_DADDR|SI_CLONE;
			hv -= is->is_daddr;
		} else {
			flags |= SI_W_SADDR|SI_CLONE;
			hv -= is->is_saddr;
		}
	}

	switch (is->is_p)
	{
#ifdef USE_INET6
	case IPPROTO_ICMPV6 :
		ic = fin->fin_dp;

		/* Only the query types listed here may create state. */
		switch (ic->icmp_type)
		{
		case ICMP6_ECHO_REQUEST :
			is->is_icmp.ici_type = ic->icmp_type;
			hv += (is->is_icmp.ici_id = ic->icmp_id);
			break;
		case ICMP6_MEMBERSHIP_QUERY :
		case ND_ROUTER_SOLICIT :
		case ND_NEIGHBOR_SOLICIT :
		case ICMP6_NI_QUERY :
			is->is_icmp.ici_type = ic->icmp_type;
			break;
		default :
			return NULL;
		}
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp);
		break;
#endif
	case IPPROTO_ICMP :
		ic = fin->fin_dp;

		/* Only the ICMP types listed here may create state. */
		switch (ic->icmp_type)
		{
		case ICMP_ECHO :
		case ICMP_ECHOREPLY :
		case ICMP_TSTAMP :
		case ICMP_IREQ :
		case ICMP_MASKREQ :
			is->is_icmp.ici_type = ic->icmp_type;
			hv += (is->is_icmp.ici_id = ic->icmp_id);
			break;
		default :
			return NULL;
		}
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp);
		break;

	case IPPROTO_GRE :
		gre = fin->fin_dp;

		is->is_gre.gs_flags = gre->gr_flags;
		is->is_gre.gs_ptype = gre->gr_ptype;
		/* Call IDs are only recorded when GRE_REV() yields 1. */
		if (GRE_REV(is->is_gre.gs_flags) == 1) {
			is->is_call[0] = fin->fin_data[0];
			is->is_call[1] = fin->fin_data[1];
		}
		break;

	case IPPROTO_TCP :
		tcp = fin->fin_dp;

		/* Never create state from a RST. */
		if (tcp->th_flags & TH_RST)
			return NULL;
		/*
		 * The endian of the ports doesn't matter, but the ack and
		 * sequence numbers do as we do mathematics on them later.
		 */
		is->is_sport = htons(fin->fin_data[0]);
		is->is_dport = htons(fin->fin_data[1]);
		if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
			hv += is->is_sport;
			hv += is->is_dport;
		}

		/*
		 * If this is a real packet then initialise fields in the
		 * state information structure from the TCP header information.
		 */

		is->is_maxdwin = 1;
		is->is_maxswin = ntohs(tcp->th_win);
		if (is->is_maxswin == 0)
			is->is_maxswin = 1;

		if ((fin->fin_flx & FI_IGNORE) == 0) {
			/*
			 * End of the sender's sequence space: seq + payload
			 * length, with SYN and FIN each counting as one.
			 */
			is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
				      (TCP_OFF(tcp) << 2) +
				      ((tcp->th_flags & TH_SYN) ? 1 : 0) +
				      ((tcp->th_flags & TH_FIN) ? 1 : 0);
			is->is_maxsend = is->is_send;

			/*
			 * Window scale option is only present in
			 * SYN/SYN-ACK packet.
			 */
			if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
			    TH_SYN &&
			    (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
				if (fr_tcpoptions(fin, tcp,
					&is->is_tcp.ts_data[0]) == -1) {
					fin->fin_flx |= FI_BAD;
				}
			}

			if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
				fr_checknewisn(fin, is);
				fr_fixoutisn(fin, is);
			}

			if ((tcp->th_flags & TH_OPENING) == TH_SYN)
				flags |= IS_TCPFSM;
			else {
				/*
				 * Not a plain SYN: seed the reverse side
				 * from the ack number.  The destination
				 * window ends up as four times is_maxswin
				 * (doubled here and again below).
				 */
				is->is_maxdwin = is->is_maxswin * 2;
				is->is_dend = ntohl(tcp->th_ack);
				is->is_maxdend = ntohl(tcp->th_ack);
				is->is_maxdwin *= 2;
			}
		}

		/*
		 * If we're creating state for a starting connection, start the
		 * timer on it as we'll never see an error if it fails to
		 * connect.
		 */
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp);
		break;

	case IPPROTO_UDP :
		/* Only the port fields are read through the tcphdr_t view. */
		tcp = fin->fin_dp;

		is->is_sport = htons(fin->fin_data[0]);
		is->is_dport = htons(fin->fin_data[1]);
		if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
			hv += tcp->th_dport;
			hv += tcp->th_sport;
		}
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp);
		break;

	default :
		break;
	}
	hv = DOUBLE_HASH(hv, ifs);
	is->is_hv = hv;
	is->is_rule = fr;
	is->is_flags = flags & IS_INHERITED;

	/*
	 * Look for identical state.
	 * From here on 'is' walks the hash chain; the template stays in 'ips'.
	 */
	for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize];
	     is != NULL;
	     is = is->is_hnext) {
		if (fr_matchstates(&ips, is) == 1)
			break;
	}

	/*
	 * we've found a matching state -> state already exists,
	 * we are not going to add a duplicate record.
	 */
	if (is != NULL)
		return NULL;

	if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) {
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull);
		return NULL;
	}
	KMALLOC(is, ipstate_t *);
	if (is == NULL) {
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem);
		return NULL;
	}
	bcopy((char *)&ips, (char *)is, sizeof(*is));
	/*
	 * Do not do the modulous here, it is done in fr_stinsert().
	 */
	if (fr != NULL) {
		(void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN);
		/* Rule-specific timeouts get their own timeout queues. */
		if (fr->fr_age[0] != 0) {
			is->is_tqehead[0] =
			    fr_addtimeoutqueue(&ifs->ifs_ips_utqe,
					       fr->fr_age[0], ifs);
			is->is_sti.tqe_flags |= TQE_RULEBASED;
		}
		if (fr->fr_age[1] != 0) {
			is->is_tqehead[1] =
			    fr_addtimeoutqueue(&ifs->ifs_ips_utqe,
					       fr->fr_age[1], ifs);
			is->is_sti.tqe_flags |= TQE_RULEBASED;
		}
		is->is_tag = fr->fr_logtag;

		/*
		 * Interface slots are indexed as (direction << 1) + reverse;
		 * slot (out << 1) is filled from the packet further below,
		 * the other three come from the rule.
		 */
		is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
		is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
		is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];

		/* Names are copied only for real (non NULL, non -1) ifps. */
		if (((ifp = fr->fr_ifas[1]) != NULL) &&
		    (ifp != (void *)-1)) {
			COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v);
		}
		if (((ifp = fr->fr_ifas[2]) != NULL) &&
		    (ifp != (void *)-1)) {
			COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v);
		}
		if (((ifp = fr->fr_ifas[3]) != NULL) &&
		    (ifp != (void *)-1)) {
			COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v);
		}
	}

	is->is_ifp[out << 1] = fin->fin_ifp;
	if (fin->fin_ifp != NULL) {
		COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v);
	}

	is->is_ref = 1;
	is->is_pkts[0] = 0, is->is_bytes[0] = 0;
	is->is_pkts[1] = 0, is->is_bytes[1] = 0;
	is->is_pkts[2] = 0, is->is_bytes[2] = 0;
	is->is_pkts[3] = 0, is->is_bytes[3] = 0;
	/* Account for the creating packet unless it is to be ignored. */
	if ((fin->fin_flx & FI_IGNORE) == 0) {
		is->is_pkts[out] = 1;
		is->is_bytes[out] = fin->fin_plen;
		is->is_flx[out][0] = fin->fin_flx & FI_CMP;
		is->is_flx[out][0] &= ~FI_OOW;
	}

	if (pass & FR_STSTRICT)
		is->is_flags |= IS_STRICT;

	if (pass & FR_STATESYNC)
		is->is_flags |= IS_STATESYNC;

	if (flags & (SI_WILDP|SI_WILDA)) {
		ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild);
	}
	is->is_rulen = fin->fin_rule;


	if (pass & FR_LOGFIRST)
		is->is_pass &= ~(FR_LOGFIRST|FR_LOG);

	READ_ENTER(&ifs->ifs_ipf_state);
	is->is_me = stsave;

	/*
	 * NOTE(review): is_lock is released below without a visible
	 * MUTEX_ENTER in this function; presumably fr_stinsert() returns
	 * with it held - confirm against fr_stinsert().
	 */
	fr_stinsert(is, fin->fin_rev, ifs);

	if (fin->fin_p == IPPROTO_TCP) {
		/*
		* If we're creating state for a starting connection, start the
		* timer on it as we'll never see an error if it fails to
		* connect.
		*/
		(void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb,
				  is->is_flags);
		MUTEX_EXIT(&is->is_lock);
#ifdef IPFILTER_SCAN
		if ((is->is_flags & SI_CLONE) == 0)
			(void) ipsc_attachis(is);
#endif
	} else {
		MUTEX_EXIT(&is->is_lock);
	}
#ifdef IPFILTER_SYNC
	if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
		is->is_sync = ipfsync_new(SMC_STATE, fin, is);
#endif
	if (ifs->ifs_ipstate_logging)
		ipstate_log(is, ISL_NEW, ifs);

	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr);
	fin->fin_flx |= FI_STATE;
	if (fin->fin_flx & FI_FRAG)
		(void) fr_newfrag(fin, pass ^ FR_KEEPSTATE);

	return is;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_tcpoptions                                               */
/* Returns:     int -  1 == the option area was fully available,            */
/*                     0 == data shorter than the TCP header claims, or     */
/*                          the option area had to be truncated,            */
/*                    -1 == a recognised option carried a bad length        */
/* Parameters:  fin(I) - pointer to packet information                      */
/*              tcp(I) - pointer to TCP packet header                       */
/*              td(O)  - pointer to TCP data held as part of the state      */
/*                                                                          */
/* Look after the TCP header for any options and deal with those that are   */
/* present.  Record details about those that we recognise.
*/ 1544 /* ------------------------------------------------------------------------ */ 1545 static int fr_tcpoptions(fin, tcp, td) 1546 fr_info_t *fin; 1547 tcphdr_t *tcp; 1548 tcpdata_t *td; 1549 { 1550 int off, mlen, ol, i, len, retval; 1551 char buf[64], *s, opt; 1552 mb_t *m = NULL; 1553 1554 len = (TCP_OFF(tcp) << 2); 1555 if (fin->fin_dlen < len) 1556 return 0; 1557 len -= sizeof(*tcp); 1558 1559 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1560 1561 m = fin->fin_m; 1562 mlen = MSGDSIZE(m) - off; 1563 if (len > mlen) { 1564 len = mlen; 1565 retval = 0; 1566 } else { 1567 retval = 1; 1568 } 1569 1570 COPYDATA(m, off, len, buf); 1571 1572 for (s = buf; len > 0; ) { 1573 opt = *s; 1574 if (opt == TCPOPT_EOL) 1575 break; 1576 else if (opt == TCPOPT_NOP) 1577 ol = 1; 1578 else { 1579 if (len < 2) 1580 break; 1581 ol = (int)*(s + 1); 1582 if (ol < 2 || ol > len) 1583 break; 1584 1585 /* 1586 * Extract the TCP options we are interested in out of 1587 * the header and store them in the the tcpdata struct. 1588 */ 1589 switch (opt) 1590 { 1591 case TCPOPT_WINDOW : 1592 if (ol == TCPOLEN_WINDOW) { 1593 i = (int)*(s + 2); 1594 if (i > TCP_WSCALE_MAX) 1595 i = TCP_WSCALE_MAX; 1596 else if (i < 0) 1597 i = 0; 1598 td->td_winscale = i; 1599 td->td_winflags |= TCP_WSCALE_SEEN | 1600 TCP_WSCALE_FIRST; 1601 } else 1602 retval = -1; 1603 break; 1604 case TCPOPT_MAXSEG : 1605 /* 1606 * So, if we wanted to set the TCP MAXSEG, 1607 * it should be done here... 
1608 */ 1609 if (ol == TCPOLEN_MAXSEG) { 1610 i = (int)*(s + 2); 1611 i <<= 8; 1612 i += (int)*(s + 3); 1613 td->td_maxseg = i; 1614 } else 1615 retval = -1; 1616 break; 1617 case TCPOPT_SACK_PERMITTED : 1618 if (ol == TCPOLEN_SACK_PERMITTED) 1619 td->td_winflags |= TCP_SACK_PERMIT; 1620 else 1621 retval = -1; 1622 break; 1623 } 1624 } 1625 len -= ol; 1626 s += ol; 1627 } 1628 return retval; 1629 } 1630 1631 1632 /* ------------------------------------------------------------------------ */ 1633 /* Function: fr_tcpstate */ 1634 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1635 /* Parameters: fin(I) - pointer to packet information */ 1636 /* tcp(I) - pointer to TCP packet header */ 1637 /* is(I) - pointer to master state structure */ 1638 /* */ 1639 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1640 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1641 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1642 /* ------------------------------------------------------------------------ */ 1643 static int fr_tcpstate(fin, tcp, is) 1644 fr_info_t *fin; 1645 tcphdr_t *tcp; 1646 ipstate_t *is; 1647 { 1648 int source, ret = 0, flags; 1649 tcpdata_t *fdata, *tdata; 1650 ipf_stack_t *ifs = fin->fin_ifs; 1651 1652 source = !fin->fin_rev; 1653 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1654 (ntohs(is->is_sport) != fin->fin_data[0])) 1655 source = 0; 1656 fdata = &is->is_tcp.ts_data[!source]; 1657 tdata = &is->is_tcp.ts_data[source]; 1658 1659 MUTEX_ENTER(&is->is_lock); 1660 1661 /* 1662 * If a SYN packet is received for a connection that is in a half 1663 * closed state, then move its state entry to deletetq. In such case 1664 * the SYN packet will be consequently dropped. This allows new state 1665 * entry to be created with a retransmited SYN packet. 
1666 */ 1667 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1668 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1669 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1670 is->is_state[source] = IPF_TCPS_CLOSED; 1671 is->is_state[!source] = IPF_TCPS_CLOSED; 1672 /* 1673 * Do not update is->is_sti.tqe_die in case state entry 1674 * is already present in deletetq. It prevents state 1675 * entry ttl update by retransmitted SYN packets, which 1676 * may arrive before timer tick kicks off. The SYN 1677 * packet will be dropped again. 1678 */ 1679 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1680 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1681 &fin->fin_ifs->ifs_ips_deletetq, 1682 fin->fin_ifs); 1683 1684 MUTEX_EXIT(&is->is_lock); 1685 return 0; 1686 } 1687 } 1688 1689 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1690 #ifdef IPFILTER_SCAN 1691 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1692 ipsc_packet(fin, is); 1693 if (FR_ISBLOCK(is->is_pass)) { 1694 MUTEX_EXIT(&is->is_lock); 1695 return 1; 1696 } 1697 } 1698 #endif 1699 1700 /* 1701 * Nearing end of connection, start timeout. 1702 */ 1703 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1704 is->is_flags); 1705 if (ret == 0) { 1706 MUTEX_EXIT(&is->is_lock); 1707 return 0; 1708 } 1709 1710 /* 1711 * set s0's as appropriate. Use syn-ack packet as it 1712 * contains both pieces of required information. 1713 */ 1714 /* 1715 * Window scale option is only present in SYN/SYN-ACK packet. 1716 * Compare with ~TH_FIN to mask out T/TCP setups. 
1717 */ 1718 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1719 if (flags == (TH_SYN|TH_ACK)) { 1720 is->is_s0[source] = ntohl(tcp->th_ack); 1721 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1722 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1723 (void) fr_tcpoptions(fin, tcp, fdata); 1724 } 1725 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1726 fr_checknewisn(fin, is); 1727 } else if (flags == TH_SYN) { 1728 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1729 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1730 (void) fr_tcpoptions(fin, tcp, tdata); 1731 1732 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1733 fr_checknewisn(fin, is); 1734 1735 } 1736 ret = 1; 1737 } else 1738 fin->fin_flx |= FI_OOW; 1739 MUTEX_EXIT(&is->is_lock); 1740 return ret; 1741 } 1742 1743 1744 /* ------------------------------------------------------------------------ */ 1745 /* Function: fr_checknewisn */ 1746 /* Returns: Nil */ 1747 /* Parameters: fin(I) - pointer to packet information */ 1748 /* is(I) - pointer to master state structure */ 1749 /* */ 1750 /* Check to see if this TCP connection is expecting and needs a new */ 1751 /* sequence number for a particular direction of the connection. */ 1752 /* */ 1753 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1754 /* one ready. */ 1755 /* ------------------------------------------------------------------------ */ 1756 static void fr_checknewisn(fin, is) 1757 fr_info_t *fin; 1758 ipstate_t *is; 1759 { 1760 u_32_t sumd, old, new; 1761 tcphdr_t *tcp; 1762 int i; 1763 1764 i = fin->fin_rev; 1765 tcp = fin->fin_dp; 1766 1767 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1768 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1769 old = ntohl(tcp->th_seq); 1770 new = fr_newisn(fin); 1771 is->is_isninc[i] = new - old; 1772 CALC_SUMD(old, new, sumd); 1773 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1774 1775 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1776 } 1777 } 1778 1779 1780 /* ------------------------------------------------------------------------ */ 1781 /* Function: fr_tcpinwindow */ 1782 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1783 /* Parameters: fin(I) - pointer to packet information */ 1784 /* fdata(I) - pointer to tcp state informatio (forward) */ 1785 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1786 /* tcp(I) - pointer to TCP packet header */ 1787 /* */ 1788 /* Given a packet has matched addresses and ports, check to see if it is */ 1789 /* within the TCP data window. In a show of generosity, allow packets that */ 1790 /* are within the window space behind the current sequence # as well. */ 1791 /* ------------------------------------------------------------------------ */ 1792 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1793 fr_info_t *fin; 1794 tcpdata_t *fdata, *tdata; 1795 tcphdr_t *tcp; 1796 int flags; 1797 { 1798 tcp_seq seq, ack, end; 1799 int ackskew, tcpflags; 1800 u_32_t win, maxwin; 1801 int dsize, inseq; 1802 1803 /* 1804 * Find difference between last checked packet and this packet. 1805 */ 1806 tcpflags = tcp->th_flags; 1807 seq = ntohl(tcp->th_seq); 1808 ack = ntohl(tcp->th_ack); 1809 1810 if (tcpflags & TH_SYN) 1811 win = ntohs(tcp->th_win); 1812 else 1813 win = ntohs(tcp->th_win) << fdata->td_winscale; 1814 1815 /* 1816 * win 0 means the receiving endpoint has closed the window, because it 1817 * has not enough memory to receive data from sender. In such case we 1818 * are pretending window size to be 1 to let TCP probe data through. 1819 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1820 * state this accurately, so we have to allow 1 octet (win = 1) even if 1821 * the window is closed (win == 0). 1822 */ 1823 if (win == 0) 1824 win = 1; 1825 1826 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1827 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1828 1829 /* 1830 * if window scaling is present, the scaling is only allowed 1831 * for windows not in the first SYN packet. In that packet the 1832 * window is 65535 to specify the largest window possible 1833 * for receivers not implementing the window scale option. 1834 * Currently, we do not assume TTCP here. That means that 1835 * if we see a second packet from a host (after the initial 1836 * SYN), we can assume that the receiver of the SYN did 1837 * already send back the SYN/ACK (and thus that we know if 1838 * the receiver also does window scaling) 1839 */ 1840 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1841 fdata->td_maxwin = win; 1842 } 1843 1844 end = seq + dsize; 1845 1846 if ((fdata->td_end == 0) && 1847 (!(flags & IS_TCPFSM) || 1848 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1849 /* 1850 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1851 */ 1852 fdata->td_end = end - 1; 1853 fdata->td_maxwin = 1; 1854 fdata->td_maxend = end + win; 1855 } 1856 1857 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1858 ack = tdata->td_end; 1859 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1860 (ack == 0)) { 1861 /* gross hack to get around certain broken tcp stacks */ 1862 ack = tdata->td_end; 1863 } 1864 1865 maxwin = tdata->td_maxwin; 1866 ackskew = tdata->td_end - ack; 1867 1868 /* 1869 * Strict sequencing only allows in-order delivery. 
1870 */ 1871 if ((flags & IS_STRICT) != 0) { 1872 if (seq != fdata->td_end) { 1873 DTRACE_PROBE(strict_check); 1874 return 0; 1875 } 1876 } 1877 1878 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1879 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1880 inseq = 0; 1881 DTRACE_PROBE4( 1882 dyn_params, 1883 int, dsize, 1884 int, ackskew, 1885 int, maxwin, 1886 int, win 1887 ); 1888 if ( 1889 #if defined(_KERNEL) 1890 /* 1891 * end <-> s + n 1892 * maxend <-> ack + win 1893 * this is upperbound check 1894 */ 1895 (SEQ_GE(fdata->td_maxend, end)) && 1896 /* 1897 * this is lowerbound check 1898 */ 1899 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1900 #endif 1901 /* XXX what about big packets */ 1902 #define MAXACKWINDOW 66000 1903 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && 1904 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1905 inseq = 1; 1906 /* 1907 * Microsoft Windows will send the next packet to the right of the 1908 * window if SACK is in use. 1909 */ 1910 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1911 (fdata->td_winflags & TCP_SACK_PERMIT) && 1912 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1913 inseq = 1; 1914 /* 1915 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1916 * response to initial SYN packet, when there is no application 1917 * listeing to on a port, where the SYN packet has came to. 1918 */ 1919 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1920 (ackskew >= -1) && (ackskew <= 1)) { 1921 inseq = 1; 1922 } else if (!(flags & IS_TCPFSM)) { 1923 1924 if (!(fdata->td_winflags & 1925 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1926 /* 1927 * No TCPFSM and no window scaling, so make some 1928 * extra guesses. 1929 */ 1930 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1931 inseq = 1; 1932 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1933 inseq = 1; 1934 } 1935 } 1936 1937 if (inseq) { 1938 /* if ackskew < 0 then this should be due to fragmented 1939 * packets. 
There is no way to know the length of the 1940 * total packet in advance. 1941 * We do know the total length from the fragment cache though. 1942 * Note however that there might be more sessions with 1943 * exactly the same source and destination parameters in the 1944 * state cache (and source and destination is the only stuff 1945 * that is saved in the fragment cache). Note further that 1946 * some TCP connections in the state cache are hashed with 1947 * sport and dport as well which makes it not worthwhile to 1948 * look for them. 1949 * Thus, when ackskew is negative but still seems to belong 1950 * to this session, we bump up the destinations end value. 1951 */ 1952 if (ackskew < 0) { 1953 DTRACE_PROBE2(end_update_td, 1954 int, tdata->td_end, 1955 int, ack 1956 ); 1957 tdata->td_end = ack; 1958 } 1959 1960 /* update max window seen */ 1961 if (fdata->td_maxwin < win) { 1962 DTRACE_PROBE2(win_update_fd, 1963 int, fdata->td_maxwin, 1964 int, win 1965 ); 1966 fdata->td_maxwin = win; 1967 } 1968 1969 if (SEQ_GT(end, fdata->td_end)) { 1970 DTRACE_PROBE2(end_update_fd, 1971 int, fdata->td_end, 1972 int, end 1973 ); 1974 fdata->td_end = end; 1975 } 1976 1977 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1978 DTRACE_PROBE2(max_end_update_td, 1979 int, tdata->td_maxend, 1980 int, ack + win 1981 ); 1982 tdata->td_maxend = ack + win; 1983 } 1984 1985 return 1; 1986 } 1987 fin->fin_flx |= FI_OOW; 1988 1989 #if defined(_KERNEL) 1990 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1991 fin->fin_flx |= FI_NEG_OOW; 1992 #endif 1993 1994 return 0; 1995 } 1996 1997 1998 /* ------------------------------------------------------------------------ */ 1999 /* Function: fr_stclone */ 2000 /* Returns: ipstate_t* - NULL == cloning failed, */ 2001 /* else pointer to new state structure */ 2002 /* Parameters: fin(I) - pointer to packet information */ 2003 /* tcp(I) - pointer to TCP/UDP header */ 2004 /* is(I) - pointer to master state structure */ 2005 /* */ 2006 /* Create a "duplcate" state 
table entry from the master. */ 2007 /* ------------------------------------------------------------------------ */ 2008 static ipstate_t *fr_stclone(fin, tcp, is) 2009 fr_info_t *fin; 2010 tcphdr_t *tcp; 2011 ipstate_t *is; 2012 { 2013 ipstate_t *clone; 2014 u_32_t send; 2015 ipf_stack_t *ifs = fin->fin_ifs; 2016 2017 /* 2018 * Trigger automatic call to fr_state_flush() if the 2019 * table has reached capacity specified by hi watermark. 2020 */ 2021 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2022 ifs->ifs_fr_state_doflush = 1; 2023 2024 /* 2025 * If automatic flushing did not do its job, and the table 2026 * has filled up, don't try to create a new entry. A NULL 2027 * return will indicate that the cloning has failed. 2028 */ 2029 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2030 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2031 return NULL; 2032 } 2033 2034 KMALLOC(clone, ipstate_t *); 2035 if (clone == NULL) 2036 return NULL; 2037 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2038 2039 MUTEX_NUKE(&clone->is_lock); 2040 2041 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2042 clone->is_state[0] = 0; 2043 clone->is_state[1] = 0; 2044 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2045 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2046 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2047 2048 if (fin->fin_rev == 1) { 2049 clone->is_dend = send; 2050 clone->is_maxdend = send; 2051 clone->is_send = 0; 2052 clone->is_maxswin = 1; 2053 clone->is_maxdwin = ntohs(tcp->th_win); 2054 if (clone->is_maxdwin == 0) 2055 clone->is_maxdwin = 1; 2056 } else { 2057 clone->is_send = send; 2058 clone->is_maxsend = send; 2059 clone->is_dend = 0; 2060 clone->is_maxdwin = 1; 2061 clone->is_maxswin = ntohs(tcp->th_win); 2062 if (clone->is_maxswin == 0) 2063 clone->is_maxswin = 1; 2064 } 2065 2066 clone->is_flags &= ~SI_CLONE; 2067 clone->is_flags |= SI_CLONED; 2068 fr_stinsert(clone, fin->fin_rev, ifs); 2069 clone->is_ref = 1; 2070 if (clone->is_p == IPPROTO_TCP) { 2071 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2072 clone->is_flags); 2073 } 2074 MUTEX_EXIT(&clone->is_lock); 2075 #ifdef IPFILTER_SCAN 2076 (void) ipsc_attachis(is); 2077 #endif 2078 #ifdef IPFILTER_SYNC 2079 if (is->is_flags & IS_STATESYNC) 2080 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2081 #endif 2082 return clone; 2083 } 2084 2085 2086 /* ------------------------------------------------------------------------ */ 2087 /* Function: fr_matchsrcdst */ 2088 /* Returns: Nil */ 2089 /* Parameters: fin(I) - pointer to packet information */ 2090 /* is(I) - pointer to state structure */ 2091 /* src(I) - pointer to source address */ 2092 /* dst(I) - pointer to destination address */ 2093 /* tcp(I) - pointer to TCP/UDP header */ 2094 /* */ 2095 /* Match a state table entry against an IP packet. The logic below is that */ 2096 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2097 /* still 0 after the test. no match. 
*/ 2098 /* ------------------------------------------------------------------------ */ 2099 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2100 fr_info_t *fin; 2101 ipstate_t *is; 2102 i6addr_t *src, *dst; 2103 tcphdr_t *tcp; 2104 u_32_t cmask; 2105 { 2106 int ret = 0, rev, out, flags, flx = 0, idx; 2107 u_short sp, dp; 2108 u_32_t cflx; 2109 void *ifp; 2110 ipf_stack_t *ifs = fin->fin_ifs; 2111 2112 rev = IP6_NEQ(&is->is_dst, dst); 2113 ifp = fin->fin_ifp; 2114 out = fin->fin_out; 2115 flags = is->is_flags; 2116 sp = 0; 2117 dp = 0; 2118 2119 if (tcp != NULL) { 2120 sp = htons(fin->fin_sport); 2121 dp = ntohs(fin->fin_dport); 2122 } 2123 if (!rev) { 2124 if (tcp != NULL) { 2125 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2126 rev = 1; 2127 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2128 rev = 1; 2129 } 2130 } 2131 2132 idx = (out << 1) + rev; 2133 2134 /* 2135 * If the interface for this 'direction' is set, make sure it matches. 2136 * An interface name that is not set matches any, as does a name of *. 2137 */ 2138 if ((is->is_ifp[idx] == NULL && 2139 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2140 is->is_ifp[idx] == ifp) 2141 ret = 1; 2142 2143 if (ret == 0) { 2144 DTRACE_PROBE(no_match_on_iface); 2145 return NULL; 2146 } 2147 ret = 0; 2148 2149 /* 2150 * Match addresses and ports. 
2151 */ 2152 if (rev == 0) { 2153 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2154 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2155 if (tcp) { 2156 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2157 (dp == is->is_dport || flags & SI_W_DPORT)) 2158 ret = 1; 2159 } else { 2160 ret = 1; 2161 } 2162 } 2163 } else { 2164 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2165 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2166 if (tcp) { 2167 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2168 (sp == is->is_dport || flags & SI_W_DPORT)) 2169 ret = 1; 2170 } else { 2171 ret = 1; 2172 } 2173 } 2174 } 2175 2176 if (ret == 0) { 2177 DTRACE_PROBE(no_match_on_addrs); 2178 return NULL; 2179 } 2180 /* 2181 * Whether or not this should be here, is questionable, but the aim 2182 * is to get this out of the main line. 2183 */ 2184 if (tcp == NULL) 2185 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2186 2187 /* 2188 * Only one of the source or destination address can be flaged as a 2189 * wildcard. Fill in the missing address, if set. 2190 * For IPv6, if the address being copied in is multicast, then 2191 * don't reset the wild flag - multicast causes it to be set in the 2192 * first place! 
2193 */ 2194 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2195 fr_ip_t *fi = &fin->fin_fi; 2196 2197 if ((flags & SI_W_SADDR) != 0) { 2198 if (rev == 0) { 2199 #ifdef USE_INET6 2200 if (is->is_v == 6 && 2201 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2202 /*EMPTY*/; 2203 else 2204 #endif 2205 { 2206 is->is_src = fi->fi_src; 2207 is->is_flags &= ~SI_W_SADDR; 2208 } 2209 } else { 2210 #ifdef USE_INET6 2211 if (is->is_v == 6 && 2212 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2213 /*EMPTY*/; 2214 else 2215 #endif 2216 { 2217 is->is_src = fi->fi_dst; 2218 is->is_flags &= ~SI_W_SADDR; 2219 } 2220 } 2221 } else if ((flags & SI_W_DADDR) != 0) { 2222 if (rev == 0) { 2223 #ifdef USE_INET6 2224 if (is->is_v == 6 && 2225 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2226 /*EMPTY*/; 2227 else 2228 #endif 2229 { 2230 is->is_dst = fi->fi_dst; 2231 is->is_flags &= ~SI_W_DADDR; 2232 } 2233 } else { 2234 #ifdef USE_INET6 2235 if (is->is_v == 6 && 2236 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2237 /*EMPTY*/; 2238 else 2239 #endif 2240 { 2241 is->is_dst = fi->fi_src; 2242 is->is_flags &= ~SI_W_DADDR; 2243 } 2244 } 2245 } 2246 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2247 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2248 } 2249 } 2250 2251 flx = fin->fin_flx & cmask; 2252 cflx = is->is_flx[out][rev]; 2253 2254 /* 2255 * Match up any flags set from IP options. 2256 */ 2257 if ((cflx && (flx != (cflx & cmask))) || 2258 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2259 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2260 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) { 2261 DTRACE_PROBE4(no_match_on_flags, 2262 int, (cflx && (flx != (cflx & cmask))), 2263 int, 2264 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]), 2265 int, ((fin->fin_secmsk & is->is_secmsk) != is->is_sec), 2266 int, ((fin->fin_auth & is->is_authmsk) != is->is_auth) 2267 ); 2268 return NULL; 2269 } 2270 /* 2271 * Only one of the source or destination port can be flagged as a 2272 * wildcard. 
When filling it in, fill in a copy of the matched entry 2273 * if it has the cloning flag set. 2274 */ 2275 if ((fin->fin_flx & FI_IGNORE) != 0) { 2276 fin->fin_rev = rev; 2277 return is; 2278 } 2279 2280 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2281 if ((flags & SI_CLONE) != 0) { 2282 ipstate_t *clone; 2283 2284 clone = fr_stclone(fin, tcp, is); 2285 if (clone == NULL) 2286 return NULL; 2287 is = clone; 2288 } else { 2289 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2290 } 2291 2292 if ((flags & SI_W_SPORT) != 0) { 2293 if (rev == 0) { 2294 is->is_sport = sp; 2295 is->is_send = ntohl(tcp->th_seq); 2296 } else { 2297 is->is_sport = dp; 2298 is->is_send = ntohl(tcp->th_ack); 2299 } 2300 is->is_maxsend = is->is_send + 1; 2301 } else if ((flags & SI_W_DPORT) != 0) { 2302 if (rev == 0) { 2303 is->is_dport = dp; 2304 is->is_dend = ntohl(tcp->th_ack); 2305 } else { 2306 is->is_dport = sp; 2307 is->is_dend = ntohl(tcp->th_seq); 2308 } 2309 is->is_maxdend = is->is_dend + 1; 2310 } 2311 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2312 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2313 ipstate_log(is, ISL_CLONE, ifs); 2314 } 2315 2316 ret = -1; 2317 2318 if (is->is_flx[out][rev] == 0) { 2319 is->is_flx[out][rev] = flx; 2320 /* 2321 * If we are dealing with the first packet coming in reverse 2322 * direction (sent by peer), then we have to set options into 2323 * state. 2324 */ 2325 if (rev == 1 && is->is_optmsk[1] == 0x0) { 2326 is->is_optmsk[1] = 0xffffffff; 2327 is->is_opt[1] = fin->fin_optmsk; 2328 DTRACE_PROBE(set_rev_opts); 2329 } 2330 if (is->is_v == 6) { 2331 is->is_opt[rev] &= ~0x8; 2332 is->is_optmsk[rev] &= ~0x8; 2333 } 2334 } 2335 2336 /* 2337 * Check if the interface name for this "direction" is set and if not, 2338 * fill it in. 
2339 */ 2340 if (is->is_ifp[idx] == NULL && 2341 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2342 is->is_ifp[idx] = ifp; 2343 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2344 } 2345 fin->fin_rev = rev; 2346 return is; 2347 } 2348 2349 2350 /* ------------------------------------------------------------------------ */ 2351 /* Function: fr_checkicmpmatchingstate */ 2352 /* Returns: Nil */ 2353 /* Parameters: fin(I) - pointer to packet information */ 2354 /* */ 2355 /* If we've got an ICMP error message, using the information stored in the */ 2356 /* ICMP packet, look for a matching state table entry. */ 2357 /* */ 2358 /* If we return NULL then no lock on ipf_state is held. */ 2359 /* If we return non-null then a read-lock on ipf_state is held. */ 2360 /* ------------------------------------------------------------------------ */ 2361 static ipstate_t *fr_checkicmpmatchingstate(fin) 2362 fr_info_t *fin; 2363 { 2364 ipstate_t *is, **isp; 2365 u_short sport, dport; 2366 u_char pr; 2367 int backward, i, oi; 2368 i6addr_t dst, src; 2369 struct icmp *ic; 2370 u_short savelen; 2371 icmphdr_t *icmp; 2372 fr_info_t ofin; 2373 tcphdr_t *tcp; 2374 int len; 2375 ip_t *oip; 2376 u_int hv; 2377 ipf_stack_t *ifs = fin->fin_ifs; 2378 2379 /* 2380 * Does it at least have the return (basic) IP header ? 2381 * Is it an actual recognised ICMP error type? 2382 * Only a basic IP header (no options) should be with 2383 * an ICMP error header. 2384 */ 2385 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2386 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2387 !(fin->fin_flx & FI_ICMPERR)) 2388 return NULL; 2389 ic = fin->fin_dp; 2390 2391 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2392 /* 2393 * Check if the at least the old IP header (with options) and 2394 * 8 bytes of payload is present. 2395 */ 2396 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2397 return NULL; 2398 2399 /* 2400 * Sanity Checks. 
2401 */ 2402 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2403 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2404 return NULL; 2405 2406 /* 2407 * Is the buffer big enough for all of it ? It's the size of the IP 2408 * header claimed in the encapsulated part which is of concern. It 2409 * may be too big to be in this buffer but not so big that it's 2410 * outside the ICMP packet, leading to TCP deref's causing problems. 2411 * This is possible because we don't know how big oip_hl is when we 2412 * do the pullup early in fr_check() and thus can't guarantee it is 2413 * all here now. 2414 */ 2415 #ifdef _KERNEL 2416 { 2417 mb_t *m; 2418 2419 m = fin->fin_m; 2420 # if defined(MENTAT) 2421 if ((char *)oip + len > (char *)m->b_wptr) 2422 return NULL; 2423 # else 2424 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2425 return NULL; 2426 # endif 2427 } 2428 #endif 2429 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2430 2431 /* 2432 * in the IPv4 case we must zero the i6addr union otherwise 2433 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2434 * of the 'junk' in the unused part of the union 2435 */ 2436 bzero((char *)&src, sizeof(src)); 2437 bzero((char *)&dst, sizeof(dst)); 2438 2439 /* 2440 * we make an fin entry to be able to feed it to 2441 * matchsrcdst note that not all fields are encessary 2442 * but this is the cleanest way. Note further we fill 2443 * in fin_mp such that if someone uses it we'll get 2444 * a kernel panic. fr_matchsrcdst does not use this. 2445 * 2446 * watch out here, as ip is in host order and oip in network 2447 * order. Any change we make must be undone afterwards, like 2448 * oip->ip_off - it is still in network byte order so fix it. 
2449 */ 2450 savelen = oip->ip_len; 2451 oip->ip_len = len; 2452 oip->ip_off = ntohs(oip->ip_off); 2453 2454 ofin.fin_flx = FI_NOCKSUM; 2455 ofin.fin_v = 4; 2456 ofin.fin_ip = oip; 2457 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2458 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2459 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2460 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2461 ofin.fin_ifp = fin->fin_ifp; 2462 ofin.fin_out = !fin->fin_out; 2463 /* 2464 * Reset the short and bad flag here because in fr_matchsrcdst() 2465 * the flags for the current packet (fin_flx) are compared against 2466 * those for the existing session. 2467 */ 2468 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2469 2470 /* 2471 * Put old values of ip_len and ip_off back as we don't know 2472 * if we have to forward the packet (or process it again. 2473 */ 2474 oip->ip_len = savelen; 2475 oip->ip_off = htons(oip->ip_off); 2476 2477 switch (oip->ip_p) 2478 { 2479 case IPPROTO_ICMP : 2480 /* 2481 * an ICMP error can only be generated as a result of an 2482 * ICMP query, not as the response on an ICMP error 2483 * 2484 * XXX theoretically ICMP_ECHOREP and the other reply's are 2485 * ICMP query's as well, but adding them here seems strange XXX 2486 */ 2487 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2488 return NULL; 2489 2490 /* 2491 * perform a lookup of the ICMP packet in the state table 2492 */ 2493 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2494 hv = (pr = oip->ip_p); 2495 src.in4 = oip->ip_src; 2496 hv += src.in4.s_addr; 2497 dst.in4 = oip->ip_dst; 2498 hv += dst.in4.s_addr; 2499 hv += icmp->icmp_id; 2500 hv = DOUBLE_HASH(hv, ifs); 2501 2502 READ_ENTER(&ifs->ifs_ipf_state); 2503 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2504 isp = &is->is_hnext; 2505 if ((is->is_p != pr) || (is->is_v != 4)) 2506 continue; 2507 if (is->is_pass & FR_NOICMPERR) 2508 continue; 2509 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2510 NULL, FI_ICMPCMP); 2511 if (is != 
NULL) { 2512 if ((is->is_pass & FR_NOICMPERR) != 0) { 2513 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2514 return NULL; 2515 } 2516 /* 2517 * i : the index of this packet (the icmp 2518 * unreachable) 2519 * oi : the index of the original packet found 2520 * in the icmp header (i.e. the packet 2521 * causing this icmp) 2522 * backward : original packet was backward 2523 * compared to the state 2524 */ 2525 backward = IP6_NEQ(&is->is_src, &src); 2526 fin->fin_rev = !backward; 2527 i = (!backward << 1) + fin->fin_out; 2528 oi = (backward << 1) + ofin.fin_out; 2529 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2530 continue; 2531 ifs->ifs_ips_stats.iss_hits++; 2532 is->is_icmppkts[i]++; 2533 return is; 2534 } 2535 } 2536 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2537 return NULL; 2538 case IPPROTO_TCP : 2539 case IPPROTO_UDP : 2540 break; 2541 default : 2542 return NULL; 2543 } 2544 2545 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2546 dport = tcp->th_dport; 2547 sport = tcp->th_sport; 2548 2549 hv = (pr = oip->ip_p); 2550 src.in4 = oip->ip_src; 2551 hv += src.in4.s_addr; 2552 dst.in4 = oip->ip_dst; 2553 hv += dst.in4.s_addr; 2554 hv += dport; 2555 hv += sport; 2556 hv = DOUBLE_HASH(hv, ifs); 2557 2558 READ_ENTER(&ifs->ifs_ipf_state); 2559 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2560 isp = &is->is_hnext; 2561 /* 2562 * Only allow this icmp though if the 2563 * encapsulated packet was allowed through the 2564 * other way around. Note that the minimal amount 2565 * of info present does not allow for checking against 2566 * tcp internals such as seq and ack numbers. Only the 2567 * ports are known to be present and can be even if the 2568 * short flag is set. 2569 */ 2570 if ((is->is_p == pr) && (is->is_v == 4) && 2571 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2572 tcp, FI_ICMPCMP))) { 2573 /* 2574 * i : the index of this packet (the icmp unreachable) 2575 * oi : the index of the original packet found in the 2576 * icmp header (i.e. 
the packet causing this icmp) 2577 * backward : original packet was backward compared to 2578 * the state 2579 */ 2580 backward = IP6_NEQ(&is->is_src, &src); 2581 fin->fin_rev = !backward; 2582 i = (!backward << 1) + fin->fin_out; 2583 oi = (backward << 1) + ofin.fin_out; 2584 2585 if (((is->is_pass & FR_NOICMPERR) != 0) || 2586 (is->is_icmppkts[i] > is->is_pkts[oi])) 2587 break; 2588 ifs->ifs_ips_stats.iss_hits++; 2589 is->is_icmppkts[i]++; 2590 /* 2591 * we deliberately do not touch the timeouts 2592 * for the accompanying state table entry. 2593 * It remains to be seen if that is correct. XXX 2594 */ 2595 return is; 2596 } 2597 } 2598 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2599 return NULL; 2600 } 2601 2602 2603 /* ------------------------------------------------------------------------ */ 2604 /* Function: fr_ipsmove */ 2605 /* Returns: Nil */ 2606 /* Parameters: is(I) - pointer to state table entry */ 2607 /* hv(I) - new hash value for state table entry */ 2608 /* Write Locks: ipf_state */ 2609 /* */ 2610 /* Move a state entry from one position in the hash table to another. */ 2611 /* ------------------------------------------------------------------------ */ 2612 static void fr_ipsmove(is, hv, ifs) 2613 ipstate_t *is; 2614 u_int hv; 2615 ipf_stack_t *ifs; 2616 { 2617 ipstate_t **isp; 2618 u_int hvm; 2619 2620 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2621 2622 hvm = is->is_hv; 2623 /* 2624 * Remove the hash from the old location... 2625 */ 2626 isp = is->is_phnext; 2627 if (is->is_hnext) 2628 is->is_hnext->is_phnext = isp; 2629 *isp = is->is_hnext; 2630 if (ifs->ifs_ips_table[hvm] == NULL) 2631 ifs->ifs_ips_stats.iss_inuse--; 2632 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2633 2634 /* 2635 * ...and put the hash in the new one. 
2636 */ 2637 hvm = DOUBLE_HASH(hv, ifs); 2638 is->is_hv = hvm; 2639 isp = &ifs->ifs_ips_table[hvm]; 2640 if (*isp) 2641 (*isp)->is_phnext = &is->is_hnext; 2642 else 2643 ifs->ifs_ips_stats.iss_inuse++; 2644 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2645 is->is_phnext = isp; 2646 is->is_hnext = *isp; 2647 *isp = is; 2648 } 2649 2650 2651 /* ------------------------------------------------------------------------ */ 2652 /* Function: fr_stlookup */ 2653 /* Returns: ipstate_t* - NULL == no matching state found, */ 2654 /* else pointer to state information is returned */ 2655 /* Parameters: fin(I) - pointer to packet information */ 2656 /* tcp(I) - pointer to TCP/UDP header. */ 2657 /* */ 2658 /* Search the state table for a matching entry to the packet described by */ 2659 /* the contents of *fin. */ 2660 /* */ 2661 /* If we return NULL then no lock on ipf_state is held. */ 2662 /* If we return non-null then a read-lock on ipf_state is held. */ 2663 /* ------------------------------------------------------------------------ */ 2664 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2665 fr_info_t *fin; 2666 tcphdr_t *tcp; 2667 ipftq_t **ifqp; 2668 { 2669 u_int hv, hvm, pr, v, tryagain; 2670 ipstate_t *is, **isp; 2671 u_short dport, sport; 2672 i6addr_t src, dst; 2673 struct icmp *ic; 2674 ipftq_t *ifq; 2675 int oow; 2676 ipf_stack_t *ifs = fin->fin_ifs; 2677 2678 is = NULL; 2679 ifq = NULL; 2680 tcp = fin->fin_dp; 2681 ic = (struct icmp *)tcp; 2682 hv = (pr = fin->fin_fi.fi_p); 2683 src = fin->fin_fi.fi_src; 2684 dst = fin->fin_fi.fi_dst; 2685 hv += src.in4.s_addr; 2686 hv += dst.in4.s_addr; 2687 2688 v = fin->fin_fi.fi_v; 2689 #ifdef USE_INET6 2690 if (v == 6) { 2691 hv += fin->fin_fi.fi_src.i6[1]; 2692 hv += fin->fin_fi.fi_src.i6[2]; 2693 hv += fin->fin_fi.fi_src.i6[3]; 2694 2695 if ((fin->fin_p == IPPROTO_ICMPV6) && 2696 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2697 hv -= dst.in4.s_addr; 2698 } else { 2699 hv += fin->fin_fi.fi_dst.i6[1]; 2700 hv += 
fin->fin_fi.fi_dst.i6[2]; 2701 hv += fin->fin_fi.fi_dst.i6[3]; 2702 } 2703 } 2704 #endif 2705 if ((v == 4) && 2706 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2707 if (fin->fin_out == 0) { 2708 hv -= src.in4.s_addr; 2709 } else { 2710 hv -= dst.in4.s_addr; 2711 } 2712 } 2713 2714 /* 2715 * Search the hash table for matching packet header info. 2716 */ 2717 switch (pr) 2718 { 2719 #ifdef USE_INET6 2720 case IPPROTO_ICMPV6 : 2721 tryagain = 0; 2722 if (v == 6) { 2723 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2724 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2725 hv += ic->icmp_id; 2726 } 2727 } 2728 READ_ENTER(&ifs->ifs_ipf_state); 2729 icmp6again: 2730 hvm = DOUBLE_HASH(hv, ifs); 2731 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2732 isp = &is->is_hnext; 2733 if ((is->is_p != pr) || (is->is_v != v)) 2734 continue; 2735 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2736 if (is != NULL && 2737 fr_matchicmpqueryreply(v, &is->is_icmp, 2738 ic, fin->fin_rev)) { 2739 if (fin->fin_rev) 2740 ifq = &ifs->ifs_ips_icmpacktq; 2741 else 2742 ifq = &ifs->ifs_ips_icmptq; 2743 break; 2744 } 2745 } 2746 2747 if (is != NULL) { 2748 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2749 hv += fin->fin_fi.fi_src.i6[0]; 2750 hv += fin->fin_fi.fi_src.i6[1]; 2751 hv += fin->fin_fi.fi_src.i6[2]; 2752 hv += fin->fin_fi.fi_src.i6[3]; 2753 fr_ipsmove(is, hv, ifs); 2754 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2755 } 2756 break; 2757 } 2758 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2759 2760 /* 2761 * No matching icmp state entry. Perhaps this is a 2762 * response to another state entry. 2763 * 2764 * XXX With some ICMP6 packets, the "other" address is already 2765 * in the packet, after the ICMP6 header, and this could be 2766 * used in place of the multicast address. However, taking 2767 * advantage of this requires some significant code changes 2768 * to handle the specific types where that is the case. 
2769 */ 2770 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2771 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2772 hv -= fin->fin_fi.fi_src.i6[0]; 2773 hv -= fin->fin_fi.fi_src.i6[1]; 2774 hv -= fin->fin_fi.fi_src.i6[2]; 2775 hv -= fin->fin_fi.fi_src.i6[3]; 2776 tryagain = 1; 2777 WRITE_ENTER(&ifs->ifs_ipf_state); 2778 goto icmp6again; 2779 } 2780 2781 is = fr_checkicmp6matchingstate(fin); 2782 if (is != NULL) 2783 return is; 2784 break; 2785 #endif 2786 2787 case IPPROTO_ICMP : 2788 if (v == 4) { 2789 hv += ic->icmp_id; 2790 } 2791 hv = DOUBLE_HASH(hv, ifs); 2792 READ_ENTER(&ifs->ifs_ipf_state); 2793 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2794 isp = &is->is_hnext; 2795 if ((is->is_p != pr) || (is->is_v != v)) 2796 continue; 2797 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2798 if (is != NULL && 2799 fr_matchicmpqueryreply(v, &is->is_icmp, 2800 ic, fin->fin_rev)) { 2801 if (fin->fin_rev) 2802 ifq = &ifs->ifs_ips_icmpacktq; 2803 else 2804 ifq = &ifs->ifs_ips_icmptq; 2805 break; 2806 } 2807 } 2808 if (is == NULL) { 2809 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2810 } 2811 break; 2812 2813 case IPPROTO_TCP : 2814 case IPPROTO_UDP : 2815 ifqp = NULL; 2816 sport = htons(fin->fin_data[0]); 2817 hv += sport; 2818 dport = htons(fin->fin_data[1]); 2819 hv += dport; 2820 oow = 0; 2821 tryagain = 0; 2822 READ_ENTER(&ifs->ifs_ipf_state); 2823 retry_tcpudp: 2824 hvm = DOUBLE_HASH(hv, ifs); 2825 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2826 isp = &is->is_hnext; 2827 if ((is->is_p != pr) || (is->is_v != v)) 2828 continue; 2829 fin->fin_flx &= ~FI_OOW; 2830 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2831 if (is != NULL) { 2832 if (pr == IPPROTO_TCP) { 2833 if (!fr_tcpstate(fin, tcp, is)) { 2834 oow |= fin->fin_flx & FI_OOW; 2835 continue; 2836 } 2837 } 2838 break; 2839 } 2840 } 2841 if (is != NULL) { 2842 if (tryagain && 2843 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2844 hv += 
dport; 2845 hv += sport; 2846 fr_ipsmove(is, hv, ifs); 2847 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2848 } 2849 break; 2850 } 2851 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2852 2853 if (ifs->ifs_ips_stats.iss_wild) { 2854 if (tryagain == 0) { 2855 hv -= dport; 2856 hv -= sport; 2857 } else if (tryagain == 1) { 2858 hv = fin->fin_fi.fi_p; 2859 /* 2860 * If we try to pretend this is a reply to a 2861 * multicast/broadcast packet then we need to 2862 * exclude part of the address from the hash 2863 * calculation. 2864 */ 2865 if (fin->fin_out == 0) { 2866 hv += src.in4.s_addr; 2867 } else { 2868 hv += dst.in4.s_addr; 2869 } 2870 hv += dport; 2871 hv += sport; 2872 } 2873 tryagain++; 2874 if (tryagain <= 2) { 2875 WRITE_ENTER(&ifs->ifs_ipf_state); 2876 goto retry_tcpudp; 2877 } 2878 } 2879 fin->fin_flx |= oow; 2880 break; 2881 2882 #if 0 2883 case IPPROTO_GRE : 2884 gre = fin->fin_dp; 2885 if (GRE_REV(gre->gr_flags) == 1) { 2886 hv += gre->gr_call; 2887 } 2888 /* FALLTHROUGH */ 2889 #endif 2890 default : 2891 ifqp = NULL; 2892 hvm = DOUBLE_HASH(hv, ifs); 2893 READ_ENTER(&ifs->ifs_ipf_state); 2894 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2895 isp = &is->is_hnext; 2896 if ((is->is_p != pr) || (is->is_v != v)) 2897 continue; 2898 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2899 if (is != NULL) { 2900 ifq = &ifs->ifs_ips_iptq; 2901 break; 2902 } 2903 } 2904 if (is == NULL) { 2905 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2906 } 2907 break; 2908 } 2909 2910 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2911 (is->is_tqehead[fin->fin_rev] != NULL)) 2912 ifq = is->is_tqehead[fin->fin_rev]; 2913 if (ifq != NULL && ifqp != NULL) 2914 *ifqp = ifq; 2915 return is; 2916 } 2917 2918 2919 /* ------------------------------------------------------------------------ */ 2920 /* Function: fr_updatestate */ 2921 /* Returns: Nil */ 2922 /* Parameters: fin(I) - pointer to packet information */ 2923 /* is(I) - pointer to state table entry */ 2924 /* 
Read Locks: ipf_state */ 2925 /* */ 2926 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2927 /* fragment cache with a new entry as required. */ 2928 /* ------------------------------------------------------------------------ */ 2929 void fr_updatestate(fin, is, ifq) 2930 fr_info_t *fin; 2931 ipstate_t *is; 2932 ipftq_t *ifq; 2933 { 2934 ipftqent_t *tqe; 2935 int i, pass; 2936 ipf_stack_t *ifs = fin->fin_ifs; 2937 2938 i = (fin->fin_rev << 1) + fin->fin_out; 2939 2940 /* 2941 * For TCP packets, ifq == NULL. For all others, check if this new 2942 * queue is different to the last one it was on and move it if so. 2943 */ 2944 tqe = &is->is_sti; 2945 MUTEX_ENTER(&is->is_lock); 2946 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2947 ifq = is->is_tqehead[fin->fin_rev]; 2948 2949 if (ifq != NULL) 2950 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2951 2952 is->is_pkts[i]++; 2953 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2954 is->is_bytes[i] += fin->fin_plen; 2955 MUTEX_EXIT(&is->is_lock); 2956 2957 #ifdef IPFILTER_SYNC 2958 if (is->is_flags & IS_STATESYNC) 2959 ipfsync_update(SMC_STATE, fin, is->is_sync); 2960 #endif 2961 2962 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2963 2964 fin->fin_fr = is->is_rule; 2965 2966 /* 2967 * If this packet is a fragment and the rule says to track fragments, 2968 * then create a new fragment cache entry. 2969 */ 2970 pass = is->is_pass; 2971 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2972 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2973 } 2974 2975 2976 /* ------------------------------------------------------------------------ */ 2977 /* Function: fr_checkstate */ 2978 /* Returns: frentry_t* - NULL == search failed, */ 2979 /* else pointer to rule for matching state */ 2980 /* Parameters: ifp(I) - pointer to interface */ 2981 /* passp(I) - pointer to filtering result flags */ 2982 /* */ 2983 /* Check if a packet is associated with an entry in the state table. 
*/ 2984 /* ------------------------------------------------------------------------ */ 2985 frentry_t *fr_checkstate(fin, passp) 2986 fr_info_t *fin; 2987 u_32_t *passp; 2988 { 2989 ipstate_t *is; 2990 frentry_t *fr; 2991 tcphdr_t *tcp; 2992 ipftq_t *ifq; 2993 u_int pass; 2994 ipf_stack_t *ifs = fin->fin_ifs; 2995 2996 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 2997 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 2998 return NULL; 2999 3000 is = NULL; 3001 if ((fin->fin_flx & FI_TCPUDP) || 3002 (fin->fin_fi.fi_p == IPPROTO_ICMP) 3003 #ifdef USE_INET6 3004 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 3005 #endif 3006 ) 3007 tcp = fin->fin_dp; 3008 else 3009 tcp = NULL; 3010 3011 /* 3012 * Search the hash table for matching packet header info. 3013 */ 3014 ifq = NULL; 3015 is = fr_stlookup(fin, tcp, &ifq); 3016 switch (fin->fin_p) 3017 { 3018 #ifdef USE_INET6 3019 case IPPROTO_ICMPV6 : 3020 if (is != NULL) 3021 break; 3022 if (fin->fin_v == 6) { 3023 is = fr_checkicmp6matchingstate(fin); 3024 if (is != NULL) 3025 goto matched; 3026 } 3027 break; 3028 #endif 3029 case IPPROTO_ICMP : 3030 if (is != NULL) 3031 break; 3032 /* 3033 * No matching icmp state entry. Perhaps this is a 3034 * response to another state entry. 
3035 */ 3036 is = fr_checkicmpmatchingstate(fin); 3037 if (is != NULL) 3038 goto matched; 3039 break; 3040 case IPPROTO_TCP : 3041 if (is == NULL) 3042 break; 3043 3044 if (is->is_pass & FR_NEWISN) { 3045 if (fin->fin_out == 0) 3046 fr_fixinisn(fin, is); 3047 else if (fin->fin_out == 1) 3048 fr_fixoutisn(fin, is); 3049 } 3050 break; 3051 default : 3052 if (fin->fin_rev) 3053 ifq = &ifs->ifs_ips_udpacktq; 3054 else 3055 ifq = &ifs->ifs_ips_udptq; 3056 break; 3057 } 3058 if (is == NULL) { 3059 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3060 return NULL; 3061 } 3062 3063 matched: 3064 fr = is->is_rule; 3065 if (fr != NULL) { 3066 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3067 if (fin->fin_nattag == NULL) { 3068 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3069 return NULL; 3070 } 3071 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) { 3072 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3073 return NULL; 3074 } 3075 } 3076 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3077 fin->fin_icode = fr->fr_icode; 3078 } 3079 3080 fin->fin_rule = is->is_rulen; 3081 pass = is->is_pass; 3082 fr_updatestate(fin, is, ifq); 3083 3084 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3085 fin->fin_flx |= FI_STATE; 3086 if ((pass & FR_LOGFIRST) != 0) 3087 pass &= ~(FR_LOGFIRST|FR_LOG); 3088 *passp = pass; 3089 return fr; 3090 } 3091 3092 3093 /* ------------------------------------------------------------------------ */ 3094 /* Function: fr_fixoutisn */ 3095 /* Returns: Nil */ 3096 /* Parameters: fin(I) - pointer to packet information */ 3097 /* is(I) - pointer to master state structure */ 3098 /* */ 3099 /* Called only for outbound packets, adjusts the sequence number and the */ 3100 /* TCP checksum to match that change. 
*/ 3101 /* ------------------------------------------------------------------------ */ 3102 static void fr_fixoutisn(fin, is) 3103 fr_info_t *fin; 3104 ipstate_t *is; 3105 { 3106 tcphdr_t *tcp; 3107 int rev; 3108 u_32_t seq; 3109 3110 tcp = fin->fin_dp; 3111 rev = fin->fin_rev; 3112 if ((is->is_flags & IS_ISNSYN) != 0) { 3113 if (rev == 0) { 3114 seq = ntohl(tcp->th_seq); 3115 seq += is->is_isninc[0]; 3116 tcp->th_seq = htonl(seq); 3117 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3118 } 3119 } 3120 if ((is->is_flags & IS_ISNACK) != 0) { 3121 if (rev == 1) { 3122 seq = ntohl(tcp->th_seq); 3123 seq += is->is_isninc[1]; 3124 tcp->th_seq = htonl(seq); 3125 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3126 } 3127 } 3128 } 3129 3130 3131 /* ------------------------------------------------------------------------ */ 3132 /* Function: fr_fixinisn */ 3133 /* Returns: Nil */ 3134 /* Parameters: fin(I) - pointer to packet information */ 3135 /* is(I) - pointer to master state structure */ 3136 /* */ 3137 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3138 /* TCP checksum to match that change. 
*/ 3139 /* ------------------------------------------------------------------------ */ 3140 static void fr_fixinisn(fin, is) 3141 fr_info_t *fin; 3142 ipstate_t *is; 3143 { 3144 tcphdr_t *tcp; 3145 int rev; 3146 u_32_t ack; 3147 3148 tcp = fin->fin_dp; 3149 rev = fin->fin_rev; 3150 if ((is->is_flags & IS_ISNSYN) != 0) { 3151 if (rev == 1) { 3152 ack = ntohl(tcp->th_ack); 3153 ack -= is->is_isninc[0]; 3154 tcp->th_ack = htonl(ack); 3155 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3156 } 3157 } 3158 if ((is->is_flags & IS_ISNACK) != 0) { 3159 if (rev == 0) { 3160 ack = ntohl(tcp->th_ack); 3161 ack -= is->is_isninc[1]; 3162 tcp->th_ack = htonl(ack); 3163 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3164 } 3165 } 3166 } 3167 3168 3169 /* ------------------------------------------------------------------------ */ 3170 /* Function: fr_statesync */ 3171 /* Returns: Nil */ 3172 /* Parameters: action(I) - type of synchronisation to do */ 3173 /* v(I) - IP version being sync'd (v4 or v6) */ 3174 /* ifp(I) - interface identifier associated with action */ 3175 /* name(I) - name associated with ifp parameter */ 3176 /* */ 3177 /* Walk through all state entries and if an interface pointer match is */ 3178 /* found then look it up again, based on its name in case the pointer has */ 3179 /* changed since last time. */ 3180 /* */ 3181 /* If ifp is passed in as being non-null then we are only doing updates for */ 3182 /* existing, matching, uses of it. 
*/ 3183 /* ------------------------------------------------------------------------ */ 3184 void fr_statesync(action, v, ifp, name, ifs) 3185 int action, v; 3186 void *ifp; 3187 char *name; 3188 ipf_stack_t *ifs; 3189 { 3190 ipstate_t *is; 3191 int i; 3192 3193 if (ifs->ifs_fr_running <= 0) 3194 return; 3195 3196 WRITE_ENTER(&ifs->ifs_ipf_state); 3197 3198 if (ifs->ifs_fr_running <= 0) { 3199 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3200 return; 3201 } 3202 3203 switch (action) 3204 { 3205 case IPFSYNC_RESYNC : 3206 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3207 if (v != 0 && is->is_v != v) 3208 continue; 3209 /* 3210 * Look up all the interface names in the state entry. 3211 */ 3212 for (i = 0; i < 4; i++) { 3213 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3214 is->is_v, ifs); 3215 } 3216 } 3217 break; 3218 case IPFSYNC_NEWIFP : 3219 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3220 if (v != 0 && is->is_v != v) 3221 continue; 3222 /* 3223 * Look up all the interface names in the state entry. 3224 */ 3225 for (i = 0; i < 4; i++) { 3226 if (!strncmp(is->is_ifname[i], name, 3227 sizeof(is->is_ifname[i]))) 3228 is->is_ifp[i] = ifp; 3229 } 3230 } 3231 break; 3232 case IPFSYNC_OLDIFP : 3233 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3234 if (v != 0 && is->is_v != v) 3235 continue; 3236 /* 3237 * Look up all the interface names in the state entry. 
3238 */ 3239 for (i = 0; i < 4; i++) { 3240 if (is->is_ifp[i] == ifp) 3241 is->is_ifp[i] = (void *)-1; 3242 } 3243 } 3244 break; 3245 } 3246 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3247 } 3248 3249 3250 #if SOLARIS2 >= 10 3251 /* ------------------------------------------------------------------------ */ 3252 /* Function: fr_stateifindexsync */ 3253 /* Returns: void */ 3254 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3255 /* newifp - new interface descriptor (new ifindex) */ 3256 /* ifs - pointer to IPF stack */ 3257 /* */ 3258 /* Write Locks: assumes ipf_mutex is locked */ 3259 /* */ 3260 /* Updates all interface indeces matching to ifp with new interface index */ 3261 /* value. */ 3262 /* ------------------------------------------------------------------------ */ 3263 void fr_stateifindexsync(ifp, newifp, ifs) 3264 void *ifp; 3265 void *newifp; 3266 ipf_stack_t *ifs; 3267 { 3268 ipstate_t *is; 3269 int i; 3270 3271 WRITE_ENTER(&ifs->ifs_ipf_state); 3272 3273 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3274 3275 for (i = 0; i < 4; i++) { 3276 if (is->is_ifp[i] == ifp) 3277 is->is_ifp[i] = newifp; 3278 } 3279 } 3280 3281 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3282 } 3283 #endif 3284 3285 /* ------------------------------------------------------------------------ */ 3286 /* Function: fr_delstate */ 3287 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3288 /* Parameters: is(I) - pointer to state structure to delete */ 3289 /* why(I) - if not 0, log reason why it was deleted */ 3290 /* ifs - ipf stack instance */ 3291 /* Write Locks: ipf_state/ipf_global */ 3292 /* */ 3293 /* Deletes a state entry from the enumerated list as well as the hash table */ 3294 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3295 /* global counters as required. 
*/ 3296 /* ------------------------------------------------------------------------ */ 3297 int fr_delstate(is, why, ifs) 3298 ipstate_t *is; 3299 int why; 3300 ipf_stack_t *ifs; 3301 { 3302 int removed = 0; 3303 3304 ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 || 3305 rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0); 3306 3307 /* 3308 * Start by removing the entry from the hash table of state entries 3309 * so it will not be "used" again. 3310 * 3311 * It will remain in the "list" of state entries until all references 3312 * have been accounted for. 3313 */ 3314 if (is->is_phnext != NULL) { 3315 removed = 1; 3316 *is->is_phnext = is->is_hnext; 3317 if (is->is_hnext != NULL) 3318 is->is_hnext->is_phnext = is->is_phnext; 3319 if (ifs->ifs_ips_table[is->is_hv] == NULL) 3320 ifs->ifs_ips_stats.iss_inuse--; 3321 ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--; 3322 3323 is->is_phnext = NULL; 3324 is->is_hnext = NULL; 3325 } 3326 3327 /* 3328 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the state 3329 * table that have wildcard flags set, only decerement it once 3330 * and do it here. 3331 */ 3332 if (is->is_flags & (SI_WILDP|SI_WILDA)) { 3333 if (!(is->is_flags & SI_CLONED)) { 3334 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 3335 } 3336 is->is_flags &= ~(SI_WILDP|SI_WILDA); 3337 } 3338 3339 /* 3340 * Next, remove it from the timeout queue it is in. 3341 */ 3342 fr_deletequeueentry(&is->is_sti); 3343 3344 is->is_me = NULL; 3345 3346 /* 3347 * If it is still in use by something else, do not go any further, 3348 * but note that at this point it is now an orphan. 3349 */ 3350 MUTEX_ENTER(&is->is_lock); 3351 if (is->is_ref > 1) { 3352 is->is_ref--; 3353 MUTEX_EXIT(&is->is_lock); 3354 if (removed) 3355 ifs->ifs_ips_stats.iss_orphans++; 3356 return (is->is_ref); 3357 } 3358 MUTEX_EXIT(&is->is_lock); 3359 3360 is->is_ref = 0; 3361 3362 /* 3363 * If entry has already been removed from table, 3364 * it means we're simply cleaning up an orphan. 
3365 */ 3366 if (!removed) 3367 ifs->ifs_ips_stats.iss_orphans--; 3368 3369 if (is->is_tqehead[0] != NULL) 3370 (void) fr_deletetimeoutqueue(is->is_tqehead[0]); 3371 3372 if (is->is_tqehead[1] != NULL) 3373 (void) fr_deletetimeoutqueue(is->is_tqehead[1]); 3374 3375 #ifdef IPFILTER_SYNC 3376 if (is->is_sync) 3377 ipfsync_del(is->is_sync); 3378 #endif 3379 #ifdef IPFILTER_SCAN 3380 (void) ipsc_detachis(is); 3381 #endif 3382 3383 /* 3384 * Now remove it from master list of state table entries. 3385 */ 3386 if (is->is_pnext != NULL) { 3387 *is->is_pnext = is->is_next; 3388 if (is->is_next != NULL) { 3389 is->is_next->is_pnext = is->is_pnext; 3390 is->is_next = NULL; 3391 } 3392 is->is_pnext = NULL; 3393 } 3394 3395 if (ifs->ifs_ipstate_logging != 0 && why != 0) 3396 ipstate_log(is, why, ifs); 3397 3398 if (is->is_rule != NULL) { 3399 is->is_rule->fr_statecnt--; 3400 (void)fr_derefrule(&is->is_rule, ifs); 3401 } 3402 3403 MUTEX_DESTROY(&is->is_lock); 3404 KFREE(is); 3405 ifs->ifs_ips_num--; 3406 3407 return (0); 3408 } 3409 3410 3411 /* ------------------------------------------------------------------------ */ 3412 /* Function: fr_timeoutstate */ 3413 /* Returns: Nil */ 3414 /* Parameters: ifs - ipf stack instance */ 3415 /* */ 3416 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */ 3417 /* used here is to keep the queue sorted with the oldest things at the top */ 3418 /* and the youngest at the bottom. So if the top one doesn't need to be */ 3419 /* expired then neither will any under it. 
*/ 3420 /* ------------------------------------------------------------------------ */ 3421 void fr_timeoutstate(ifs) 3422 ipf_stack_t *ifs; 3423 { 3424 ipftq_t *ifq, *ifqnext; 3425 ipftqent_t *tqe, *tqn; 3426 ipstate_t *is; 3427 SPL_INT(s); 3428 3429 SPL_NET(s); 3430 WRITE_ENTER(&ifs->ifs_ipf_state); 3431 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3432 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3433 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3434 break; 3435 tqn = tqe->tqe_next; 3436 is = tqe->tqe_parent; 3437 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3438 } 3439 3440 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3441 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3442 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3443 break; 3444 tqn = tqe->tqe_next; 3445 is = tqe->tqe_parent; 3446 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3447 } 3448 } 3449 3450 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3451 ifqnext = ifq->ifq_next; 3452 3453 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3454 (ifq->ifq_ref == 0)) { 3455 fr_freetimeoutqueue(ifq, ifs); 3456 } 3457 } 3458 3459 if (ifs->ifs_fr_state_doflush) { 3460 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3461 ifs->ifs_fr_state_doflush = 0; 3462 } 3463 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3464 SPL_X(s); 3465 } 3466 3467 3468 /* ---------------------------------------------------------------------- */ 3469 /* Function: fr_state_flush */ 3470 /* Returns: int - 0 == success, -1 == failure */ 3471 /* Parameters: flush_option - how to flush the active State table */ 3472 /* proto - IP version to flush (4, 6, or both) */ 3473 /* ifs - ipf stack instance */ 3474 /* Write Locks: ipf_state */ 3475 /* */ 3476 /* Flush state tables. 
Three possible flush options currently defined: */ 3477 /* */ 3478 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3479 /* */ 3480 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3481 /* have started to close on both ends using */ 3482 /* ipf_flushclosing(). */ 3483 /* */ 3484 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3485 /* Then, if needed, flush entries with TCP */ 3486 /* connections which have been idle for a long */ 3487 /* time with ipf_extraflush(). */ 3488 /* ---------------------------------------------------------------------- */ 3489 static int fr_state_flush(flush_option, proto, ifs) 3490 int flush_option, proto; 3491 ipf_stack_t *ifs; 3492 { 3493 ipstate_t *is, *isn; 3494 int removed; 3495 SPL_INT(s); 3496 3497 removed = 0; 3498 3499 SPL_NET(s); 3500 switch (flush_option) 3501 { 3502 case FLUSH_TABLE_ALL: 3503 isn = ifs->ifs_ips_list; 3504 while ((is = isn) != NULL) { 3505 isn = is->is_next; 3506 if ((proto != 0) && (is->is_v != proto)) 3507 continue; 3508 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3509 removed++; 3510 } 3511 break; 3512 3513 case FLUSH_TABLE_CLOSING: 3514 removed = ipf_flushclosing(STATE_FLUSH, 3515 IPF_TCPS_CLOSE_WAIT, 3516 ifs->ifs_ips_tqtqb, 3517 ifs->ifs_ips_utqe, 3518 ifs); 3519 break; 3520 3521 case FLUSH_TABLE_EXTRA: 3522 removed = ipf_flushclosing(STATE_FLUSH, 3523 IPF_TCPS_FIN_WAIT_2, 3524 ifs->ifs_ips_tqtqb, 3525 ifs->ifs_ips_utqe, 3526 ifs); 3527 3528 /* 3529 * Be sure we haven't done this in the last 10 seconds. 
3530 */ 3531 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3532 IPF_TTLVAL(10)) 3533 break; 3534 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3535 removed += ipf_extraflush(STATE_FLUSH, 3536 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3537 ifs->ifs_ips_utqe, 3538 ifs); 3539 break; 3540 3541 default: /* Flush Nothing */ 3542 break; 3543 } 3544 3545 SPL_X(s); 3546 return (removed); 3547 } 3548 3549 3550 /* ------------------------------------------------------------------------ */ 3551 /* Function: fr_tcp_age */ 3552 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3553 /* Parameters: tq(I) - pointer to timeout queue information */ 3554 /* fin(I) - pointer to packet information */ 3555 /* tqtab(I) - TCP timeout queue table this is in */ 3556 /* flags(I) - flags from state/NAT entry */ 3557 /* */ 3558 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3559 /* */ 3560 /* - (try to) base state transitions on real evidence only, */ 3561 /* i.e. packets that are sent and have been received by ipfilter; */ 3562 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3563 /* */ 3564 /* - deal with half-closed connections correctly; */ 3565 /* */ 3566 /* - store the state of the source in state[0] such that ipfstat */ 3567 /* displays the state as source/dest instead of dest/source; the calls */ 3568 /* to fr_tcp_age have been changed accordingly. */ 3569 /* */ 3570 /* Internal Parameters: */ 3571 /* */ 3572 /* state[0] = state of source (host that initiated connection) */ 3573 /* state[1] = state of dest (host that accepted the connection) */ 3574 /* */ 3575 /* dir == 0 : a packet from source to dest */ 3576 /* dir == 1 : a packet from dest to source */ 3577 /* */ 3578 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3579 /* ------------------------------------------------------------------------ */ 3580 int fr_tcp_age(tqe, fin, tqtab, flags) 3581 ipftqent_t *tqe; 3582 fr_info_t *fin; 3583 ipftq_t *tqtab; 3584 int flags; 3585 { 3586 int dlen, ostate, nstate, rval, dir; 3587 u_char tcpflags; 3588 tcphdr_t *tcp; 3589 ipf_stack_t *ifs = fin->fin_ifs; 3590 3591 tcp = fin->fin_dp; 3592 3593 rval = 0; 3594 dir = fin->fin_rev; 3595 tcpflags = tcp->th_flags; 3596 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3597 3598 ostate = tqe->tqe_state[1 - dir]; 3599 nstate = tqe->tqe_state[dir]; 3600 3601 DTRACE_PROBE4( 3602 indata, 3603 fr_info_t *, fin, 3604 int, ostate, 3605 int, nstate, 3606 u_char, tcpflags 3607 ); 3608 3609 if (tcpflags & TH_RST) { 3610 if (!(tcpflags & TH_PUSH) && !dlen) 3611 nstate = IPF_TCPS_CLOSED; 3612 else 3613 nstate = IPF_TCPS_CLOSE_WAIT; 3614 3615 /* 3616 * Once RST is received, we must advance peer's state to 3617 * CLOSE_WAIT. 3618 */ 3619 if (ostate <= IPF_TCPS_ESTABLISHED) { 3620 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3621 } 3622 rval = 1; 3623 } else { 3624 3625 switch (nstate) 3626 { 3627 case IPF_TCPS_LISTEN: /* 0 */ 3628 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3629 /* 3630 * 'dir' received an S and sends SA in 3631 * response, CLOSED -> SYN_RECEIVED 3632 */ 3633 nstate = IPF_TCPS_SYN_RECEIVED; 3634 rval = 1; 3635 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3636 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3637 nstate = IPF_TCPS_SYN_SENT; 3638 rval = 1; 3639 } 3640 /* 3641 * the next piece of code makes it possible to get 3642 * already established connections into the state table 3643 * after a restart or reload of the filter rules; this 3644 * does not work when a strict 'flags S keep state' is 3645 * used for tcp connections of course 3646 */ 3647 if (((flags & IS_TCPFSM) == 0) && 3648 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3649 /* 3650 * we saw an A, guess 'dir' is in ESTABLISHED 3651 * mode 3652 */ 3653 switch (ostate) 3654 { 3655 case 
IPF_TCPS_LISTEN : 3656 case IPF_TCPS_SYN_RECEIVED : 3657 nstate = IPF_TCPS_HALF_ESTAB; 3658 rval = 1; 3659 break; 3660 case IPF_TCPS_HALF_ESTAB : 3661 case IPF_TCPS_ESTABLISHED : 3662 nstate = IPF_TCPS_ESTABLISHED; 3663 rval = 1; 3664 break; 3665 default : 3666 break; 3667 } 3668 } 3669 /* 3670 * TODO: besides regular ACK packets we can have other 3671 * packets as well; it is yet to be determined how we 3672 * should initialize the states in those cases 3673 */ 3674 break; 3675 3676 case IPF_TCPS_SYN_SENT: /* 1 */ 3677 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3678 /* 3679 * A retransmitted SYN packet. We do not reset 3680 * the timeout here to fr_tcptimeout because a 3681 * connection connect timeout does not renew 3682 * after every packet that is sent. We need to 3683 * set rval so as to indicate the packet has 3684 * passed the check for its flags being valid 3685 * in the TCP FSM. Setting rval to 2 has the 3686 * result of not resetting the timeout. 3687 */ 3688 rval = 2; 3689 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3690 TH_ACK) { 3691 /* 3692 * we see an A from 'dir' which is in SYN_SENT 3693 * state: 'dir' sent an A in response to an SA 3694 * which it received, SYN_SENT -> ESTABLISHED 3695 */ 3696 nstate = IPF_TCPS_ESTABLISHED; 3697 rval = 1; 3698 } else if (tcpflags & TH_FIN) { 3699 /* 3700 * we see an F from 'dir' which is in SYN_SENT 3701 * state and wants to close its side of the 3702 * connection; SYN_SENT -> FIN_WAIT_1 3703 */ 3704 nstate = IPF_TCPS_FIN_WAIT_1; 3705 rval = 1; 3706 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3707 /* 3708 * we see an SA from 'dir' which is already in 3709 * SYN_SENT state, this means we have a 3710 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3711 */ 3712 nstate = IPF_TCPS_SYN_RECEIVED; 3713 rval = 1; 3714 } 3715 break; 3716 3717 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3718 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3719 /* 3720 * we see an A from 'dir' which was in 3721 * SYN_RECEIVED 
state so it must now be in 3722 * established state, SYN_RECEIVED -> 3723 * ESTABLISHED 3724 */ 3725 nstate = IPF_TCPS_ESTABLISHED; 3726 rval = 1; 3727 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3728 TH_OPENING) { 3729 /* 3730 * We see an SA from 'dir' which is already in 3731 * SYN_RECEIVED state. 3732 */ 3733 rval = 2; 3734 } else if (tcpflags & TH_FIN) { 3735 /* 3736 * we see an F from 'dir' which is in 3737 * SYN_RECEIVED state and wants to close its 3738 * side of the connection; SYN_RECEIVED -> 3739 * FIN_WAIT_1 3740 */ 3741 nstate = IPF_TCPS_FIN_WAIT_1; 3742 rval = 1; 3743 } 3744 break; 3745 3746 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3747 if (tcpflags & TH_FIN) { 3748 nstate = IPF_TCPS_FIN_WAIT_1; 3749 rval = 1; 3750 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3751 /* 3752 * If we've picked up a connection in mid 3753 * flight, we could be looking at a follow on 3754 * packet from the same direction as the one 3755 * that created this state. Recognise it but 3756 * do not advance the entire connection's 3757 * state. 3758 */ 3759 switch (ostate) 3760 { 3761 case IPF_TCPS_LISTEN : 3762 case IPF_TCPS_SYN_SENT : 3763 case IPF_TCPS_SYN_RECEIVED : 3764 rval = 1; 3765 break; 3766 case IPF_TCPS_HALF_ESTAB : 3767 case IPF_TCPS_ESTABLISHED : 3768 nstate = IPF_TCPS_ESTABLISHED; 3769 rval = 1; 3770 break; 3771 default : 3772 break; 3773 } 3774 } 3775 break; 3776 3777 case IPF_TCPS_ESTABLISHED: /* 4 */ 3778 rval = 1; 3779 if (tcpflags & TH_FIN) { 3780 /* 3781 * 'dir' closed its side of the connection; 3782 * this gives us a half-closed connection; 3783 * ESTABLISHED -> FIN_WAIT_1 3784 */ 3785 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3786 nstate = IPF_TCPS_CLOSING; 3787 } else { 3788 nstate = IPF_TCPS_FIN_WAIT_1; 3789 } 3790 } else if (tcpflags & TH_ACK) { 3791 /* 3792 * an ACK, should we exclude other flags here? 
3793 */ 3794 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3795 /* 3796 * We know the other side did an active 3797 * close, so we are ACKing the recvd 3798 * FIN packet (does the window matching 3799 * code guarantee this?) and go into 3800 * CLOSE_WAIT state; this gives us a 3801 * half-closed connection 3802 */ 3803 nstate = IPF_TCPS_CLOSE_WAIT; 3804 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3805 /* 3806 * still a fully established 3807 * connection reset timeout 3808 */ 3809 nstate = IPF_TCPS_ESTABLISHED; 3810 } 3811 } 3812 break; 3813 3814 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3815 rval = 1; 3816 if (tcpflags & TH_FIN) { 3817 /* 3818 * application closed and 'dir' sent a FIN, 3819 * we're now going into LAST_ACK state 3820 */ 3821 nstate = IPF_TCPS_LAST_ACK; 3822 } else { 3823 /* 3824 * we remain in CLOSE_WAIT because the other 3825 * side has closed already and we did not 3826 * close our side yet; reset timeout 3827 */ 3828 nstate = IPF_TCPS_CLOSE_WAIT; 3829 } 3830 break; 3831 3832 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3833 rval = 1; 3834 if ((tcpflags & TH_ACK) && 3835 ostate > IPF_TCPS_CLOSE_WAIT) { 3836 /* 3837 * if the other side is not active anymore 3838 * it has sent us a FIN packet that we are 3839 * ack'ing now with an ACK; this means both 3840 * sides have now closed the connection and 3841 * we go into LAST_ACK 3842 */ 3843 /* 3844 * XXX: how do we know we really are ACKing 3845 * the FIN packet here? does the window code 3846 * guarantee that? 
3847 */ 3848 nstate = IPF_TCPS_LAST_ACK; 3849 } else { 3850 /* 3851 * we closed our side of the connection 3852 * already but the other side is still active 3853 * (ESTABLISHED/CLOSE_WAIT); continue with 3854 * this half-closed connection 3855 */ 3856 nstate = IPF_TCPS_FIN_WAIT_1; 3857 } 3858 break; 3859 3860 case IPF_TCPS_CLOSING: /* 7 */ 3861 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3862 nstate = IPF_TCPS_TIME_WAIT; 3863 } 3864 rval = 1; 3865 break; 3866 3867 case IPF_TCPS_LAST_ACK: /* 8 */ 3868 /* 3869 * We want to reset timer here to keep state in table. 3870 * If we would allow the state to time out here, while 3871 * there would still be packets being retransmitted, we 3872 * would cut off line between the two peers preventing 3873 * them to close connection properly. 3874 */ 3875 rval = 1; 3876 break; 3877 3878 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3879 /* NOT USED */ 3880 break; 3881 3882 case IPF_TCPS_TIME_WAIT: /* 10 */ 3883 /* we're in 2MSL timeout now */ 3884 if (ostate == IPF_TCPS_LAST_ACK) { 3885 nstate = IPF_TCPS_CLOSED; 3886 rval = 1; 3887 } else { 3888 rval = 2; 3889 } 3890 break; 3891 3892 case IPF_TCPS_CLOSED: /* 11 */ 3893 rval = 2; 3894 break; 3895 3896 default : 3897 #if defined(_KERNEL) 3898 ASSERT(nstate >= IPF_TCPS_LISTEN && 3899 nstate <= IPF_TCPS_CLOSED); 3900 #else 3901 abort(); 3902 #endif 3903 break; 3904 } 3905 } 3906 3907 /* 3908 * If rval == 2 then do not update the queue position, but treat the 3909 * packet as being ok. 3910 */ 3911 if (rval == 2) { 3912 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3913 rval = 1; 3914 } 3915 else if (rval == 1) { 3916 tqe->tqe_state[dir] = nstate; 3917 /* 3918 * The nstate can either advance to a new state, or remain 3919 * unchanged, resetting the timer by moving to the bottom of 3920 * the queue. 
3921 */ 3922 DTRACE_PROBE1(state_done, int, nstate); 3923 3924 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3925 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3926 } 3927 3928 return rval; 3929 } 3930 3931 3932 /* ------------------------------------------------------------------------ */ 3933 /* Function: ipstate_log */ 3934 /* Returns: Nil */ 3935 /* Parameters: is(I) - pointer to state structure */ 3936 /* type(I) - type of log entry to create */ 3937 /* */ 3938 /* Creates a state table log entry using the state structure and type info. */ 3939 /* passed in. Log packet/byte counts, source/destination address and other */ 3940 /* protocol specific information. */ 3941 /* ------------------------------------------------------------------------ */ 3942 void ipstate_log(is, type, ifs) 3943 struct ipstate *is; 3944 u_int type; 3945 ipf_stack_t *ifs; 3946 { 3947 #ifdef IPFILTER_LOG 3948 struct ipslog ipsl; 3949 size_t sizes[1]; 3950 void *items[1]; 3951 int types[1]; 3952 3953 /* 3954 * Copy information out of the ipstate_t structure and into the 3955 * structure used for logging. 
3956 */ 3957 ipsl.isl_type = type; 3958 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3959 ipsl.isl_bytes[0] = is->is_bytes[0]; 3960 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3961 ipsl.isl_bytes[1] = is->is_bytes[1]; 3962 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3963 ipsl.isl_bytes[2] = is->is_bytes[2]; 3964 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3965 ipsl.isl_bytes[3] = is->is_bytes[3]; 3966 ipsl.isl_src = is->is_src; 3967 ipsl.isl_dst = is->is_dst; 3968 ipsl.isl_p = is->is_p; 3969 ipsl.isl_v = is->is_v; 3970 ipsl.isl_flags = is->is_flags; 3971 ipsl.isl_tag = is->is_tag; 3972 ipsl.isl_rulen = is->is_rulen; 3973 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3974 3975 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3976 ipsl.isl_sport = is->is_sport; 3977 ipsl.isl_dport = is->is_dport; 3978 if (ipsl.isl_p == IPPROTO_TCP) { 3979 ipsl.isl_state[0] = is->is_state[0]; 3980 ipsl.isl_state[1] = is->is_state[1]; 3981 } 3982 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3983 ipsl.isl_itype = is->is_icmp.ici_type; 3984 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3985 ipsl.isl_itype = is->is_icmp.ici_type; 3986 } else { 3987 ipsl.isl_ps.isl_filler[0] = 0; 3988 ipsl.isl_ps.isl_filler[1] = 0; 3989 } 3990 3991 items[0] = &ipsl; 3992 sizes[0] = sizeof(ipsl); 3993 types[0] = 0; 3994 3995 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3996 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 3997 } else { 3998 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 3999 } 4000 #endif 4001 } 4002 4003 4004 #ifdef USE_INET6 4005 /* ------------------------------------------------------------------------ */ 4006 /* Function: fr_checkicmp6matchingstate */ 4007 /* Returns: ipstate_t* - NULL == no match found, */ 4008 /* else pointer to matching state entry */ 4009 /* Parameters: fin(I) - pointer to packet information */ 4010 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 4011 /* */ 4012 /* If we've got 
an ICMPv6 error message, using the information stored in */ 4013 /* the ICMPv6 packet, look for a matching state table entry. */ 4014 /* ------------------------------------------------------------------------ */ 4015 static ipstate_t *fr_checkicmp6matchingstate(fin) 4016 fr_info_t *fin; 4017 { 4018 struct icmp6_hdr *ic6, *oic; 4019 int backward, i; 4020 ipstate_t *is, **isp; 4021 u_short sport, dport; 4022 i6addr_t dst, src; 4023 u_short savelen; 4024 icmpinfo_t *ic; 4025 fr_info_t ofin; 4026 tcphdr_t *tcp; 4027 ip6_t *oip6; 4028 u_char pr; 4029 u_int hv; 4030 ipf_stack_t *ifs = fin->fin_ifs; 4031 4032 /* 4033 * Does it at least have the return (basic) IP header ? 4034 * Is it an actual recognised ICMP error type? 4035 * Only a basic IP header (no options) should be with 4036 * an ICMP error header. 4037 */ 4038 if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || 4039 !(fin->fin_flx & FI_ICMPERR)) 4040 return NULL; 4041 4042 ic6 = fin->fin_dp; 4043 4044 oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); 4045 if (fin->fin_plen < sizeof(*oip6)) 4046 return NULL; 4047 4048 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 4049 ofin.fin_v = 6; 4050 ofin.fin_ifp = fin->fin_ifp; 4051 ofin.fin_out = !fin->fin_out; 4052 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 4053 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 4054 4055 /* 4056 * We make a fin entry to be able to feed it to 4057 * matchsrcdst. Note that not all fields are necessary 4058 * but this is the cleanest way. Note further we fill 4059 * in fin_mp such that if someone uses it we'll get 4060 * a kernel panic. fr_matchsrcdst does not use this. 4061 * 4062 * watch out here, as ip is in host order and oip6 in network 4063 * order. Any change we make must be undone afterwards. 
4064 */ 4065 savelen = oip6->ip6_plen; 4066 oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 4067 ofin.fin_flx = FI_NOCKSUM; 4068 ofin.fin_ip = (ip_t *)oip6; 4069 ofin.fin_plen = oip6->ip6_plen; 4070 (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); 4071 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 4072 oip6->ip6_plen = savelen; 4073 4074 if (oip6->ip6_nxt == IPPROTO_ICMPV6) { 4075 oic = (struct icmp6_hdr *)(oip6 + 1); 4076 /* 4077 * an ICMP error can only be generated as a result of an 4078 * ICMP query, not as the response on an ICMP error 4079 * 4080 * XXX theoretically ICMP_ECHOREP and the other reply's are 4081 * ICMP query's as well, but adding them here seems strange XXX 4082 */ 4083 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) 4084 return NULL; 4085 4086 /* 4087 * perform a lookup of the ICMP packet in the state table 4088 */ 4089 hv = (pr = oip6->ip6_nxt); 4090 src.in6 = oip6->ip6_src; 4091 hv += src.in4.s_addr; 4092 dst.in6 = oip6->ip6_dst; 4093 hv += dst.in4.s_addr; 4094 hv += oic->icmp6_id; 4095 hv += oic->icmp6_seq; 4096 hv = DOUBLE_HASH(hv, ifs); 4097 4098 READ_ENTER(&ifs->ifs_ipf_state); 4099 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4100 ic = &is->is_icmp; 4101 isp = &is->is_hnext; 4102 if ((is->is_p == pr) && 4103 !(is->is_pass & FR_NOICMPERR) && 4104 (oic->icmp6_id == ic->ici_id) && 4105 (oic->icmp6_seq == ic->ici_seq) && 4106 (is = fr_matchsrcdst(&ofin, is, &src, 4107 &dst, NULL, FI_ICMPCMP))) { 4108 /* 4109 * in the state table ICMP query's are stored 4110 * with the type of the corresponding ICMP 4111 * response. 
Correct here 4112 */ 4113 if (((ic->ici_type == ICMP6_ECHO_REPLY) && 4114 (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || 4115 (ic->ici_type - 1 == oic->icmp6_type )) { 4116 ifs->ifs_ips_stats.iss_hits++; 4117 backward = IP6_NEQ(&is->is_dst, &src); 4118 fin->fin_rev = !backward; 4119 i = (backward << 1) + fin->fin_out; 4120 is->is_icmppkts[i]++; 4121 return is; 4122 } 4123 } 4124 } 4125 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4126 return NULL; 4127 } 4128 4129 hv = (pr = oip6->ip6_nxt); 4130 src.in6 = oip6->ip6_src; 4131 hv += src.i6[0]; 4132 hv += src.i6[1]; 4133 hv += src.i6[2]; 4134 hv += src.i6[3]; 4135 dst.in6 = oip6->ip6_dst; 4136 hv += dst.i6[0]; 4137 hv += dst.i6[1]; 4138 hv += dst.i6[2]; 4139 hv += dst.i6[3]; 4140 4141 if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { 4142 tcp = (tcphdr_t *)(oip6 + 1); 4143 dport = tcp->th_dport; 4144 sport = tcp->th_sport; 4145 hv += dport; 4146 hv += sport; 4147 } else 4148 tcp = NULL; 4149 hv = DOUBLE_HASH(hv, ifs); 4150 4151 READ_ENTER(&ifs->ifs_ipf_state); 4152 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 4153 isp = &is->is_hnext; 4154 /* 4155 * Only allow this icmp though if the 4156 * encapsulated packet was allowed through the 4157 * other way around. Note that the minimal amount 4158 * of info present does not allow for checking against 4159 * tcp internals such as seq and ack numbers. 4160 */ 4161 if ((is->is_p != pr) || (is->is_v != 6) || 4162 (is->is_pass & FR_NOICMPERR)) 4163 continue; 4164 is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); 4165 if (is != NULL) { 4166 ifs->ifs_ips_stats.iss_hits++; 4167 backward = IP6_NEQ(&is->is_dst, &src); 4168 fin->fin_rev = !backward; 4169 i = (backward << 1) + fin->fin_out; 4170 is->is_icmppkts[i]++; 4171 /* 4172 * we deliberately do not touch the timeouts 4173 * for the accompanying state table entry. 4174 * It remains to be seen if that is correct. 
XXX 4175 */ 4176 return is; 4177 } 4178 } 4179 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4180 return NULL; 4181 } 4182 #endif 4183 4184 4185 /* ------------------------------------------------------------------------ */ 4186 /* Function: fr_sttab_init */ 4187 /* Returns: Nil */ 4188 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4189 /* */ 4190 /* Initialise the array of timeout queues for TCP. */ 4191 /* ------------------------------------------------------------------------ */ 4192 void fr_sttab_init(tqp, ifs) 4193 ipftq_t *tqp; 4194 ipf_stack_t *ifs; 4195 { 4196 int i; 4197 4198 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { 4199 tqp[i].ifq_ttl = 0; 4200 tqp[i].ifq_ref = 1; 4201 tqp[i].ifq_head = NULL; 4202 tqp[i].ifq_tail = &tqp[i].ifq_head; 4203 tqp[i].ifq_next = tqp + i + 1; 4204 MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); 4205 } 4206 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; 4207 tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed; 4208 tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout; 4209 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4210 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout; 4211 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout; 4212 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4213 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed; 4214 tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout; 4215 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack; 4216 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait; 4217 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout; 4218 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout; 4219 } 4220 4221 4222 /* ------------------------------------------------------------------------ */ 4223 /* Function: fr_sttab_destroy */ 4224 /* Returns: Nil */ 4225 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ 4226 /* */ 4227 /* Do whatever is necessary to "destroy" each of the entries 
in the array */ 4228 /* of timeout queues for TCP. */ 4229 /* ------------------------------------------------------------------------ */ 4230 void fr_sttab_destroy(tqp) 4231 ipftq_t *tqp; 4232 { 4233 int i; 4234 4235 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4236 MUTEX_DESTROY(&tqp[i].ifq_lock); 4237 } 4238 4239 4240 /* ------------------------------------------------------------------------ */ 4241 /* Function: fr_statederef */ 4242 /* Returns: Nil */ 4243 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4244 /* ifs - ipf stack instance */ 4245 /* */ 4246 /* Decrement the reference counter for this state table entry and free it */ 4247 /* if there are no more things using it. */ 4248 /* */ 4249 /* Internal parameters: */ 4250 /* state[0] = state of source (host that initiated connection) */ 4251 /* state[1] = state of dest (host that accepted the connection) */ 4252 /* ------------------------------------------------------------------------ */ 4253 void fr_statederef(isp, ifs) 4254 ipstate_t **isp; 4255 ipf_stack_t *ifs; 4256 { 4257 ipstate_t *is; 4258 4259 is = *isp; 4260 *isp = NULL; 4261 4262 MUTEX_ENTER(&is->is_lock); 4263 if (is->is_ref > 1) { 4264 is->is_ref--; 4265 MUTEX_EXIT(&is->is_lock); 4266 #ifndef _KERNEL 4267 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4268 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4269 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4270 } 4271 #endif 4272 return; 4273 } 4274 MUTEX_EXIT(&is->is_lock); 4275 4276 WRITE_ENTER(&ifs->ifs_ipf_state); 4277 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4278 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4279 } 4280 4281 4282 /* ------------------------------------------------------------------------ */ 4283 /* Function: fr_setstatequeue */ 4284 /* Returns: Nil */ 4285 /* Parameters: is(I) - pointer to state structure */ 4286 /* rev(I) - forward(0) or reverse(1) direction */ 4287 /* Locks: ipf_state (read or write) */ 4288 /* */ 4289 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4290 /* determining which queue it should be placed on. */ 4291 /* ------------------------------------------------------------------------ */ 4292 void fr_setstatequeue(is, rev, ifs) 4293 ipstate_t *is; 4294 int rev; 4295 ipf_stack_t *ifs; 4296 { 4297 ipftq_t *oifq, *nifq; 4298 4299 4300 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4301 nifq = is->is_tqehead[rev]; 4302 else 4303 nifq = NULL; 4304 4305 if (nifq == NULL) { 4306 switch (is->is_p) 4307 { 4308 #ifdef USE_INET6 4309 case IPPROTO_ICMPV6 : 4310 if (rev == 1) 4311 nifq = &ifs->ifs_ips_icmpacktq; 4312 else 4313 nifq = &ifs->ifs_ips_icmptq; 4314 break; 4315 #endif 4316 case IPPROTO_ICMP : 4317 if (rev == 1) 4318 nifq = &ifs->ifs_ips_icmpacktq; 4319 else 4320 nifq = &ifs->ifs_ips_icmptq; 4321 break; 4322 case IPPROTO_TCP : 4323 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4324 break; 4325 4326 case IPPROTO_UDP : 4327 if (rev == 1) 4328 nifq = &ifs->ifs_ips_udpacktq; 4329 else 4330 nifq = &ifs->ifs_ips_udptq; 4331 break; 4332 4333 default : 4334 nifq = &ifs->ifs_ips_iptq; 4335 break; 4336 } 4337 } 4338 4339 oifq = is->is_sti.tqe_ifq; 4340 /* 4341 * If it's currently on a timeout queue, move it from one queue to 4342 * another, else put it on the end of the newly determined queue. 4343 */ 4344 if (oifq != NULL) 4345 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4346 else 4347 fr_queueappend(&is->is_sti, nifq, is, ifs); 4348 return; 4349 } 4350 4351 4352 /* ------------------------------------------------------------------------ */ 4353 /* Function: fr_stateiter */ 4354 /* Returns: int - 0 == success, else error */ 4355 /* Parameters: token(I) - pointer to ipftoken structure */ 4356 /* itp(I) - pointer to ipfgeniter structure */ 4357 /* */ 4358 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4359 /* walks through the list of entries in the state table list (ips_list.) 
*/ 4360 /* ------------------------------------------------------------------------ */ 4361 static int fr_stateiter(token, itp, ifs) 4362 ipftoken_t *token; 4363 ipfgeniter_t *itp; 4364 ipf_stack_t *ifs; 4365 { 4366 ipstate_t *is, *next, zero; 4367 int error, count; 4368 char *dst; 4369 4370 if (itp->igi_data == NULL) 4371 return EFAULT; 4372 4373 if (itp->igi_nitems == 0) 4374 return EINVAL; 4375 4376 if (itp->igi_type != IPFGENITER_STATE) 4377 return EINVAL; 4378 4379 error = 0; 4380 4381 READ_ENTER(&ifs->ifs_ipf_state); 4382 4383 /* 4384 * Get "previous" entry from the token and find the next entry. 4385 */ 4386 is = token->ipt_data; 4387 if (is == NULL) { 4388 next = ifs->ifs_ips_list; 4389 } else { 4390 next = is->is_next; 4391 } 4392 4393 dst = itp->igi_data; 4394 for (count = itp->igi_nitems; count > 0; count--) { 4395 /* 4396 * If we found an entry, add a reference to it and update the token. 4397 * Otherwise, zero out data to be returned and NULL out token. 4398 */ 4399 if (next != NULL) { 4400 MUTEX_ENTER(&next->is_lock); 4401 next->is_ref++; 4402 MUTEX_EXIT(&next->is_lock); 4403 token->ipt_data = next; 4404 } else { 4405 bzero(&zero, sizeof(zero)); 4406 next = &zero; 4407 token->ipt_data = NULL; 4408 } 4409 4410 /* 4411 * Safe to release lock now the we have a reference. 4412 */ 4413 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4414 4415 /* 4416 * Copy out data and clean up references and tokens. 4417 */ 4418 error = COPYOUT(next, dst, sizeof(*next)); 4419 if (error != 0) 4420 error = EFAULT; 4421 if (token->ipt_data == NULL) { 4422 ipf_freetoken(token, ifs); 4423 break; 4424 } else { 4425 if (is != NULL) 4426 fr_statederef(&is, ifs); 4427 if (next->is_next == NULL) { 4428 ipf_freetoken(token, ifs); 4429 break; 4430 } 4431 } 4432 4433 if ((count == 1) || (error != 0)) 4434 break; 4435 4436 READ_ENTER(&ifs->ifs_ipf_state); 4437 dst += sizeof(*next); 4438 is = next; 4439 next = is->is_next; 4440 } 4441 4442 return error; 4443 } 4444