From 847f87e9061c2edffc21f9033adf98c7d768196a Mon Sep 17 00:00:00 2001 From: Johannes Truschnigg Date: Wed, 16 Mar 2022 20:47:33 +0100 Subject: [PATCH] Implement timeout and load detection logic Also, make no SQM changes when no clear latency trend can be determined. --- __lagdetect.awk | 55 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/__lagdetect.awk b/__lagdetect.awk index 9c0f1b2..42778e5 100644 --- a/__lagdetect.awk +++ b/__lagdetect.awk @@ -23,7 +23,10 @@ BEGIN { ping_pprev[pn] = -1 # latency recorded in the previous cycle not_increasing_count[pn] = 0 # number of consecutive cycles with no (fudged) latency increase } - adjust_delta = 0 + consec_timeouts = 0 + adjust_ts_delta = 0 + rx_rate_max = 100 # make something up + tx_rate_max = 100 # make something up update_bw() update_bw_min_delta = (0.02 * peer_count) } @@ -37,6 +40,7 @@ BEGIN { lat=9999 update_bw() # XXX TODO what now? + consec_timeouts++ process_record(pn, bytes, lat) #print "TIMEOUT for " pn } @@ -49,6 +53,7 @@ BEGIN { bytes=$4 lat=$6 update_bw() + consec_timeouts = 0 process_record(pn, bytes, lat) } @@ -76,6 +81,7 @@ function get_time() { function update_bw() { + # If the last update happened rather recently, do not compute new bw stats. if (! 
(update_ts())) { #print "too fast, skiping bw updates " ts - ts_old return @@ -94,6 +100,14 @@ function update_bw() { rx_rate=(rx_delta / (ts_delta)) tx_delta=(tx - tx_old) tx_rate=(tx_delta / (ts_delta)) + if (tx_rate_max < tx_rate) { + tx_rate_max = tx_rate + print "# " ts " new peak tx_rate=" tx_rate_max + } + if (rx_rate_max < rx_rate) { + rx_rate_max = rx_rate + print "# " ts " new peak rx_rate=" rx_rate_max + } # printf("# " ts " (Kbyte/s) rx_rate=%-12.2f tx_rate=%-12.2f ts_delta=%-1.2f\n", (rx_rate / 1024), (tx_rate / 1024), ts_delta) } @@ -106,14 +120,14 @@ function process_record(pn, bytes, lat) { if (bytes == 0 && lat == 9999) { print "# WARN: TIMEOUT: " pn - return 1 + # XXX TODO: what to do now? } #printf("%s %db in %.2fms\n", pn, bytes, lat) update_pingstats(pn, lat) if (have_baseline[pn]) { - adjust_bw(pn, lat) + adjust_sqm(pn, lat) } ping_pprev[pn]=ping_prev[pn] @@ -121,36 +135,55 @@ function process_record(pn, bytes, lat) { } -function adjust_bw(peername, latency) { - adjust_delta = (ts - adjust_old) +function adjust_sqm(peername, latency) { + adjust_ts_delta = (ts - adjust_old) + + # Assume links are clogged, so we short-circuit a bandwidth decrease + if (consec_timeouts > ((1.5 * peer_count) + 2)) { + # print "# " ts " too many TIMEOUTS in a row, decreasing SQM bw" + set_bw(IFACE_RX, 5000) + set_bw(IFACE_TX, 5000) + return + } # do not try to set bw more than once a second - if (adjust_delta < 1.0) { - # print "looping too fast, skipping BW update adjust_delta=" adjust_delta + if (adjust_ts_delta < 1.0) { + # print "looping too fast, skipping SQM update adjust_ts_delta=" adjust_ts_delta return } + # XXX this logic needs fixing + if ( rx_rate < (rx_rate_max * 0.1) || tx_rate < (tx_rate_max * 0.1) ) { + # print "line does not appear to be loaded, skipping SQM update" + return + } + + # Try to determine the latency trend over the last few samples last2avg = (((ping_prev[peername] + latency) / 2.0)) thresh_fudged = (FUDGE + (ping_avgs[peername] * 
THRESHOLD)) if (last2avg > thresh_fudged ) { if (latency > ping_prev[peername] && latency > ping_pprev[peername]) { - print ts " " adjust_delta " CONSIDER BW DECREASE # " peername " lat=" latency " prev=" ping_prev[peername] " pprev=" ping_pprev[peername] " avg=" ping_avgs[peername] " min=" ping_min[peername] + # print ts " " adjust_ts_delta " CONSIDER BW DECREASE # " peername " lat=" latency " prev=" ping_prev[peername] " pprev=" ping_pprev[peername] " avg=" ping_avgs[peername] " min=" ping_min[peername] set_bw(IFACE_TX, 5000) set_bw(IFACE_RX, 5000) # print "set adjust_old=" ts adjust_old = ts } not_increasing_count[peername]=0 - } else { + } else if ((latency < FUDGE + ping_avgs[peername]) && + latency < ping_prev[peername] && + latency < ping_pprev[peername] && + ping_prev[peername] < ping_pprev[peername]) { if (not_increasing_count[peername] > (5 * PINGSLOTS)) { - print ts " " adjust_delta " CONSIDER BW INCREASE # " peername " lat=" latency " prev=" ping_prev[peername] " pprev=" ping_pprev[peername] " avg=" ping_avgs[peername] " min=" ping_min[peername] + # print ts " " adjust_ts_delta " CONSIDER BW INCREASE # " peername " lat=" latency " prev=" ping_prev[peername] " pprev=" ping_pprev[peername] " avg=" ping_avgs[peername] " min=" ping_min[peername] not_increasing_count[peername]=0 set_bw(IFACE_TX, 10000) set_bw(IFACE_RX, 10000) - # print "set adjust_old=" ts adjust_old = ts } not_increasing_count[peername]++ + } else { + # print ts " NOOP - no latency trend determined" } } -- 2.39.5