diff options
Diffstat (limited to 'contrib/unbound/services')
22 files changed, 6415 insertions, 1219 deletions
diff --git a/contrib/unbound/services/authzone.c b/contrib/unbound/services/authzone.c index ecd63ec144f5..c518883419c4 100644 --- a/contrib/unbound/services/authzone.c +++ b/contrib/unbound/services/authzone.c @@ -67,7 +67,11 @@ #include "sldns/parseutil.h" #include "sldns/keyraw.h" #include "validator/val_nsec3.h" +#include "validator/val_nsec.h" #include "validator/val_secalgo.h" +#include "validator/val_sigcrypt.h" +#include "validator/val_anchor.h" +#include "validator/val_utils.h" #include <ctype.h> /** bytes to use for NSEC3 hash buffer. 20 for sha1 */ @@ -80,7 +84,7 @@ #define AUTH_PROBE_TIMEOUT_STOP 1000 /* msec */ /* auth transfer timeout for TCP connections, in msec */ #define AUTH_TRANSFER_TIMEOUT 10000 /* msec */ -/* auth transfer max backoff for failed tranfers and probes */ +/* auth transfer max backoff for failed transfers and probes */ #define AUTH_TRANSFER_MAX_BACKOFF 86400 /* sec */ /* auth http port number */ #define AUTH_HTTP_PORT 80 @@ -128,6 +132,7 @@ msg_create(struct regional* region, struct query_info* qinfo) return NULL; msg->rep->flags = (uint16_t)(BIT_QR | BIT_AA); msg->rep->authoritative = 1; + msg->rep->reason_bogus = LDNS_EDE_NONE; msg->rep->qdcount = 1; /* rrsets is NULL, no rrsets yet */ return msg; @@ -239,7 +244,7 @@ msg_add_rrset_an(struct auth_zone* z, struct regional* region, return 1; } -/** add rrset to authority section (no additonal section rrsets yet) */ +/** add rrset to authority section (no additional section rrsets yet) */ static int msg_add_rrset_ns(struct auth_zone* z, struct regional* region, struct dns_msg* msg, struct auth_data* node, struct auth_rrset* rrset) @@ -1301,8 +1306,8 @@ az_remove_rr(struct auth_zone* z, uint8_t* rr, size_t rr_len, auth_data_delete(node); } if(z->rpz) { - rpz_remove_rr(z->rpz, z->namelen, dname, dname_len, rr_type, - rr_class, rdata, rdatalen); + rpz_remove_rr(z->rpz, z->name, z->namelen, dname, dname_len, + rr_type, rr_class, rdata, rdatalen); } return 1; } @@ -1741,9 +1746,45 @@ int auth_zone_write_file(struct auth_zone* z, const char* fname) return 1; } +/** offline verify for zonemd, while reading a zone file to immediately + * spot bad hashes in zonefile as they are read. + * Creates temp buffers, but uses anchors and validation environment + * from the module_env. */ +static void +zonemd_offline_verify(struct auth_zone* z, struct module_env* env_for_val, + struct module_stack* mods) +{ + struct module_env env; + time_t now = 0; + if(!z->zonemd_check) + return; + env = *env_for_val; + env.scratch_buffer = sldns_buffer_new(env.cfg->msg_buffer_size); + if(!env.scratch_buffer) { + log_err("out of memory"); + goto clean_exit; + } + env.scratch = regional_create(); + if(!env.now) { + env.now = &now; + now = time(NULL); + } + if(!env.scratch) { + log_err("out of memory"); + goto clean_exit; + } + auth_zone_verify_zonemd(z, &env, mods, NULL, 1, 0); + +clean_exit: + /* clean up and exit */ + sldns_buffer_free(env.scratch_buffer); + regional_destroy(env.scratch); +} + /** read all auth zones from file (if they have) */ static int -auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg) +auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg, + struct module_env* env, struct module_stack* mods) { struct auth_zone* z; lock_rw_wrlock(&az->lock); @@ -1754,12 +1795,202 @@ auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg) lock_rw_unlock(&az->lock); return 0; } + if(z->zonefile && z->zonefile[0]!=0 && env) + zonemd_offline_verify(z, env, mods); lock_rw_unlock(&z->lock); } lock_rw_unlock(&az->lock); return 1; } +/** fetch the content of a ZONEMD RR from the rdata */ +static int zonemd_fetch_parameters(struct auth_rrset* zonemd_rrset, size_t i, + uint32_t* serial, int* scheme, int* hashalgo, uint8_t** hash, + size_t* hashlen) +{ + size_t rr_len; + uint8_t* rdata; + if(i >= zonemd_rrset->data->count) + return 0; + rr_len = zonemd_rrset->data->rr_len[i]; + if(rr_len < 2+4+1+1) + return 0; /* too short, for rdlen+serial+scheme+algo */ + rdata = zonemd_rrset->data->rr_data[i]; + *serial = sldns_read_uint32(rdata+2); + *scheme = rdata[6]; + *hashalgo = rdata[7]; + *hashlen = rr_len - 8; + if(*hashlen == 0) + *hash = NULL; + else *hash = rdata+8; + return 1; +} + +/** + * See if the ZONEMD scheme, hash occurs more than once. + * @param zonemd_rrset: the zonemd rrset to check with the RRs in it. + * @param index: index of the original, this is allowed to have that + * scheme and hashalgo, but other RRs should not have it. + * @param scheme: the scheme to check for. + * @param hashalgo: the hash algorithm to check for. + * @return true if it occurs more than once. + */ +static int zonemd_is_duplicate_scheme_hash(struct auth_rrset* zonemd_rrset, + size_t index, int scheme, int hashalgo) +{ + size_t j; + for(j=0; j<zonemd_rrset->data->count; j++) { + uint32_t serial2 = 0; + int scheme2 = 0, hashalgo2 = 0; + uint8_t* hash2 = NULL; + size_t hashlen2 = 0; + if(index == j) { + /* this is the original */ + continue; + } + if(!zonemd_fetch_parameters(zonemd_rrset, j, &serial2, + &scheme2, &hashalgo2, &hash2, &hashlen2)) { + /* malformed, skip it */ + continue; + } + if(scheme == scheme2 && hashalgo == hashalgo2) { + /* duplicate scheme, hash */ + verbose(VERB_ALGO, "zonemd duplicate for scheme %d " + "and hash %d", scheme, hashalgo); + return 1; + } + } + return 0; +} + +/** + * Check ZONEMDs if present for the auth zone. Depending on config + * it can warn or fail on that. Checks the hash of the ZONEMD. + * @param z: auth zone to check for. + * caller must hold lock on zone. + * @param env: module env for temp buffers. + * @param reason: returned on failure. + * @return false on failure, true if hash checks out. + */ +static int auth_zone_zonemd_check_hash(struct auth_zone* z, + struct module_env* env, char** reason) +{ + /* loop over ZONEMDs and see which one is valid. if not print + * failure (depending on config) */ + struct auth_data* apex; + struct auth_rrset* zonemd_rrset; + size_t i; + struct regional* region = NULL; + struct sldns_buffer* buf = NULL; + uint32_t soa_serial = 0; + char* unsupported_reason = NULL; + int only_unsupported = 1; + region = env->scratch; + regional_free_all(region); + buf = env->scratch_buffer; + if(!auth_zone_get_serial(z, &soa_serial)) { + *reason = "zone has no SOA serial"; + return 0; + } + + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + *reason = "zone has no apex"; + return 0; + } + zonemd_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_ZONEMD); + if(!zonemd_rrset || zonemd_rrset->data->count==0) { + *reason = "zone has no ZONEMD"; + return 0; /* no RRset or no RRs in rrset */ + } + + /* we have a ZONEMD, check if it is correct */ + for(i=0; i<zonemd_rrset->data->count; i++) { + uint32_t serial = 0; + int scheme = 0, hashalgo = 0; + uint8_t* hash = NULL; + size_t hashlen = 0; + if(!zonemd_fetch_parameters(zonemd_rrset, i, &serial, &scheme, + &hashalgo, &hash, &hashlen)) { + /* malformed RR */ + *reason = "ZONEMD rdata malformed"; + only_unsupported = 0; + continue; + } + /* check for duplicates */ + if(zonemd_is_duplicate_scheme_hash(zonemd_rrset, i, scheme, + hashalgo)) { + /* duplicate hash of the same scheme,hash + * is not allowed. */ + *reason = "ZONEMD RRSet contains more than one RR " + "with the same scheme and hash algorithm"; + only_unsupported = 0; + continue; + } + regional_free_all(region); + if(serial != soa_serial) { + *reason = "ZONEMD serial is wrong"; + only_unsupported = 0; + continue; + } + *reason = NULL; + if(auth_zone_generate_zonemd_check(z, scheme, hashalgo, + hash, hashlen, region, buf, reason)) { + /* success */ + if(*reason) { + if(!unsupported_reason) + unsupported_reason = *reason; + /* continue to check for valid ZONEMD */ + if(verbosity >= VERB_ALGO) { + char zstr[255+1]; + dname_str(z->name, zstr); + verbose(VERB_ALGO, "auth-zone %s ZONEMD %d %d is unsupported: %s", zstr, (int)scheme, (int)hashalgo, *reason); + } + *reason = NULL; + continue; + } + if(verbosity >= VERB_ALGO) { + char zstr[255+1]; + dname_str(z->name, zstr); + if(!*reason) + verbose(VERB_ALGO, "auth-zone %s ZONEMD hash is correct", zstr); + } + return 1; + } + only_unsupported = 0; + /* try next one */ + } + /* have we seen no failures but only unsupported algo, + * and one unsupported algorithm, or more. */ + if(only_unsupported && unsupported_reason) { + /* only unsupported algorithms, with valid serial, not + * malformed. Did not see supported algorithms, failed or + * successful ones. */ + *reason = unsupported_reason; + return 1; + } + /* fail, we may have reason */ + if(!*reason) + *reason = "no ZONEMD records found"; + if(verbosity >= VERB_ALGO) { + char zstr[255+1]; + dname_str(z->name, zstr); + verbose(VERB_ALGO, "auth-zone %s ZONEMD failed: %s", zstr, *reason); + } + return 0; +} + +/** find the apex SOA RRset, if it exists */ +struct auth_rrset* auth_zone_get_soa_rrset(struct auth_zone* z) +{ + struct auth_data* apex; + struct auth_rrset* soa; + apex = az_find_name(z, z->name, z->namelen); + if(!apex) return NULL; + soa = az_domain_rrset(apex, LDNS_RR_TYPE_SOA); + return soa; +} + /** find serial number of zone or false if none */ int auth_zone_get_serial(struct auth_zone* z, uint32_t* serial) @@ -1779,7 +2010,7 @@ auth_zone_get_serial(struct auth_zone* z, uint32_t* serial) } /** Find auth_zone SOA and populate the values in xfr(soa values). */ -static int +int xfr_find_soa(struct auth_zone* z, struct auth_xfer* xfr) { struct auth_data* apex; @@ -1908,6 +2139,8 @@ auth_zones_cfg(struct auth_zones* az, struct config_auth* c) z->for_downstream = c->for_downstream; z->for_upstream = c->for_upstream; z->fallback_enabled = c->fallback_enabled; + z->zonemd_check = c->zonemd_check; + z->zonemd_reject_absence = c->zonemd_reject_absence; if(c->isrpz && !z->rpz){ if(!(z->rpz = rpz_create(c))){ fatal_exit("Could not setup RPZ zones"); @@ -1919,6 +2152,16 @@ auth_zones_cfg(struct auth_zones* az, struct config_auth* c) if(az->rpz_first) az->rpz_first->rpz_az_prev = z; az->rpz_first = z; + } else if(c->isrpz && z->rpz) { + if(!rpz_config(z->rpz, c)) { + log_err("Could not change rpz config"); + if(x) { + lock_basic_unlock(&x->lock); + } + lock_rw_unlock(&z->lock); + lock_rw_unlock(&az->rpz_lock); + return 0; + } } if(c->isrpz) { lock_rw_unlock(&az->rpz_lock); @@ -2000,7 +2243,8 @@ az_delete_deleted_zones(struct auth_zones* az) } int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, - int setup, int* is_rpz) + int setup, int* is_rpz, struct module_env* env, + struct module_stack* mods) { struct config_auth* p; az_setall_deleted(az); @@ -2016,7 +2260,7 @@ int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, } } az_delete_deleted_zones(az); - if(!auth_zones_read_zones(az, cfg)) + if(!auth_zones_read_zones(az, cfg, env, mods)) return 0; if(setup) { if(!auth_zones_setup_zones(az)) @@ -2241,6 +2485,7 @@ az_find_ce(struct auth_zone* z, struct query_info* qinfo, struct auth_rrset** rrset) { struct auth_data* n = node; + struct auth_rrset* lookrrset; *ce = NULL; *rrset = NULL; if(!node_exact) { @@ -2263,21 +2508,23 @@ az_find_ce(struct auth_zone* z, struct query_info* qinfo, /* see if the current candidate has issues */ /* not zone apex and has type NS */ if(n->namelen != z->namelen && - (*rrset=az_domain_rrset(n, LDNS_RR_TYPE_NS)) && + (lookrrset=az_domain_rrset(n, LDNS_RR_TYPE_NS)) && /* delegate here, but DS at exact the dp has notype */ (qinfo->qtype != LDNS_RR_TYPE_DS || n->namelen != qinfo->qname_len)) { /* referral */ /* this is ce and the lowernode is nonexisting */ *ce = n; - return 0; + *rrset = lookrrset; + node_exact = 0; } /* not equal to qname and has type DNAME */ if(n->namelen != qinfo->qname_len && - (*rrset=az_domain_rrset(n, LDNS_RR_TYPE_DNAME))) { + (lookrrset=az_domain_rrset(n, LDNS_RR_TYPE_DNAME))) { /* this is ce and the lowernode is nonexisting */ *ce = n; - return 0; + *rrset = lookrrset; + node_exact = 0; } if(*ce == NULL && !domain_has_only_nsec3(n)) { @@ -2465,7 +2712,7 @@ create_synth_cname(uint8_t* qname, size_t qname_len, struct regional* region, if(!d) return 0; /* out of memory */ (*cname)->entry.data = d; - d->ttl = 0; /* 0 for synthesized CNAME TTL */ + d->ttl = dname->data->ttl; /* RFC6672: synth CNAME TTL == DNAME TTL */ d->count = 1; d->rrsig_count = 0; d->trust = rrset_trust_ans_noAA; @@ -2522,6 +2769,7 @@ az_change_dnames(struct dns_msg* msg, uint8_t* oldname, uint8_t* newname, == 0) { msg->rep->rrsets[i]->rk.dname = newname; msg->rep->rrsets[i]->rk.dname_len = newlen; + msg->rep->rrsets[i]->entry.hash = rrset_key_hash(&msg->rep->rrsets[i]->rk); } } } @@ -3314,7 +3562,7 @@ auth_error_encode(struct query_info* qinfo, struct module_env* env, if(!inplace_cb_reply_local_call(env, qinfo, NULL, NULL, rcode, edns, repinfo, temp, env->now_tv)) - edns->opt_list = NULL; + edns->opt_list_inplace_cb_out = NULL; error_encode(buf, rcode|BIT_AA, qinfo, *(uint16_t*)sldns_buffer_begin(buf), sldns_buffer_read_u16_at(buf, 2), edns); @@ -3465,7 +3713,7 @@ addr_matches_master(struct auth_master* master, struct sockaddr_storage* addr, /* compare address (but not port number, that is the destination * port of the master, the port number of the received notify is * allowed to by any port on that master) */ - if(extstrtoaddr(master->host, &a, &alen) && + if(extstrtoaddr(master->host, &a, &alen, UNBOUND_DNS_PORT) && sockaddr_cmp_addr(addr, addrlen, &a, alen)==0) { *fromhost = master; return 1; @@ -3733,7 +3981,7 @@ probe_copy_masters_for_allow_notify(struct auth_xfer* xfr) struct auth_master* list = NULL, *last = NULL; struct auth_master* p; /* build up new list with copies */ - for(p = xfr->task_probe->masters; p; p=p->next) { + for(p = xfr->task_transfer->masters; p; p=p->next) { struct auth_master* m = auth_master_copy(p); if(!m) { auth_free_masters(list); @@ -4252,7 +4500,7 @@ chunkline_get_line_collated(struct auth_chunk** chunk, size_t* chunk_pos, return 1; } -/** process $ORIGIN for http */ +/** process $ORIGIN for http, 0 nothing, 1 done, 2 error */ static int http_parse_origin(sldns_buffer* buf, struct sldns_file_parse_state* pstate) { @@ -4263,13 +4511,16 @@ http_parse_origin(sldns_buffer* buf, struct sldns_file_parse_state* pstate) pstate->origin_len = sizeof(pstate->origin); s = sldns_str2wire_dname_buf(sldns_strip_ws(line+8), pstate->origin, &pstate->origin_len); - if(s) pstate->origin_len = 0; + if(s) { + pstate->origin_len = 0; + return 2; + } return 1; } return 0; } -/** process $TTL for http */ +/** process $TTL for http, 0 nothing, 1 done, 2 error */ static int http_parse_ttl(sldns_buffer* buf, struct sldns_file_parse_state* pstate) { @@ -4277,8 +4528,12 @@ http_parse_ttl(sldns_buffer* buf, struct sldns_file_parse_state* pstate) if(strncmp(line, "$TTL", 4) == 0 && isspace((unsigned char)line[4])) { const char* end = NULL; + int overflow = 0; pstate->default_ttl = sldns_str2period( - sldns_strip_ws(line+5), &end); + sldns_strip_ws(line+5), &end, &overflow); + if(overflow) { + return 2; + } return 1; } return 0; @@ -4289,15 +4544,20 @@ static int chunkline_non_comment_RR(struct auth_chunk** chunk, size_t* chunk_pos, sldns_buffer* buf, struct sldns_file_parse_state* pstate) { + int ret; while(chunkline_get_line_collated(chunk, chunk_pos, buf)) { if(chunkline_is_comment_line_or_empty(buf)) { /* a comment, go to next line */ continue; } - if(http_parse_origin(buf, pstate)) { + if((ret=http_parse_origin(buf, pstate))!=0) { + if(ret == 2) + return 0; continue; /* $ORIGIN has been handled */ } - if(http_parse_ttl(buf, pstate)) { + if((ret=http_parse_ttl(buf, pstate))!=0) { + if(ret == 2) + return 0; continue; /* $TTL has been handled */ } return 1; @@ -4803,6 +5063,7 @@ apply_http(struct auth_xfer* xfr, struct auth_zone* z, struct sldns_file_parse_state pstate; struct auth_chunk* chunk; size_t chunk_pos; + int ret; memset(&pstate, 0, sizeof(pstate)); pstate.default_ttl = 3600; if(xfr->namelen < sizeof(pstate.origin)) { @@ -4859,10 +5120,24 @@ apply_http(struct auth_xfer* xfr, struct auth_zone* z, continue; } /* parse line and add RR */ - if(http_parse_origin(scratch_buffer, &pstate)) { + if((ret=http_parse_origin(scratch_buffer, &pstate))!=0) { + if(ret == 2) { + verbose(VERB_ALGO, "error parsing ORIGIN on line [%s:%d] %s", + xfr->task_transfer->master->file, + pstate.lineno, + sldns_buffer_begin(scratch_buffer)); + return 0; + } continue; /* $ORIGIN has been handled */ } - if(http_parse_ttl(scratch_buffer, &pstate)) { + if((ret=http_parse_ttl(scratch_buffer, &pstate))!=0) { + if(ret == 2) { + verbose(VERB_ALGO, "error parsing TTL on line [%s:%d] %s", + xfr->task_transfer->master->file, + pstate.lineno, + sldns_buffer_begin(scratch_buffer)); + return 0; + } continue; /* $TTL has been handled */ } if(!http_parse_add_rr(xfr, z, scratch_buffer, &pstate)) { @@ -4959,6 +5234,9 @@ xfr_write_after_update(struct auth_xfer* xfr, struct module_env* env) lock_rw_unlock(&z->lock); return; } +#ifdef UB_ON_WINDOWS + (void)unlink(zfilename); /* windows does not replace file with rename() */ +#endif if(rename(tmpfile, zfilename) < 0) { log_err("could not rename(%s, %s): %s", tmpfile, zfilename, strerror(errno)); @@ -4969,6 +5247,28 @@ xfr_write_after_update(struct auth_xfer* xfr, struct module_env* env) lock_rw_unlock(&z->lock); } +/** reacquire locks and structures. Starts with no locks, ends + * with xfr and z locks, if fail, no z lock */ +static int xfr_process_reacquire_locks(struct auth_xfer* xfr, + struct module_env* env, struct auth_zone** z) +{ + /* release xfr lock, then, while holding az->lock grab both + * z->lock and xfr->lock */ + lock_rw_rdlock(&env->auth_zones->lock); + *z = auth_zone_find(env->auth_zones, xfr->name, xfr->namelen, + xfr->dclass); + if(!*z) { + lock_rw_unlock(&env->auth_zones->lock); + lock_basic_lock(&xfr->lock); + *z = NULL; + return 0; + } + lock_rw_wrlock(&(*z)->lock); + lock_basic_lock(&xfr->lock); + lock_rw_unlock(&env->auth_zones->lock); + return 1; +} + /** process chunk list and update zone in memory, * return false if it did not work */ static int @@ -4978,21 +5278,12 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env, struct auth_zone* z; /* obtain locks and structures */ - /* release xfr lock, then, while holding az->lock grab both - * z->lock and xfr->lock */ lock_basic_unlock(&xfr->lock); - lock_rw_rdlock(&env->auth_zones->lock); - z = auth_zone_find(env->auth_zones, xfr->name, xfr->namelen, - xfr->dclass); - if(!z) { - lock_rw_unlock(&env->auth_zones->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) { /* the zone is gone, ignore xfr results */ - lock_basic_lock(&xfr->lock); return 0; } - lock_rw_wrlock(&z->lock); - lock_basic_lock(&xfr->lock); - lock_rw_unlock(&env->auth_zones->lock); + /* holding xfr and z locks */ /* apply data */ if(xfr->task_transfer->master->http) { @@ -5027,6 +5318,35 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env, " (or malformed RR)", xfr->task_transfer->master->host); return 0; } + + /* release xfr lock while verifying zonemd because it may have + * to spawn lookups in the state machines */ + lock_basic_unlock(&xfr->lock); + /* holding z lock */ + auth_zone_verify_zonemd(z, env, &env->mesh->mods, NULL, 0, 0); + if(z->zone_expired) { + char zname[256]; + /* ZONEMD must have failed */ + /* reacquire locks, so we hold xfr lock on exit of routine, + * and both xfr and z again after releasing xfr for potential + * state machine mesh callbacks */ + lock_rw_unlock(&z->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) + return 0; + dname_str(xfr->name, zname); + verbose(VERB_ALGO, "xfr from %s: ZONEMD failed for %s, transfer is failed", xfr->task_transfer->master->host, zname); + xfr->zone_expired = 1; + lock_rw_unlock(&z->lock); + return 0; + } + /* reacquire locks, so we hold xfr lock on exit of routine, + * and both xfr and z again after releasing xfr for potential + * state machine mesh callbacks */ + lock_rw_unlock(&z->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) + return 0; + /* holding xfr and z locks */ + if(xfr->have_zone) xfr->lease_time = *env->now; @@ -5075,7 +5395,7 @@ xfr_transfer_lookup_host(struct auth_xfer* xfr, struct module_env* env) struct edns_data edns; sldns_buffer* buf = env->scratch_buffer; if(!master) return 0; - if(extstrtoaddr(master->host, &addr, &addrlen)) { + if(extstrtoaddr(master->host, &addr, &addrlen, UNBOUND_DNS_PORT)) { /* not needed, host is in IP addr format */ return 0; } @@ -5109,8 +5429,12 @@ xfr_transfer_lookup_host(struct auth_xfer* xfr, struct module_env* env) edns.ext_rcode = 0; edns.edns_version = 0; edns.bits = EDNS_DO; - edns.opt_list = NULL; + edns.opt_list_in = NULL; + edns.opt_list_out = NULL; + edns.opt_list_inplace_cb_out = NULL; edns.padding_block_size = 0; + edns.cookie_present = 0; + edns.cookie_valid = 0; if(sldns_buffer_capacity(buf) < 65535) edns.udp_size = (uint16_t)sldns_buffer_capacity(buf); else edns.udp_size = 65535; @@ -5119,7 +5443,7 @@ xfr_transfer_lookup_host(struct auth_xfer* xfr, struct module_env* env) * called straight away */ lock_basic_unlock(&xfr->lock); if(!mesh_new_callback(env->mesh, &qinfo, qflags, &edns, buf, 0, - &auth_xfer_transfer_lookup_callback, xfr)) { + &auth_xfer_transfer_lookup_callback, xfr, 0)) { lock_basic_lock(&xfr->lock); log_err("out of memory lookup up master %s", master->host); return 0; @@ -5188,7 +5512,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) xfr->task_transfer->cp = outnet_comm_point_for_http( env->outnet, auth_xfer_transfer_http_callback, xfr, &addr, addrlen, -1, master->ssl, master->host, - master->file); + master->file, env->cfg); if(!xfr->task_transfer->cp) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5204,13 +5528,15 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) addr_to_str(&addr, addrlen, as, sizeof(as)); verbose(VERB_ALGO, "auth zone %s transfer next HTTP fetch from %s started", zname, as); } + /* Create or refresh the list of allow_notify addrs */ + probe_copy_masters_for_allow_notify(xfr); return 1; } /* perform AXFR/IXFR */ /* set the packet to be written */ /* create new ID */ - xfr->task_transfer->id = (uint16_t)(ub_random(env->rnd)&0xffff); + xfr->task_transfer->id = GET_RANDOM_ID(env->rnd); xfr_create_ixfr_packet(xfr, env->scratch_buffer, xfr->task_transfer->id, master); @@ -6060,7 +6386,7 @@ xfr_probe_send_probe(struct auth_xfer* xfr, struct module_env* env, /* create new ID for new probes, but not on timeout retries, * this means we'll accept replies to previous retries to same ip */ if(timeout == AUTH_PROBE_TIMEOUT) - xfr->task_probe->id = (uint16_t)(ub_random(env->rnd)&0xffff); + xfr->task_probe->id = GET_RANDOM_ID(env->rnd); xfr_create_soa_probe_packet(xfr, env->scratch_buffer, xfr->task_probe->id); /* we need to remove the cp if we have a different ip4/ip6 type now */ @@ -6242,7 +6568,7 @@ auth_xfer_probe_udp_callback(struct comm_point* c, void* arg, int err, comm_point_delete(xfr->task_probe->cp); xfr->task_probe->cp = NULL; - /* if the result was not a successfull probe, we need + /* if the result was not a successful probe, we need * to send the next one */ xfr_probe_nextmaster(xfr); xfr_probe_send_or_end(xfr, env); @@ -6262,7 +6588,7 @@ xfr_probe_lookup_host(struct auth_xfer* xfr, struct module_env* env) struct edns_data edns; sldns_buffer* buf = env->scratch_buffer; if(!master) return 0; - if(extstrtoaddr(master->host, &addr, &addrlen)) { + if(extstrtoaddr(master->host, &addr, &addrlen, UNBOUND_DNS_PORT)) { /* not needed, host is in IP addr format */ return 0; } @@ -6298,8 +6624,12 @@ xfr_probe_lookup_host(struct auth_xfer* xfr, struct module_env* env) edns.ext_rcode = 0; edns.edns_version = 0; edns.bits = EDNS_DO; - edns.opt_list = NULL; + edns.opt_list_in = NULL; + edns.opt_list_out = NULL; + edns.opt_list_inplace_cb_out = NULL; edns.padding_block_size = 0; + edns.cookie_present = 0; + edns.cookie_valid = 0; if(sldns_buffer_capacity(buf) < 65535) edns.udp_size = (uint16_t)sldns_buffer_capacity(buf); else edns.udp_size = 65535; @@ -6308,7 +6638,7 @@ xfr_probe_lookup_host(struct auth_xfer* xfr, struct module_env* env) * called straight away */ lock_basic_unlock(&xfr->lock); if(!mesh_new_callback(env->mesh, &qinfo, qflags, &edns, buf, 0, - &auth_xfer_probe_lookup_callback, xfr)) { + &auth_xfer_probe_lookup_callback, xfr, 0)) { lock_basic_lock(&xfr->lock); log_err("out of memory lookup up master %s", master->host); return 0; @@ -6911,7 +7241,7 @@ parse_url(char* url, char** host, char** file, int* port, int* ssl) while(p && *p == '/') p++; if(!p || p[0] == 0) - *file = strdup("index.html"); + *file = strdup("/"); else *file = strdup(p); if(!*file) { log_err("malloc failure"); @@ -6933,12 +7263,14 @@ xfer_set_masters(struct auth_master** list, struct config_auth* c, if(with_http) for(p = c->urls; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->http = 1; if(!parse_url(p->str, &m->host, &m->file, &m->port, &m->ssl)) return 0; } for(p = c->masters; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->ixfr = 1; /* this flag is not configurable */ m->host = strdup(p->str); if(!m->host) { @@ -6948,6 +7280,7 @@ xfer_set_masters(struct auth_master** list, struct config_auth* c, } for(p = c->allow_notify; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->allow_notify = 1; m->host = strdup(p->str); if(!m->host) { @@ -6972,3 +7305,1243 @@ compare_serial(uint32_t a, uint32_t b) return 1; } } + +int zonemd_hashalgo_supported(int hashalgo) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) return 1; + if(hashalgo == ZONEMD_ALGO_SHA512) return 1; + return 0; +} + +int zonemd_scheme_supported(int scheme) +{ + if(scheme == ZONEMD_SCHEME_SIMPLE) return 1; + return 0; +} + +/** initialize hash for hashing with zonemd hash algo */ +static struct secalgo_hash* zonemd_digest_init(int hashalgo, char** reason) +{ + struct secalgo_hash *h; + if(hashalgo == ZONEMD_ALGO_SHA384) { + /* sha384 */ + h = secalgo_hash_create_sha384(); + if(!h) + *reason = "digest sha384 could not be created"; + return h; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + /* sha512 */ + h = secalgo_hash_create_sha512(); + if(!h) + *reason = "digest sha512 could not be created"; + return h; + } + /* unknown hash algo */ + *reason = "unsupported algorithm"; + return NULL; +} + +/** update the hash for zonemd */ +static int zonemd_digest_update(int hashalgo, struct secalgo_hash* h, + uint8_t* data, size_t len, char** reason) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) { + if(!secalgo_hash_update(h, data, len)) { + *reason = "digest sha384 failed"; + return 0; + } + return 1; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + if(!secalgo_hash_update(h, data, len)) { + *reason = "digest sha512 failed"; + return 0; + } + return 1; + } + /* unknown hash algo */ + *reason = "unsupported algorithm"; + return 0; +} + +/** finish the hash for zonemd */ +static int zonemd_digest_finish(int hashalgo, struct secalgo_hash* h, + uint8_t* result, size_t hashlen, size_t* resultlen, char** reason) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) { + if(hashlen < 384/8) { + *reason = "digest buffer too small for sha384"; + return 0; + } + if(!secalgo_hash_final(h, result, hashlen, resultlen)) { + *reason = "digest sha384 finish failed"; + return 0; + } + return 1; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + if(hashlen < 512/8) { + *reason = "digest buffer too small for sha512"; + return 0; + } + if(!secalgo_hash_final(h, result, hashlen, resultlen)) { + *reason = "digest sha512 finish failed"; + return 0; + } + return 1; + } + /* unknown algo */ + *reason = "unsupported algorithm"; + return 0; +} + +/** add rrsets from node to the list */ +static size_t authdata_rrsets_to_list(struct auth_rrset** array, + size_t arraysize, struct auth_rrset* first) +{ + struct auth_rrset* rrset = first; + size_t num = 0; + while(rrset) { + if(num >= arraysize) + return num; + array[num] = rrset; + num++; + rrset = rrset->next; + } + return num; +} + +/** compare rr list entries */ +static int rrlist_compare(const void* arg1, const void* arg2) +{ + struct auth_rrset* r1 = *(struct auth_rrset**)arg1; + struct auth_rrset* r2 = *(struct auth_rrset**)arg2; + uint16_t t1, t2; + if(r1 == NULL) t1 = LDNS_RR_TYPE_RRSIG; + else t1 = r1->type; + if(r2 == NULL) t2 = LDNS_RR_TYPE_RRSIG; + else t2 = r2->type; + if(t1 < t2) + return -1; + if(t1 > t2) + return 1; + return 0; +} + +/** add type RRSIG to rr list if not one there already, + * this is to perform RRSIG collate processing at that point. */ +static void addrrsigtype_if_needed(struct auth_rrset** array, + size_t arraysize, size_t* rrnum, struct auth_data* node) +{ + if(az_domain_rrset(node, LDNS_RR_TYPE_RRSIG)) + return; /* already one there */ + if((*rrnum) >= arraysize) + return; /* array too small? */ + array[*rrnum] = NULL; /* nothing there, but need entry in list */ + (*rrnum)++; +} + +/** collate the RRs in an RRset using the simple scheme */ +static int zonemd_simple_rrset(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct auth_rrset* rrset, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + /* canonicalize */ + struct ub_packed_rrset_key key; + memset(&key, 0, sizeof(key)); + key.entry.key = &key; + key.entry.data = rrset->data; + key.rk.dname = node->name; + key.rk.dname_len = node->namelen; + key.rk.type = htons(rrset->type); + key.rk.rrset_class = htons(z->dclass); + if(!rrset_canonicalize_to_buffer(region, buf, &key)) { + *reason = "out of memory"; + return 0; + } + regional_free_all(region); + + /* hash */ + if(!zonemd_digest_update(hashalgo, h, sldns_buffer_begin(buf), + sldns_buffer_limit(buf), reason)) { + return 0; + } + return 1; +} + +/** count number of RRSIGs in a domain name rrset list */ +static size_t zonemd_simple_count_rrsig(struct auth_rrset* rrset, + struct auth_rrset** rrlist, size_t rrnum, + struct auth_zone* z, struct auth_data* node) +{ + size_t i, count = 0; + if(rrset) { + size_t j; + for(j = 0; j<rrset->data->count; j++) { + if(rrsig_rdata_get_type_covered(rrset->data-> + rr_data[j], rrset->data->rr_len[j]) == + LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + count++; + } + } + for(i=0; i<rrnum; i++) { + if(rrlist[i] && rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + count += (rrlist[i]?rrlist[i]->data->rrsig_count:0); + } + return count; +} + +/** allocate sparse rrset data for the number of entries in tepm region */ +static int zonemd_simple_rrsig_allocs(struct regional* region, + struct packed_rrset_data* data, size_t count) +{ + data->rr_len = regional_alloc(region, sizeof(*data->rr_len) * count); + if(!data->rr_len) { + return 0; + } + data->rr_ttl = regional_alloc(region, sizeof(*data->rr_ttl) * count); + if(!data->rr_ttl) { + return 0; + } + data->rr_data = regional_alloc(region, sizeof(*data->rr_data) * count); + if(!data->rr_data) { + return 0; + } + return 1; +} + +/** add the RRSIGs from the rrs in the domain into the data */ +static void add_rrlist_rrsigs_into_data(struct packed_rrset_data* data, + size_t* done, struct auth_rrset** rrlist, size_t rrnum, + struct auth_zone* z, struct auth_data* node) +{ + size_t i; + for(i=0; i<rrnum; i++) { + size_t j; + if(!rrlist[i]) + continue; + if(rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + for(j = 0; j<rrlist[i]->data->rrsig_count; j++) { + data->rr_len[*done] = rrlist[i]->data->rr_len[rrlist[i]->data->count + j]; + data->rr_ttl[*done] = rrlist[i]->data->rr_ttl[rrlist[i]->data->count + j]; + /* reference the rdata in the rrset, no need to + * copy it, it is no longer needed at the end of + * the routine */ + data->rr_data[*done] = rrlist[i]->data->rr_data[rrlist[i]->data->count + j]; + (*done)++; + } + } +} + +static void add_rrset_into_data(struct packed_rrset_data* data, + size_t* done, struct auth_rrset* rrset, + struct auth_zone* z, struct auth_data* node) +{ + if(rrset) { + size_t j; + for(j = 0; j<rrset->data->count; j++) { + if(rrsig_rdata_get_type_covered(rrset->data-> + rr_data[j], rrset->data->rr_len[j]) == + LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + data->rr_len[*done] = rrset->data->rr_len[j]; + data->rr_ttl[*done] = rrset->data->rr_ttl[j]; + /* reference the rdata in the rrset, no need to + * copy it, it is no longer need at the end of + * the routine */ + data->rr_data[*done] = rrset->data->rr_data[j]; + (*done)++; + } + } +} + +/** collate the RRSIGs using the simple scheme */ +static int zonemd_simple_rrsig(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct auth_rrset* rrset, struct auth_rrset** rrlist, size_t rrnum, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + /* the rrset pointer can be NULL, this means it is type RRSIG and + * there is no ordinary type RRSIG there. The RRSIGs are stored + * with the RRsets in their data. + * + * The RRset pointer can be nonNULL. This happens if there is + * no RR that is covered by the RRSIG for the domain. Then this + * RRSIG RR is stored in an rrset of type RRSIG. The other RRSIGs + * are stored in the rrset entries for the RRs in the rr list for + * the domain node. We need to collate the rrset's data, if any, and + * the rrlist's rrsigs */ + /* if this is the apex, omit RRSIGs that cover type ZONEMD */ + /* build rrsig rrset */ + size_t done = 0; + struct ub_packed_rrset_key key; + struct packed_rrset_data data; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.entry.key = &key; + key.entry.data = &data; + key.rk.dname = node->name; + key.rk.dname_len = node->namelen; + key.rk.type = htons(LDNS_RR_TYPE_RRSIG); + key.rk.rrset_class = htons(z->dclass); + data.count = zonemd_simple_count_rrsig(rrset, rrlist, rrnum, z, node); + if(!zonemd_simple_rrsig_allocs(region, &data, data.count)) { + *reason = "out of memory"; + regional_free_all(region); + return 0; + } + /* all the RRSIGs stored in the other rrsets for this domain node */ + add_rrlist_rrsigs_into_data(&data, &done, rrlist, rrnum, z, node); + /* plus the RRSIGs stored in an rrset of type RRSIG for this node */ + add_rrset_into_data(&data, &done, rrset, z, node); + + /* canonicalize */ + if(!rrset_canonicalize_to_buffer(region, buf, &key)) { + *reason = "out of memory"; + regional_free_all(region); + return 0; + } + regional_free_all(region); + + /* hash */ + if(!zonemd_digest_update(hashalgo, h, sldns_buffer_begin(buf), + sldns_buffer_limit(buf), reason)) { + return 0; + } + return 1; +} + +/** collate a domain's rrsets using the simple scheme */ +static int zonemd_simple_domain(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + const size_t rrlistsize = 65536; + struct auth_rrset* rrlist[rrlistsize]; + size_t i, rrnum = 0; + /* see if the domain is out of scope, the zone origin, + * that would be omitted */ + if(!dname_subdomain_c(node->name, z->name)) + return 1; /* continue */ + /* loop over the rrsets in ascending order. */ + rrnum = authdata_rrsets_to_list(rrlist, rrlistsize, node->rrsets); + addrrsigtype_if_needed(rrlist, rrlistsize, &rrnum, node); + qsort(rrlist, rrnum, sizeof(*rrlist), rrlist_compare); + for(i=0; i<rrnum; i++) { + if(rrlist[i] && rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name) == 0) { + /* omit type ZONEMD at apex */ + continue; + } + if(rrlist[i] == NULL || rrlist[i]->type == + LDNS_RR_TYPE_RRSIG) { + if(!zonemd_simple_rrsig(z, hashalgo, h, node, + rrlist[i], rrlist, rrnum, region, buf, reason)) + return 0; + } else if(!zonemd_simple_rrset(z, hashalgo, h, node, + rrlist[i], region, buf, reason)) { + return 0; + } + } + return 1; +} + +/** collate the zone using the simple scheme */ +static int zonemd_simple_collate(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + /* our tree is sorted in canonical order, so we can just loop over + * the tree */ + struct auth_data* n; + RBTREE_FOR(n, struct auth_data*, &z->data) { + if(!zonemd_simple_domain(z, hashalgo, h, n, region, buf, + reason)) + return 0; + } + return 1; +} + +int auth_zone_generate_zonemd_hash(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, size_t* resultlen, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + struct secalgo_hash* h = zonemd_digest_init(hashalgo, reason); + if(!h) { + if(!*reason) + *reason = "digest init fail"; + return 0; + } + if(scheme == ZONEMD_SCHEME_SIMPLE) { + if(!zonemd_simple_collate(z, hashalgo, h, region, buf, reason)) { + if(!*reason) *reason = "scheme simple collate fail"; + secalgo_hash_delete(h); + return 0; + } + } + if(!zonemd_digest_finish(hashalgo, h, hash, hashlen, resultlen, + reason)) { + secalgo_hash_delete(h); + *reason = "digest finish fail"; + return 0; + } + secalgo_hash_delete(h); + return 1; +} + +int auth_zone_generate_zonemd_check(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + uint8_t gen[512]; + size_t genlen = 0; + *reason = NULL; + if(!zonemd_hashalgo_supported(hashalgo)) { + /* allow it */ + *reason = "unsupported algorithm"; + return 1; + } + if(!zonemd_scheme_supported(scheme)) { + /* allow it */ + *reason = "unsupported scheme"; + return 1; + } + if(hashlen < 12) { + /* the ZONEMD draft requires digests to fail if too small */ + *reason = "digest length too small, less than 12"; + return 0; + } + /* generate digest */ + if(!auth_zone_generate_zonemd_hash(z, scheme, hashalgo, gen, + sizeof(gen), &genlen, region, buf, reason)) { + /* reason filled in by zonemd hash routine */ + return 0; + } + /* check digest length */ + if(hashlen != genlen) { + *reason = "incorrect digest length"; + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "zonemd scheme=%d hashalgo=%d", + scheme, hashalgo); + log_hex("ZONEMD should be ", gen, genlen); + log_hex("ZONEMD to check is", hash, hashlen); + } + return 0; + } + /* check digest */ + if(memcmp(hash, gen, genlen) != 0) { + *reason = "incorrect digest"; + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "zonemd scheme=%d hashalgo=%d", + scheme, hashalgo); + log_hex("ZONEMD should be ", gen, genlen); + log_hex("ZONEMD to check is", hash, hashlen); + } + return 0; + } + return 1; +} + +/** log auth zone message with zone name in front. */ +static void auth_zone_log(uint8_t* name, enum verbosity_value level, + const char* format, ...) ATTR_FORMAT(printf, 3, 4); +static void auth_zone_log(uint8_t* name, enum verbosity_value level, + const char* format, ...) +{ + va_list args; + va_start(args, format); + if(verbosity >= level) { + char str[255+1]; + char msg[MAXSYSLOGMSGLEN]; + dname_str(name, str); + vsnprintf(msg, sizeof(msg), format, args); + verbose(level, "auth zone %s %s", str, msg); + } + va_end(args); +} + +/** ZONEMD, dnssec verify the rrset with the dnskey */ +static int zonemd_dnssec_verify_rrset(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* node, + struct auth_rrset* rrset, char** why_bogus, uint8_t* sigalg) +{ + struct ub_packed_rrset_key pk; + enum sec_status sec; + struct val_env* ve; + int m; + int verified = 0; + m = modstack_find(mods, "validator"); + if(m == -1) { + auth_zone_log(z->name, VERB_ALGO, "zonemd dnssec verify: have " + "DNSKEY chain of trust, but no validator module"); + return 0; + } + ve = (struct val_env*)env->modinfo[m]; + + memset(&pk, 0, sizeof(pk)); + pk.entry.key = &pk; + pk.entry.data = rrset->data; + pk.rk.dname = node->name; + pk.rk.dname_len = node->namelen; + pk.rk.type = htons(rrset->type); + pk.rk.rrset_class = htons(z->dclass); + if(verbosity >= VERB_ALGO) { + char typestr[32]; + typestr[0]=0; + sldns_wire2str_type_buf(rrset->type, typestr, sizeof(typestr)); + auth_zone_log(z->name, VERB_ALGO, + "zonemd: verify %s RRset with DNSKEY", typestr); + } + sec = dnskeyset_verify_rrset(env, ve, &pk, dnskey, sigalg, why_bogus, NULL, + LDNS_SECTION_ANSWER, NULL, &verified); + if(sec == sec_status_secure) { + return 1; + } + if(why_bogus) + auth_zone_log(z->name, VERB_ALGO, "DNSSEC verify was bogus: %s", *why_bogus); + return 0; +} + +/** check for nsec3, the RR with params equal, if bitmap has the type */ +static int nsec3_of_param_has_type(struct auth_rrset* nsec3, int algo, + size_t iter, uint8_t* salt, size_t saltlen, uint16_t rrtype) +{ + int i, count = (int)nsec3->data->count; + struct ub_packed_rrset_key pk; + memset(&pk, 0, sizeof(pk)); + pk.entry.data = nsec3->data; + for(i=0; i<count; i++) { + int rralgo; + size_t rriter, rrsaltlen; + uint8_t* rrsalt; + if(!nsec3_get_params(&pk, i, &rralgo, &rriter, &rrsalt, + &rrsaltlen)) + continue; /* no parameters, malformed */ + if(rralgo != algo || rriter != iter || rrsaltlen != saltlen) + continue; /* different parameters */ + if(saltlen != 0) { + if(rrsalt == NULL || salt == NULL) + continue; + if(memcmp(rrsalt, salt, saltlen) != 0) + continue; /* different salt parameters */ + } + if(nsec3_has_type(&pk, i, rrtype)) + return 1; + } + return 0; +} + +/** Verify the absence of ZONEMD with DNSSEC by checking NSEC, NSEC3 type flag. + * return false on failure, reason contains description of failure. */ +static int zonemd_check_dnssec_absence(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* apex, + char** reason, char** why_bogus, uint8_t* sigalg) +{ + struct auth_rrset* nsec = NULL; + if(!apex) { + *reason = "zone has no apex domain but ZONEMD missing"; + return 0; + } + nsec = az_domain_rrset(apex, LDNS_RR_TYPE_NSEC); + if(nsec) { + struct ub_packed_rrset_key pk; + /* dnssec verify the NSEC */ + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, + nsec, why_bogus, sigalg)) { + *reason = "DNSSEC verify failed for NSEC RRset"; + return 0; + } + /* check type bitmap */ + memset(&pk, 0, sizeof(pk)); + pk.entry.data = nsec->data; + if(nsec_has_type(&pk, LDNS_RR_TYPE_ZONEMD)) { + *reason = "DNSSEC NSEC bitmap says type ZONEMD exists"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC NSEC verification of absence of ZONEMD secure"); + } else { + /* NSEC3 perhaps ? */ + int algo; + size_t iter, saltlen; + uint8_t* salt; + struct auth_rrset* nsec3param = az_domain_rrset(apex, + LDNS_RR_TYPE_NSEC3PARAM); + struct auth_data* match; + struct auth_rrset* nsec3; + if(!nsec3param) { + *reason = "zone has no NSEC information but ZONEMD missing"; + return 0; + } + if(!az_nsec3_param(z, &algo, &iter, &salt, &saltlen)) { + *reason = "zone has no NSEC information but ZONEMD missing"; + return 0; + } + /* find the NSEC3 record */ + match = az_nsec3_find_exact(z, z->name, z->namelen, algo, + iter, salt, saltlen); + if(!match) { + *reason = "zone has no NSEC3 domain for the apex but ZONEMD missing"; + return 0; + } + nsec3 = az_domain_rrset(match, LDNS_RR_TYPE_NSEC3); + if(!nsec3) { + *reason = "zone has no NSEC3 RRset for the apex but ZONEMD missing"; + return 0; + } + /* dnssec verify the NSEC3 */ + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, match, + nsec3, why_bogus, sigalg)) { + *reason = "DNSSEC verify failed for NSEC3 RRset"; + return 0; + } + /* check type bitmap */ + if(nsec3_of_param_has_type(nsec3, algo, iter, salt, saltlen, + LDNS_RR_TYPE_ZONEMD)) { + *reason = "DNSSEC NSEC3 bitmap says type ZONEMD exists"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC NSEC3 verification of absence of ZONEMD secure"); + } + + return 1; +} + +/** Verify the SOA and ZONEMD DNSSEC signatures. + * return false on failure, reason contains description of failure. */ +static int zonemd_check_dnssec_soazonemd(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* apex, + struct auth_rrset* zonemd_rrset, char** reason, char** why_bogus, + uint8_t* sigalg) +{ + struct auth_rrset* soa; + if(!apex) { + *reason = "zone has no apex domain"; + return 0; + } + soa = az_domain_rrset(apex, LDNS_RR_TYPE_SOA); + if(!soa) { + *reason = "zone has no SOA RRset"; + return 0; + } + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, soa, + why_bogus, sigalg)) { + *reason = "DNSSEC verify failed for SOA RRset"; + return 0; + } + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, + zonemd_rrset, why_bogus, sigalg)) { + *reason = "DNSSEC verify failed for ZONEMD RRset"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC verification of SOA and ZONEMD RRsets secure"); + return 1; +} + +/** + * Fail the ZONEMD verification. + * @param z: auth zone that fails. + * @param env: environment with config, to ignore failure or not. + * @param reason: failure string description. + * @param why_bogus: failure string for DNSSEC verification failure. + * @param result: strdup result in here if not NULL. + */ +static void auth_zone_zonemd_fail(struct auth_zone* z, struct module_env* env, + char* reason, char* why_bogus, char** result) +{ + char zstr[255+1]; + /* if fail: log reason, and depending on config also take action + * and drop the zone, eg. it is gone from memory, set zone_expired */ + dname_str(z->name, zstr); + if(!reason) reason = "verification failed"; + if(result) { + if(why_bogus) { + char res[1024]; + snprintf(res, sizeof(res), "%s: %s", reason, + why_bogus); + *result = strdup(res); + } else { + *result = strdup(reason); + } + if(!*result) log_err("out of memory"); + } else { + log_warn("auth zone %s: ZONEMD verification failed: %s", zstr, reason); + } + + if(env->cfg->zonemd_permissive_mode) { + verbose(VERB_ALGO, "zonemd-permissive-mode enabled, " + "not blocking zone %s", zstr); + return; + } + + /* expired means the zone gives servfail and is not used by + * lookup if fallback_enabled*/ + z->zone_expired = 1; +} + +/** + * Verify the zonemd with DNSSEC and hash check, with given key. + * @param z: auth zone. + * @param env: environment with config and temp buffers. + * @param mods: module stack with validator env for verification. + * @param dnskey: dnskey that we can use, or NULL. If nonnull, the key + * has been verified and is the start of the chain of trust. + * @param is_insecure: if true, the dnskey is not used, the zone is insecure. + * And dnssec is not used. It is DNSSEC secure insecure or not under + * a trust anchor. + * @param sigalg: if nonNULL provide algorithm downgrade protection. + * Otherwise one algorithm is enough. Must have space of ALGO_NEEDS_MAX+1. + * @param result: if not NULL result reason copied here. + */ +static void +auth_zone_verify_zonemd_with_key(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, struct ub_packed_rrset_key* dnskey, + int is_insecure, char** result, uint8_t* sigalg) +{ + char* reason = NULL, *why_bogus = NULL; + struct auth_data* apex = NULL; + struct auth_rrset* zonemd_rrset = NULL; + int zonemd_absent = 0, zonemd_absence_dnssecok = 0; + + /* see if ZONEMD is present or absent. */ + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + zonemd_absent = 1; + } else { + zonemd_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_ZONEMD); + if(!zonemd_rrset || zonemd_rrset->data->count==0) { + zonemd_absent = 1; + zonemd_rrset = NULL; + } + } + + /* if no DNSSEC, done. */ + /* if no ZONEMD, and DNSSEC, use DNSKEY to verify NSEC or NSEC3 for + * zone apex. Check ZONEMD bit is turned off or else fail */ + /* if ZONEMD, and DNSSEC, check DNSSEC signature on SOA and ZONEMD, + * or else fail */ + if(!dnskey && !is_insecure) { + auth_zone_zonemd_fail(z, env, "DNSKEY missing", NULL, result); + return; + } else if(!zonemd_rrset && dnskey && !is_insecure) { + /* fetch, DNSSEC verify, and check NSEC/NSEC3 */ + if(!zonemd_check_dnssec_absence(z, env, mods, dnskey, apex, + &reason, &why_bogus, sigalg)) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + zonemd_absence_dnssecok = 1; + } else if(zonemd_rrset && dnskey && !is_insecure) { + /* check DNSSEC verify of SOA and ZONEMD */ + if(!zonemd_check_dnssec_soazonemd(z, env, mods, dnskey, apex, + zonemd_rrset, &reason, &why_bogus, sigalg)) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + } + + if(zonemd_absent && z->zonemd_reject_absence) { + auth_zone_zonemd_fail(z, env, "ZONEMD absent and that is not allowed by config", NULL, result); + return; + } + if(zonemd_absent && zonemd_absence_dnssecok) { + auth_zone_log(z->name, VERB_ALGO, "DNSSEC verified nonexistence of ZONEMD"); + if(result) { + *result = strdup("DNSSEC verified nonexistence of ZONEMD"); + if(!*result) log_err("out of memory"); + } + return; + } + if(zonemd_absent) { + auth_zone_log(z->name, VERB_ALGO, "no ZONEMD present"); + if(result) { + *result = strdup("no ZONEMD present"); + if(!*result) log_err("out of memory"); + } + return; + } + + /* check ZONEMD checksum and report or else fail. */ + if(!auth_zone_zonemd_check_hash(z, env, &reason)) { + auth_zone_zonemd_fail(z, env, reason, NULL, result); + return; + } + + /* success! log the success */ + if(reason) + auth_zone_log(z->name, VERB_ALGO, "ZONEMD %s", reason); + else auth_zone_log(z->name, VERB_ALGO, "ZONEMD verification successful"); + if(result) { + if(reason) + *result = strdup(reason); + else *result = strdup("ZONEMD verification successful"); + if(!*result) log_err("out of memory"); + } +} + +/** + * verify the zone DNSKEY rrset from the trust anchor + * This is possible because the anchor is for the zone itself, and can + * thus apply straight to the zone DNSKEY set. + * @param z: the auth zone. + * @param env: environment with time and temp buffers. + * @param mods: module stack for validator environment for dnssec validation. + * @param anchor: trust anchor to use + * @param is_insecure: returned, true if the zone is securely insecure. + * @param why_bogus: if the routine fails, returns the failure reason. + * @param keystorage: where to store the ub_packed_rrset_key that is created + * on success. A pointer to it is returned on success. + * @return the dnskey RRset, reference to zone data and keystorage, or + * NULL on failure. + */ +static struct ub_packed_rrset_key* +zonemd_get_dnskey_from_anchor(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, struct trust_anchor* anchor, + int* is_insecure, char** why_bogus, + struct ub_packed_rrset_key* keystorage) +{ + struct auth_data* apex; + struct auth_rrset* dnskey_rrset; + enum sec_status sec; + struct val_env* ve; + int m; + + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + *why_bogus = "have trust anchor, but zone has no apex domain for DNSKEY"; + return 0; + } + dnskey_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_DNSKEY); + if(!dnskey_rrset || dnskey_rrset->data->count==0) { + *why_bogus = "have trust anchor, but zone has no DNSKEY"; + return 0; + } + + m = modstack_find(mods, "validator"); + if(m == -1) { + *why_bogus = "have trust anchor, but no validator module"; + return 0; + } + ve = (struct val_env*)env->modinfo[m]; + + memset(keystorage, 0, sizeof(*keystorage)); + keystorage->entry.key = keystorage; + keystorage->entry.data = dnskey_rrset->data; + keystorage->rk.dname = apex->name; + keystorage->rk.dname_len = apex->namelen; + keystorage->rk.type = htons(LDNS_RR_TYPE_DNSKEY); + keystorage->rk.rrset_class = htons(z->dclass); + auth_zone_log(z->name, VERB_QUERY, + "zonemd: verify DNSKEY RRset with trust anchor"); + sec = val_verify_DNSKEY_with_TA(env, ve, keystorage, anchor->ds_rrset, + anchor->dnskey_rrset, NULL, why_bogus, NULL, NULL); + regional_free_all(env->scratch); + if(sec == sec_status_secure) { + /* success */ + *is_insecure = 0; + return keystorage; + } else if(sec == sec_status_insecure) { + /* insecure */ + *is_insecure = 1; + } else { + /* bogus */ + *is_insecure = 0; + auth_zone_log(z->name, VERB_ALGO, + "zonemd: verify DNSKEY RRset with trust anchor failed: %s", *why_bogus); + } + return NULL; +} + +/** verify the DNSKEY from the zone with looked up DS record */ +static struct ub_packed_rrset_key* +auth_zone_verify_zonemd_key_with_ds(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* ds, int* is_insecure, char** why_bogus, + struct ub_packed_rrset_key* keystorage, uint8_t* sigalg) +{ + struct auth_data* apex; + struct auth_rrset* dnskey_rrset; + enum sec_status sec; + struct val_env* ve; + int m; + + /* fetch DNSKEY from zone data */ + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + *why_bogus = "in verifywithDS, zone has no apex"; + return NULL; + } + dnskey_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_DNSKEY); + if(!dnskey_rrset || dnskey_rrset->data->count==0) { + *why_bogus = "in verifywithDS, zone has no DNSKEY"; + return NULL; + } + + m = modstack_find(mods, "validator"); + if(m == -1) { + *why_bogus = "in verifywithDS, have no validator module"; + return NULL; + } + ve = (struct val_env*)env->modinfo[m]; + + memset(keystorage, 0, sizeof(*keystorage)); + keystorage->entry.key = keystorage; + keystorage->entry.data = dnskey_rrset->data; + keystorage->rk.dname = apex->name; + keystorage->rk.dname_len = apex->namelen; + keystorage->rk.type = htons(LDNS_RR_TYPE_DNSKEY); + keystorage->rk.rrset_class = htons(z->dclass); + auth_zone_log(z->name, VERB_QUERY, "zonemd: verify zone DNSKEY with DS"); + sec = val_verify_DNSKEY_with_DS(env, ve, keystorage, ds, sigalg, + why_bogus, NULL, NULL); + regional_free_all(env->scratch); + if(sec == sec_status_secure) { + /* success */ + return keystorage; + } else if(sec == sec_status_insecure) { + /* insecure */ + *is_insecure = 1; + } else { + /* bogus */ + *is_insecure = 0; + if(*why_bogus == NULL) + *why_bogus = "verify failed"; + auth_zone_log(z->name, VERB_ALGO, + "zonemd: verify DNSKEY RRset with DS failed: %s", + *why_bogus); + } + return NULL; +} + +/** callback for ZONEMD lookup of DNSKEY */ +void auth_zonemd_dnskey_lookup_callback(void* arg, int rcode, sldns_buffer* buf, + enum sec_status sec, char* why_bogus, int ATTR_UNUSED(was_ratelimited)) +{ + struct auth_zone* z = (struct auth_zone*)arg; + struct module_env* env; + char* reason = NULL, *ds_bogus = NULL, *typestr="DNSKEY"; + struct ub_packed_rrset_key* dnskey = NULL, *ds = NULL; + int is_insecure = 0, downprot; + struct ub_packed_rrset_key keystorage; + uint8_t sigalg[ALGO_NEEDS_MAX+1]; + + lock_rw_wrlock(&z->lock); + env = z->zonemd_callback_env; + /* release the env variable so another worker can pick up the + * ZONEMD verification task if it wants to */ + z->zonemd_callback_env = NULL; + if(!env || env->outnet->want_to_quit || z->zone_deleted) { + lock_rw_unlock(&z->lock); + return; /* stop on quit */ + } + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DS) + typestr = "DS"; + downprot = env->cfg->harden_algo_downgrade; + + /* process result */ + if(sec == sec_status_bogus) { + reason = why_bogus; + if(!reason) { + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DNSKEY) + reason = "lookup of DNSKEY was bogus"; + else reason = "lookup of DS was bogus"; + } + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was bogus: %s", typestr, reason); + } else if(rcode == LDNS_RCODE_NOERROR) { + uint16_t wanted_qtype = z->zonemd_callback_qtype; + struct regional* temp = env->scratch; + struct query_info rq; + struct reply_info* rep; + memset(&rq, 0, sizeof(rq)); + rep = parse_reply_in_temp_region(buf, temp, &rq); + if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NOERROR) { + /* parsed successfully */ + struct ub_packed_rrset_key* answer = + reply_find_answer_rrset(&rq, rep); + if(answer && sec == sec_status_secure) { + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DNSKEY) + dnskey = answer; + else ds = answer; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was secure", typestr); + } else if(sec == sec_status_secure && !answer) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s has no content, but is secure, treat as insecure", typestr); + } else if(sec == sec_status_insecure) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was insecure", typestr); + } else if(sec == sec_status_indeterminate) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was indeterminate, treat as insecure", typestr); + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s has nodata", typestr); + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DNSKEY) + reason = "lookup of DNSKEY has nodata"; + else reason = "lookup of DS has nodata"; + } + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_secure) { + /* secure nxdomain, so the zone is like some RPZ zone + * that does not exist in the wider internet, with + * a secure nxdomain answer outside of it. So we + * treat the zonemd zone without a dnssec chain of + * trust, as insecure. */ + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was secure NXDOMAIN, treat as insecure", typestr); + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_insecure) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was insecure NXDOMAIN, treat as insecure", typestr); + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_indeterminate) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s was indeterminate NXDOMAIN, treat as insecure", typestr); + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s has no answer", typestr); + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DNSKEY) + reason = "lookup of DNSKEY has no answer"; + else reason = "lookup of DS has no answer"; + } + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of %s failed", typestr); + if(z->zonemd_callback_qtype == LDNS_RR_TYPE_DNSKEY) + reason = "lookup of DNSKEY failed"; + else reason = "lookup of DS failed"; + } + + if(!reason && !is_insecure && !dnskey && ds) { + dnskey = auth_zone_verify_zonemd_key_with_ds(z, env, + &env->mesh->mods, ds, &is_insecure, &ds_bogus, + &keystorage, downprot?sigalg:NULL); + if(!dnskey && !is_insecure && !reason) + reason = "DNSKEY verify with DS failed"; + } + + if(reason) { + auth_zone_zonemd_fail(z, env, reason, ds_bogus, NULL); + lock_rw_unlock(&z->lock); + return; + } + + auth_zone_verify_zonemd_with_key(z, env, &env->mesh->mods, dnskey, + is_insecure, NULL, downprot?sigalg:NULL); + regional_free_all(env->scratch); + lock_rw_unlock(&z->lock); +} + +/** lookup DNSKEY for ZONEMD verification */ +static int +zonemd_lookup_dnskey(struct auth_zone* z, struct module_env* env) +{ + struct query_info qinfo; + uint16_t qflags = BIT_RD; + struct edns_data edns; + sldns_buffer* buf = env->scratch_buffer; + int fetch_ds = 0; + + if(!z->fallback_enabled) { + /* we cannot actually get the DNSKEY, because it is in the + * zone we have ourselves, and it is not served yet + * (possibly), so fetch type DS */ + fetch_ds = 1; + } + if(z->zonemd_callback_env) { + /* another worker is already working on the callback + * for the DNSKEY lookup for ZONEMD verification. + * We do not also have to do ZONEMD verification, let that + * worker do it */ + auth_zone_log(z->name, VERB_ALGO, + "zonemd needs lookup of %s and that already is worked on by another worker", (fetch_ds?"DS":"DNSKEY")); + return 1; + } + + /* use mesh_new_callback to lookup the DNSKEY, + * and then wait for them to be looked up (in cache, or query) */ + qinfo.qname_len = z->namelen; + qinfo.qname = z->name; + qinfo.qclass = z->dclass; + if(fetch_ds) + qinfo.qtype = LDNS_RR_TYPE_DS; + else qinfo.qtype = LDNS_RR_TYPE_DNSKEY; + qinfo.local_alias = NULL; + if(verbosity >= VERB_ALGO) { + char buf1[512]; + char buf2[LDNS_MAX_DOMAINLEN+1]; + dname_str(z->name, buf2); + snprintf(buf1, sizeof(buf1), "auth zone %s: lookup %s " + "for zonemd verification", buf2, + (fetch_ds?"DS":"DNSKEY")); + log_query_info(VERB_ALGO, buf1, &qinfo); + } + edns.edns_present = 1; + edns.ext_rcode = 0; + edns.edns_version = 0; + edns.bits = EDNS_DO; + edns.opt_list_in = NULL; + edns.opt_list_out = NULL; + edns.opt_list_inplace_cb_out = NULL; + if(sldns_buffer_capacity(buf) < 65535) + edns.udp_size = (uint16_t)sldns_buffer_capacity(buf); + else edns.udp_size = 65535; + + /* store the worker-specific module env for the callback. + * We can then reference this when the callback executes */ + z->zonemd_callback_env = env; + z->zonemd_callback_qtype = qinfo.qtype; + /* the callback can be called straight away */ + lock_rw_unlock(&z->lock); + if(!mesh_new_callback(env->mesh, &qinfo, qflags, &edns, buf, 0, + &auth_zonemd_dnskey_lookup_callback, z, 0)) { + lock_rw_wrlock(&z->lock); + log_err("out of memory lookup of %s for zonemd", + (fetch_ds?"DS":"DNSKEY")); + return 0; + } + lock_rw_wrlock(&z->lock); + return 1; +} + +void auth_zone_verify_zonemd(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, char** result, int offline, int only_online) +{ + char* reason = NULL, *why_bogus = NULL; + struct trust_anchor* anchor = NULL; + struct ub_packed_rrset_key* dnskey = NULL; + struct ub_packed_rrset_key keystorage; + int is_insecure = 0; + /* verify the ZONEMD if present. + * If not present check if absence is allowed by DNSSEC */ + if(!z->zonemd_check) + return; + if(z->data.count == 0) + return; /* no data */ + + /* if zone is under a trustanchor */ + /* is it equal to trustanchor - get dnskey's verified */ + /* else, find chain of trust by fetching DNSKEYs lookup for zone */ + /* result if that, if insecure, means no DNSSEC for the ZONEMD, + * otherwise we have the zone DNSKEY for the DNSSEC verification. */ + if(env->anchors) + anchor = anchors_lookup(env->anchors, z->name, z->namelen, + z->dclass); + if(anchor && anchor->numDS == 0 && anchor->numDNSKEY == 0) { + /* domain-insecure trust anchor for unsigned zones */ + lock_basic_unlock(&anchor->lock); + if(only_online) + return; + dnskey = NULL; + is_insecure = 1; + } else if(anchor && query_dname_compare(z->name, anchor->name) == 0) { + if(only_online) { + lock_basic_unlock(&anchor->lock); + return; + } + /* equal to trustanchor, no need for online lookups */ + dnskey = zonemd_get_dnskey_from_anchor(z, env, mods, anchor, + &is_insecure, &why_bogus, &keystorage); + lock_basic_unlock(&anchor->lock); + if(!dnskey && !reason && !is_insecure) { + reason = "verify DNSKEY RRset with trust anchor failed"; + } + } else if(anchor) { + lock_basic_unlock(&anchor->lock); + /* perform online lookups */ + if(offline) + return; + /* setup online lookups, and wait for them */ + if(zonemd_lookup_dnskey(z, env)) { + /* wait for the lookup */ + return; + } + reason = "could not lookup DNSKEY for chain of trust"; + } else { + /* the zone is not under a trust anchor */ + if(only_online) + return; + dnskey = NULL; + is_insecure = 1; + } + + if(reason) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + + auth_zone_verify_zonemd_with_key(z, env, mods, dnskey, is_insecure, + result, NULL); + regional_free_all(env->scratch); +} + +void auth_zones_pickup_zonemd_verify(struct auth_zones* az, + struct module_env* env) +{ + struct auth_zone key; + uint8_t savezname[255+1]; + size_t savezname_len; + struct auth_zone* z; + key.node.key = &key; + lock_rw_rdlock(&az->lock); + RBTREE_FOR(z, struct auth_zone*, &az->ztree) { + lock_rw_wrlock(&z->lock); + if(!z->zonemd_check) { + lock_rw_unlock(&z->lock); + continue; + } + key.dclass = z->dclass; + key.namelabs = z->namelabs; + if(z->namelen > sizeof(savezname)) { + lock_rw_unlock(&z->lock); + log_err("auth_zones_pickup_zonemd_verify: zone name too long"); + continue; + } + savezname_len = z->namelen; + memmove(savezname, z->name, z->namelen); + lock_rw_unlock(&az->lock); + auth_zone_verify_zonemd(z, env, &env->mesh->mods, NULL, 0, 1); + lock_rw_unlock(&z->lock); + lock_rw_rdlock(&az->lock); + /* find the zone we had before, it is not deleted, + * because we have a flag for that that is processed at + * apply_cfg time */ + key.namelen = savezname_len; + key.name = savezname; + z = (struct auth_zone*)rbtree_search(&az->ztree, &key); + if(!z) + break; + } + lock_rw_unlock(&az->lock); +} diff --git a/contrib/unbound/services/authzone.h b/contrib/unbound/services/authzone.h index 3d94f30d6202..07614ed82963 100644 --- a/contrib/unbound/services/authzone.h +++ b/contrib/unbound/services/authzone.h @@ -132,8 +132,19 @@ struct auth_zone { /** for upstream: this zone answers queries that unbound intends to * send upstream. */ int for_upstream; + /** check ZONEMD records */ + int zonemd_check; + /** reject absence of ZONEMD records */ + int zonemd_reject_absence; /** RPZ zones */ struct rpz* rpz; + /** store the env (worker thread specific) for the zonemd callbacks + * from the mesh with the results of the lookup, if nonNULL, some + * worker has already picked up the zonemd verification task and + * this worker does not have to do it as well. */ + struct module_env* zonemd_callback_env; + /** for the zonemd callback, the type of data looked up */ + uint16_t zonemd_callback_qtype; /** zone has been deleted */ int zone_deleted; /** deletelist pointer, unused normally except during delete */ @@ -474,10 +485,13 @@ struct auth_zones* auth_zones_create(void); * @param cfg: config to apply. * @param setup: if true, also sets up values in the auth zones structure * @param is_rpz: set to 1 if at least one RPZ zone is configured. + * @param env: environment for offline verification. + * @param mods: modules in environment. * @return false on failure. */ int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, - int setup, int* is_rpz); + int setup, int* is_rpz, struct module_env* env, + struct module_stack* mods); /** initial pick up of worker timeouts, ties events to worker event loop * @param az: auth zones structure @@ -622,9 +636,15 @@ int auth_zones_startprobesequence(struct auth_zones* az, /** read auth zone from zonefile. caller must lock zone. false on failure */ int auth_zone_read_zonefile(struct auth_zone* z, struct config_file* cfg); +/** find the apex SOA RRset, if it exists. NULL if no SOA RRset. */ +struct auth_rrset* auth_zone_get_soa_rrset(struct auth_zone* z); + /** find serial number of zone or false if none (no SOA record) */ int auth_zone_get_serial(struct auth_zone* z, uint32_t* serial); +/** Find auth_zone SOA and populate the values in xfr(soa values). */ +int xfr_find_soa(struct auth_zone* z, struct auth_xfer* xfr); + /** compare auth_zones for sorted rbtree */ int auth_zone_cmp(const void* z1, const void* z2); @@ -685,4 +705,86 @@ void auth_xfer_transfer_lookup_callback(void* arg, int rcode, */ int compare_serial(uint32_t a, uint32_t b); +/** + * Generate ZONEMD digest for the auth zone. + * @param z: the auth zone to digest. + * omits zonemd at apex and its RRSIG from the digest. + * @param scheme: the collation scheme to use. Numbers as defined for ZONEMD. + * @param hashalgo: the hash algo, from the registry defined for ZONEMD type. + * @param hash: the result buffer. + * @param buflen: size of the result buffer, must be large enough. or the + * routine fails. + * @param resultlen: size of the hash in the result buffer of the result. + * @param region: temp region for allocs during canonicalisation. + * @param buf: temp buffer during canonicalisation. + * @param reason: failure reason, returns a string, NULL on success. + * @return false on failure. + */ +int auth_zone_generate_zonemd_hash(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t buflen, size_t* resultlen, + struct regional* region, struct sldns_buffer* buf, char** reason); + +/** ZONEMD scheme definitions */ +#define ZONEMD_SCHEME_SIMPLE 1 + +/** ZONEMD hash algorithm definition for SHA384 */ +#define ZONEMD_ALGO_SHA384 1 +/** ZONEMD hash algorithm definition for SHA512 */ +#define ZONEMD_ALGO_SHA512 2 + +/** returns true if a zonemd hash algo is supported */ +int zonemd_hashalgo_supported(int hashalgo); +/** returns true if a zonemd scheme is supported */ +int zonemd_scheme_supported(int scheme); + +/** + * Check ZONEMD digest for the auth zone. + * @param z: auth zone to digest. + * @param scheme: zonemd scheme. + * @param hashalgo: zonemd hash algorithm. + * @param hash: the hash to check. + * @param hashlen: length of hash buffer. + * @param region: temp region for allocs during canonicalisation. + * @param buf: temp buffer during canonicalisation. + * @param reason: string returned with failure reason. + * If the hash cannot be checked, but it is allowed, for unknown + * algorithms, the routine returns success, and the reason is nonNULL, + * with the allowance reason. + * @return false on failure. + */ +int auth_zone_generate_zonemd_check(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, struct regional* region, + struct sldns_buffer* buf, char** reason); + +/** + * Perform ZONEMD checks and verification for the auth zone. + * This includes DNSSEC verification if applicable. + * @param z: auth zone to check. Caller holds lock. wrlock. + * @param env: with temp region, buffer and config. + * @param mods: module stack for validator env. + * @param result: if not NULL, result string strdupped in here. + * @param offline: if true, there is no spawned lookup when online is needed. + * Those zones are skipped for ZONEMD checking. + * @param only_online: if true, only for ZONEMD that need online lookup + * of DNSKEY chain of trust are processed. + */ +void auth_zone_verify_zonemd(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, char** result, int offline, + int only_online); + +/** mesh callback for zonemd on lookup of dnskey */ +void auth_zonemd_dnskey_lookup_callback(void* arg, int rcode, + struct sldns_buffer* buf, enum sec_status sec, char* why_bogus, + int was_ratelimited); + +/** + * Check the ZONEMD records that need online DNSSEC chain lookups, + * for them spawn the lookup process to get it checked out. + * Attaches the lookup process to the worker event base and mesh state. + * @param az: auth zones, every zones is checked. + * @param env: env of the worker where the task is attached. + */ +void auth_zones_pickup_zonemd_verify(struct auth_zones* az, + struct module_env* env); + #endif /* SERVICES_AUTHZONE_H */ diff --git a/contrib/unbound/services/cache/dns.c b/contrib/unbound/services/cache/dns.c index f3149b614b54..632ed79ace49 100644 --- a/contrib/unbound/services/cache/dns.c +++ b/contrib/unbound/services/cache/dns.c @@ -68,21 +68,27 @@ * in a prefetch situation to be updated (without becoming sticky). * @param qrep: update rrsets here if cache is better * @param region: for qrep allocs. + * @param qstarttime: time when delegations were looked up, this is perhaps + * earlier than the time in now. The time is used to determine if RRsets + * of type NS have expired, so that they can only be updated using + * lookups of delegation points that did not use them, since they had + * expired then. */ static void store_rrsets(struct module_env* env, struct reply_info* rep, time_t now, time_t leeway, int pside, struct reply_info* qrep, - struct regional* region) + struct regional* region, time_t qstarttime) { size_t i; + time_t ttl, min_ttl = rep->ttl; /* see if rrset already exists in cache, if not insert it. */ for(i=0; i<rep->rrset_count; i++) { rep->ref[i].key = rep->rrsets[i]; rep->ref[i].id = rep->rrsets[i]->id; /* update ref if it was in the cache */ switch(rrset_cache_update(env->rrset_cache, &rep->ref[i], - env->alloc, now + ((ntohs(rep->ref[i].key->rk.type)== - LDNS_RR_TYPE_NS && !pside)?0:leeway))) { + env->alloc, ((ntohs(rep->ref[i].key->rk.type)== + LDNS_RR_TYPE_NS && !pside)?qstarttime:now + leeway))) { case 0: /* ref unchanged, item inserted */ break; case 2: /* ref updated, cache is superior */ @@ -107,6 +113,15 @@ store_rrsets(struct module_env* env, struct reply_info* rep, time_t now, case 1: /* ref updated, item inserted */ rep->rrsets[i] = rep->ref[i].key; } + /* if ref was updated make sure the message ttl is updated to + * the minimum of the current rrsets. */ + ttl = ((struct packed_rrset_data*)rep->rrsets[i]->entry.data)->ttl; + if(ttl < min_ttl) min_ttl = ttl; + } + if(min_ttl < rep->ttl) { + rep->ttl = min_ttl; + rep->prefetch_ttl = PREFETCH_TTL_CALC(rep->ttl); + rep->serve_expired_ttl = rep->ttl + SERVE_EXPIRED_TTL; } } @@ -127,35 +142,11 @@ msg_cache_remove(struct module_env* env, uint8_t* qname, size_t qnamelen, slabhash_remove(env->msg_cache, h, &k); } -/** remove servfail msg cache entry */ -static void -msg_del_servfail(struct module_env* env, struct query_info* qinfo, - uint32_t flags) -{ - struct msgreply_entry* e; - /* see if the entry is servfail, and then remove it, so that - * lookups move from the cacheresponse stage to the recursionresponse - * stage */ - e = msg_cache_lookup(env, qinfo->qname, qinfo->qname_len, - qinfo->qtype, qinfo->qclass, flags, 0, 0); - if(!e) return; - /* we don't check for the ttl here, also expired servfail entries - * are removed. If the user uses serve-expired, they would still be - * used to answer from cache */ - if(FLAGS_GET_RCODE(((struct reply_info*)e->entry.data)->flags) - != LDNS_RCODE_SERVFAIL) { - lock_rw_unlock(&e->entry.lock); - return; - } - lock_rw_unlock(&e->entry.lock); - msg_cache_remove(env, qinfo->qname, qinfo->qname_len, qinfo->qtype, - qinfo->qclass, flags); -} - void dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, hashvalue_type hash, struct reply_info* rep, time_t leeway, int pside, - struct reply_info* qrep, uint32_t flags, struct regional* region) + struct reply_info* qrep, uint32_t flags, struct regional* region, + time_t qstarttime) { struct msgreply_entry* e; time_t ttl = rep->ttl; @@ -170,18 +161,26 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, /* there was a reply_info_sortref(rep) here but it seems to be * unnecessary, because the cache gets locked per rrset. */ reply_info_set_ttls(rep, *env->now); - store_rrsets(env, rep, *env->now, leeway, pside, qrep, region); + store_rrsets(env, rep, *env->now, leeway, pside, qrep, region, + qstarttime); if(ttl == 0 && !(flags & DNSCACHE_STORE_ZEROTTL)) { /* we do not store the message, but we did store the RRs, * which could be useful for delegation information */ verbose(VERB_ALGO, "TTL 0: dropped msg from cache"); - free(rep); - /* if the message is SERVFAIL in cache, remove that SERVFAIL, + reply_info_delete(rep, NULL); + /* if the message is in the cache, remove that msg, * so that the TTL 0 response can be returned for future - * responses (i.e. don't get answered by the servfail from + * responses (i.e. don't get answered from * cache, but instead go to recursion to get this TTL0 - * response). */ - msg_del_servfail(env, qinfo, flags); + * response). + * Possible messages that could be in the cache: + * - SERVFAIL + * - NXDOMAIN + * - NODATA + * - an older record that is expired + * - an older record that did not yet expire */ + msg_cache_remove(env, qinfo->qname, qinfo->qname_len, + qinfo->qtype, qinfo->qclass, flags); return; } @@ -197,7 +196,8 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, /** find closest NS or DNAME and returns the rrset (locked) */ static struct ub_packed_rrset_key* find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen, - uint16_t qclass, time_t now, uint16_t searchtype, int stripfront) + uint16_t qclass, time_t now, uint16_t searchtype, int stripfront, + int noexpiredabove, uint8_t* expiretop, size_t expiretoplen) { struct ub_packed_rrset_key *rrset; uint8_t lablen; @@ -212,8 +212,40 @@ find_closest_of_type(struct module_env* env, uint8_t* qname, size_t qnamelen, /* snip off front part of qname until the type is found */ while(qnamelen > 0) { if((rrset = rrset_cache_lookup(env->rrset_cache, qname, - qnamelen, searchtype, qclass, 0, now, 0))) - return rrset; + qnamelen, searchtype, qclass, 0, now, 0))) { + uint8_t* origqname = qname; + size_t origqnamelen = qnamelen; + if(!noexpiredabove) + return rrset; + /* if expiretop set, do not look above it, but + * qname is equal, so the just found result is also + * the nonexpired above part. */ + if(expiretop && qnamelen == expiretoplen && + query_dname_compare(qname, expiretop)==0) + return rrset; + /* check for expiry, but we have to let go of the rrset + * for the lock ordering */ + lock_rw_unlock(&rrset->entry.lock); + /* the rrset_cache_expired_above function always takes + * off one label (if qnamelen>0) and returns the final + * qname where it searched, so we can continue from + * there turning the O N*N search into O N. */ + if(!rrset_cache_expired_above(env->rrset_cache, &qname, + &qnamelen, searchtype, qclass, now, expiretop, + expiretoplen)) { + /* we want to return rrset, but it may be + * gone from cache, if so, just loop like + * it was not in the cache in the first place. + */ + if((rrset = rrset_cache_lookup(env-> + rrset_cache, origqname, origqnamelen, + searchtype, qclass, 0, now, 0))) { + return rrset; + } + } + log_nametypeclass(VERB_ALGO, "ignoring rrset because expired rrsets exist above it", origqname, searchtype, qclass); + continue; + } /* snip off front label */ lablen = *qname; @@ -324,6 +356,9 @@ cache_fill_missing(struct module_env* env, uint16_t qclass, struct ub_packed_rrset_key* akey; time_t now = *env->now; for(ns = dp->nslist; ns; ns = ns->next) { + if(ns->cache_lookup_count > ITERATOR_NAME_CACHELOOKUP_MAX) + continue; + ns->cache_lookup_count++; akey = rrset_cache_lookup(env->rrset_cache, ns->name, ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0); if(akey) { @@ -428,6 +463,7 @@ dns_msg_create(uint8_t* qname, size_t qnamelen, uint16_t qtype, return NULL; /* integer overflow protection */ msg->rep->flags = BIT_QR; /* with QR, no AA */ msg->rep->qdcount = 1; + msg->rep->reason_bogus = LDNS_EDE_NONE; msg->rep->rrsets = (struct ub_packed_rrset_key**) regional_alloc(region, capacity*sizeof(struct ub_packed_rrset_key*)); @@ -461,7 +497,8 @@ dns_msg_ansadd(struct dns_msg* msg, struct regional* region, struct delegpt* dns_cache_find_delegation(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, - struct regional* region, struct dns_msg** msg, time_t now) + struct regional* region, struct dns_msg** msg, time_t now, + int noexpiredabove, uint8_t* expiretop, size_t expiretoplen) { /* try to find closest NS rrset */ struct ub_packed_rrset_key* nskey; @@ -469,7 +506,7 @@ dns_cache_find_delegation(struct module_env* env, uint8_t* qname, struct delegpt* dp; nskey = find_closest_of_type(env, qname, qnamelen, qclass, now, - LDNS_RR_TYPE_NS, 0); + LDNS_RR_TYPE_NS, 0, noexpiredabove, expiretop, expiretoplen); if(!nskey) /* hope the caller has hints to prime or something */ return NULL; nsdata = (struct packed_rrset_data*)nskey->entry.data; @@ -524,6 +561,8 @@ gen_dns_msg(struct regional* region, struct query_info* q, size_t num) sizeof(struct reply_info) - sizeof(struct rrset_ref)); if(!msg->rep) return NULL; + msg->rep->reason_bogus = LDNS_EDE_NONE; + msg->rep->reason_bogus_str = NULL; if(num > RR_COUNT_MAX) return NULL; /* integer overflow protection */ msg->rep->rrsets = (struct ub_packed_rrset_key**) @@ -550,6 +589,14 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r, r->serve_expired_ttl < now) { return NULL; } + /* Ignore expired failure answers */ + if(FLAGS_GET_RCODE(r->flags) != + LDNS_RCODE_NOERROR && + FLAGS_GET_RCODE(r->flags) != + LDNS_RCODE_NXDOMAIN && + FLAGS_GET_RCODE(r->flags) != + LDNS_RCODE_YXDOMAIN) + return 0; } else { return NULL; } @@ -577,6 +624,11 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r, msg->rep->ar_numrrsets = r->ar_numrrsets; msg->rep->rrset_count = r->rrset_count; msg->rep->authoritative = r->authoritative; + msg->rep->reason_bogus = r->reason_bogus; + if(r->reason_bogus_str) { + msg->rep->reason_bogus_str = regional_strdup(region, r->reason_bogus_str); + } + if(!rrset_array_lock(r->ref, r->rrset_count, now_control)) { return NULL; } @@ -608,6 +660,28 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r, return msg; } +struct dns_msg* +dns_msg_deepcopy_region(struct dns_msg* origin, struct regional* region) +{ + size_t i; + struct dns_msg* res = NULL; + res = gen_dns_msg(region, &origin->qinfo, origin->rep->rrset_count); + if(!res) return NULL; + *res->rep = *origin->rep; + if(origin->rep->reason_bogus_str) { + res->rep->reason_bogus_str = regional_strdup(region, + origin->rep->reason_bogus_str); + } + for(i=0; i<res->rep->rrset_count; i++) { + res->rep->rrsets[i] = packed_rrset_copy_region( + origin->rep->rrsets[i], region, 0); + if(!res->rep->rrsets[i]) { + return NULL; + } + } + return res; +} + /** synthesize RRset-only response from cached RRset item */ static struct dns_msg* rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region, @@ -632,6 +706,7 @@ rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region, msg->rep->ns_numrrsets = 0; msg->rep->ar_numrrsets = 0; msg->rep->rrset_count = 1; + msg->rep->reason_bogus = LDNS_EDE_NONE; msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now); if(!msg->rep->rrsets[0]) /* copy CNAME */ return NULL; @@ -670,6 +745,7 @@ synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region, msg->rep->ns_numrrsets = 0; msg->rep->ar_numrrsets = 0; msg->rep->rrset_count = 1; + msg->rep->reason_bogus = LDNS_EDE_NONE; msg->rep->rrsets[0] = packed_rrset_copy_region(rrset, region, now); if(!msg->rep->rrsets[0]) /* copy DNAME */ return NULL; @@ -712,7 +788,7 @@ synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region, if(!newd) return NULL; ck->entry.data = newd; - newd->ttl = 0; /* 0 for synthesized CNAME TTL */ + newd->ttl = d->ttl - now; /* RFC6672: synth CNAME TTL == DNAME TTL */ newd->count = 1; newd->rrsig_count = 0; newd->trust = rrset_trust_ans_noAA; @@ -801,7 +877,7 @@ struct dns_msg* dns_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, uint16_t flags, struct regional* region, struct regional* scratch, - int no_partial) + int no_partial, uint8_t* dpname, size_t dpnamelen) { struct lruhash_entry* e; struct query_info k; @@ -835,7 +911,7 @@ dns_cache_lookup(struct module_env* env, * consistent with the DNAME */ if(!no_partial && (rrset=find_closest_of_type(env, qname, qnamelen, qclass, now, - LDNS_RR_TYPE_DNAME, 1))) { + LDNS_RR_TYPE_DNAME, 1, 0, NULL, 0))) { /* synthesize a DNAME+CNAME message based on this */ enum sec_status sec_status = sec_status_unchecked; struct dns_msg* msg = synth_dname_msg(rrset, region, now, &k, @@ -923,6 +999,9 @@ dns_cache_lookup(struct module_env* env, * the same. We search upwards for NXDOMAINs. */ if(env->cfg->harden_below_nxdomain) { while(!dname_is_root(k.qname)) { + if(dpname && dpnamelen + && !dname_subdomain_c(k.qname, dpname)) + break; /* no synth nxdomain above the stub */ dname_remove_label(&k.qname, &k.qname_len); h = query_info_hash(&k, flags); e = slabhash_lookup(env->msg_cache, h, &k, 0); @@ -965,7 +1044,7 @@ dns_cache_lookup(struct module_env* env, int dns_cache_store(struct module_env* env, struct query_info* msgqinf, struct reply_info* msgrep, int is_referral, time_t leeway, int pside, - struct regional* region, uint32_t flags) + struct regional* region, uint32_t flags, time_t qstarttime) { struct reply_info* rep = NULL; /* alloc, malloc properly (not in region, like msg is) */ @@ -975,7 +1054,6 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf, /* ttl must be relative ;i.e. 0..86400 not time(0)+86400. * the env->now is added to message and RRsets in this routine. */ /* the leeway is used to invalidate other rrsets earlier */ - if(is_referral) { /* store rrsets */ struct rrset_ref ref; @@ -988,11 +1066,11 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf, /*ignore ret: it was in the cache, ref updated */ /* no leeway for typeNS */ (void)rrset_cache_update(env->rrset_cache, &ref, - env->alloc, *env->now + + env->alloc, ((ntohs(ref.key->rk.type)==LDNS_RR_TYPE_NS - && !pside) ? 0:leeway)); + && !pside) ? qstarttime:*env->now + leeway)); } - free(rep); + reply_info_delete(rep, NULL); return 1; } else { /* store msg, and rrsets */ @@ -1012,7 +1090,7 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf, rep->flags &= ~(BIT_AA | BIT_CD); h = query_info_hash(&qinf, (uint16_t)flags); dns_cache_store_msg(env, &qinf, h, rep, leeway, pside, msgrep, - flags, region); + flags, region, qstarttime); /* qname is used inside query_info_entrysetup, and set to * NULL. If it has not been used, free it. free(0) is safe. */ free(qinf.qname); diff --git a/contrib/unbound/services/cache/dns.h b/contrib/unbound/services/cache/dns.h index f1b77fb36c00..c2bf23c6de54 100644 --- a/contrib/unbound/services/cache/dns.h +++ b/contrib/unbound/services/cache/dns.h @@ -88,11 +88,13 @@ struct dns_msg { * @param flags: flags with BIT_CD for AAAA queries in dns64 translation. * The higher 16 bits are used internally to customize the cache policy. * (See DNSCACHE_STORE_xxx flags). + * @param qstarttime: time when the query was started, and thus when the + * delegations were looked up. * @return 0 on alloc error (out of memory). */ int dns_cache_store(struct module_env* env, struct query_info* qinf, struct reply_info* rep, int is_referral, time_t leeway, int pside, - struct regional* region, uint32_t flags); + struct regional* region, uint32_t flags, time_t qstarttime); /** * Store message in the cache. Stores in message cache and rrset cache. @@ -112,11 +114,14 @@ int dns_cache_store(struct module_env* env, struct query_info* qinf, * can be updated to full TTL even in prefetch situations. * @param qrep: message that can be altered with better rrs from cache. * @param flags: customization flags for the cache policy. + * @param qstarttime: time when the query was started, and thus when the + * delegations were looked up. * @param region: to allocate into for qmsg. */ void dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, hashvalue_type hash, struct reply_info* rep, time_t leeway, int pside, - struct reply_info* qrep, uint32_t flags, struct regional* region); + struct reply_info* qrep, uint32_t flags, struct regional* region, + time_t qstarttime); /** * Find a delegation from the cache. @@ -129,11 +134,18 @@ void dns_cache_store_msg(struct module_env* env, struct query_info* qinfo, * @param msg: if not NULL, delegation message is returned here, synthesized * from the cache. * @param timenow: the time now, for checking if TTL on cache entries is OK. + * @param noexpiredabove: if set, no expired NS rrsets above the one found + * are tolerated. It only returns delegations where the delegations above + * it are valid. + * @param expiretop: if not NULL, name where check for expiry ends for + * noexpiredabove. + * @param expiretoplen: length of expiretop dname. * @return new delegation or NULL on error or if not found in cache. */ struct delegpt* dns_cache_find_delegation(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, - struct regional* region, struct dns_msg** msg, time_t timenow); + struct regional* region, struct dns_msg** msg, time_t timenow, + int noexpiredabove, uint8_t* expiretop, size_t expiretoplen); /** * generate dns_msg from cached message @@ -152,6 +164,15 @@ struct dns_msg* tomsg(struct module_env* env, struct query_info* q, struct reply_info* r, struct regional* region, time_t now, int allow_expired, struct regional* scratch); +/** + * Deep copy a dns_msg to a region. + * @param origin: the dns_msg to copy. + * @param region: the region to copy all the data to. + * @return the new dns_msg or NULL on malloc error. + */ +struct dns_msg* dns_msg_deepcopy_region(struct dns_msg* origin, + struct regional* region); + /** * Find cached message * @param env: module environment with the DNS cache. @@ -164,6 +185,8 @@ struct dns_msg* tomsg(struct module_env* env, struct query_info* q, * @param scratch: where to allocate temporary data. * @param no_partial: if true, only complete messages and not a partial * one (with only the start of the CNAME chain and not the rest). + * @param dpname: if not NULL, do not return NXDOMAIN above this name. + * @param dpnamelen: length of dpname. * @return new response message (alloced in region, rrsets do not have IDs). * or NULL on error or if not found in cache. * TTLs are made relative to the current time. @@ -171,7 +194,7 @@ struct dns_msg* tomsg(struct module_env* env, struct query_info* q, struct dns_msg* dns_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, uint16_t flags, struct regional* region, struct regional* scratch, - int no_partial); + int no_partial, uint8_t* dpname, size_t dpnamelen); /** * find and add A and AAAA records for missing nameservers in delegpt diff --git a/contrib/unbound/services/cache/infra.c b/contrib/unbound/services/cache/infra.c index 2d16bcd6e405..457685ab5985 100644 --- a/contrib/unbound/services/cache/infra.c +++ b/contrib/unbound/services/cache/infra.c @@ -67,6 +67,11 @@ int infra_dp_ratelimit = 0; * in queries per second. */ int infra_ip_ratelimit = 0; +/** ratelimit value for client ip addresses, + * in queries per second. + * For clients with a valid DNS Cookie. */ +int infra_ip_ratelimit_cookie = 0; + size_t infra_sizefunc(void* k, void* ATTR_UNUSED(d)) { @@ -229,6 +234,81 @@ setup_domain_limits(struct infra_cache* infra, struct config_file* cfg) return 1; } +/** find or create element in wait limit netblock tree */ +static struct wait_limit_netblock_info* +wait_limit_netblock_findcreate(struct infra_cache* infra, char* str, + int cookie) +{ + rbtree_type* tree; + struct sockaddr_storage addr; + int net; + socklen_t addrlen; + struct wait_limit_netblock_info* d; + + if(!netblockstrtoaddr(str, 0, &addr, &addrlen, &net)) { + log_err("cannot parse wait limit netblock '%s'", str); + return 0; + } + + /* can we find it? */ + if(cookie) + tree = &infra->wait_limits_cookie_netblock; + else + tree = &infra->wait_limits_netblock; + d = (struct wait_limit_netblock_info*)addr_tree_find(tree, &addr, + addrlen, net); + if(d) + return d; + + /* create it */ + d = (struct wait_limit_netblock_info*)calloc(1, sizeof(*d)); + if(!d) + return NULL; + d->limit = -1; + if(!addr_tree_insert(tree, &d->node, &addr, addrlen, net)) { + log_err("duplicate element in domainlimit tree"); + free(d); + return NULL; + } + return d; +} + + +/** insert wait limit information into lookup tree */ +static int +infra_wait_limit_netblock_insert(struct infra_cache* infra, + struct config_file* cfg) +{ + struct config_str2list* p; + struct wait_limit_netblock_info* d; + for(p = cfg->wait_limit_netblock; p; p = p->next) { + d = wait_limit_netblock_findcreate(infra, p->str, 0); + if(!d) + return 0; + d->limit = atoi(p->str2); + } + for(p = cfg->wait_limit_cookie_netblock; p; p = p->next) { + d = wait_limit_netblock_findcreate(infra, p->str, 1); + if(!d) + return 0; + d->limit = atoi(p->str2); + } + return 1; +} + +/** setup wait limits tree (0 on failure) */ +static int +setup_wait_limits(struct infra_cache* infra, struct config_file* cfg) +{ + addr_tree_init(&infra->wait_limits_netblock); + addr_tree_init(&infra->wait_limits_cookie_netblock); + if(!infra_wait_limit_netblock_insert(infra, cfg)) + return 0; + addr_tree_init_parents(&infra->wait_limits_netblock); + addr_tree_init_parents(&infra->wait_limits_cookie_netblock); + return 1; +} + struct infra_cache* infra_create(struct config_file* cfg) { @@ -236,6 +316,9 @@ infra_create(struct config_file* cfg) sizeof(struct infra_cache)); size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ sizeof(struct infra_data)+INFRA_BYTES_NAME); + if(!infra) { + return NULL; + } infra->hosts = slabhash_create(cfg->infra_cache_slabs, INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, &infra_delkeyfunc, &infra_deldatafunc, NULL); @@ -259,6 +342,10 @@ infra_create(struct config_file* cfg) infra_delete(infra); return NULL; } + if(!setup_wait_limits(infra, cfg)) { + infra_delete(infra); + return NULL; + } infra_ip_ratelimit = cfg->ip_ratelimit; infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs, INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc, @@ -279,6 +366,12 @@ static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg)) } } +/** delete wait_limit_netblock_info entries */ +static void wait_limit_netblock_del(rbnode_type* n, void* ATTR_UNUSED(arg)) +{ + free(n); +} + void infra_delete(struct infra_cache* infra) { @@ -288,6 +381,10 @@ infra_delete(struct infra_cache* infra) slabhash_delete(infra->domain_rates); traverse_postorder(&infra->domain_limits, domain_limit_free, NULL); slabhash_delete(infra->client_ip_rates); + traverse_postorder(&infra->wait_limits_netblock, + wait_limit_netblock_del, NULL); + traverse_postorder(&infra->wait_limits_cookie_netblock, + wait_limit_netblock_del, NULL); free(infra); } @@ -718,13 +815,13 @@ infra_get_lame_rtt(struct infra_cache* infra, else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; } } + /* expired entry */ if(timenow > host->ttl) { - /* expired entry */ + /* see if this can be a re-probe of an unresponsive server */ /* minus 1000 because that is outside of the RTTBAND, so * blacklisted servers stay blacklisted if this is chosen */ - if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT || - infra->infra_keep_probing) { + if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) { lock_rw_unlock(&e->lock); *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; *lame = 0; @@ -831,14 +928,13 @@ static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra, /** find data item in array for ip addresses */ static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra, - struct comm_reply* repinfo, int wr) + struct sockaddr_storage* addr, socklen_t addrlen, int wr) { struct ip_rate_key key; - hashvalue_type h = hash_addr(&(repinfo->addr), - repinfo->addrlen, 0); + hashvalue_type h = hash_addr(addr, addrlen, 0); memset(&key, 0, sizeof(key)); - key.addr = repinfo->addr; - key.addrlen = repinfo->addrlen; + key.addr = *addr; + key.addrlen = addrlen; key.entry.hash = h; return slabhash_lookup(infra->client_ip_rates, h, &key, wr); } @@ -873,10 +969,10 @@ static void infra_create_ratedata(struct infra_cache* infra, /** create rate data item for ip address */ static void infra_ip_create_ratedata(struct infra_cache* infra, - struct comm_reply* repinfo, time_t timenow) + struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow, + int mesh_wait) { - hashvalue_type h = hash_addr(&(repinfo->addr), - repinfo->addrlen, 0); + hashvalue_type h = hash_addr(addr, addrlen, 0); struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k)); struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d)); if(!k || !d) { @@ -884,19 +980,21 @@ static void infra_ip_create_ratedata(struct infra_cache* infra, free(d); return; /* alloc failure */ } - k->addr = repinfo->addr; - k->addrlen = repinfo->addrlen; + k->addr = *addr; + k->addrlen = addrlen; lock_rw_init(&k->entry.lock); k->entry.hash = h; k->entry.key = k; k->entry.data = d; d->qps[0] = 1; d->timestamp[0] = timenow; + d->mesh_wait = mesh_wait; slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL); } -/** find the second and return its rate counter, if none, remove oldest */ -static int* infra_rate_find_second(void* data, time_t t) +/** Find the second and return its rate counter. If none and should_add, remove + * oldest to accommodate. Else return none. */ +static int* infra_rate_find_second_or_none(void* data, time_t t, int should_add) { struct rate_data* d = (struct rate_data*)data; int i, oldest; @@ -904,6 +1002,7 @@ static int* infra_rate_find_second(void* data, time_t t) if(d->timestamp[i] == t) return &(d->qps[i]); } + if(!should_add) return NULL; /* remove oldest timestamp, and insert it at t with 0 qps */ oldest = 0; for(i=0; i<RATE_WINDOW; i++) { @@ -915,21 +1014,41 @@ static int* infra_rate_find_second(void* data, time_t t) return &(d->qps[oldest]); } -int infra_rate_max(void* data, time_t now) +/** find the second and return its rate counter, if none, remove oldest to + * accommodate */ +static int* infra_rate_give_second(void* data, time_t t) +{ + return infra_rate_find_second_or_none(data, t, 1); +} + +/** find the second and return its rate counter only if it exists. Caller + * should check for NULL return value */ +static int* infra_rate_get_second(void* data, time_t t) +{ + return infra_rate_find_second_or_none(data, t, 0); +} + +int infra_rate_max(void* data, time_t now, int backoff) { struct rate_data* d = (struct rate_data*)data; int i, max = 0; for(i=0; i<RATE_WINDOW; i++) { - if(now-d->timestamp[i] <= RATE_WINDOW) { - if(d->qps[i] > max) + if(backoff) { + if(now-d->timestamp[i] <= RATE_WINDOW && + d->qps[i] > max) { max = d->qps[i]; + } + } else { + if(now == d->timestamp[i]) { + return d->qps[i]; + } } } return max; } int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, - size_t namelen, time_t timenow, struct query_info* qinfo, + size_t namelen, time_t timenow, int backoff, struct query_info* qinfo, struct comm_reply* replylist) { int lim, max; @@ -946,13 +1065,13 @@ int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, /* find or insert ratedata */ entry = infra_find_ratedata(infra, name, namelen, 1); if(entry) { - int premax = infra_rate_max(entry->data, timenow); - int* cur = infra_rate_find_second(entry->data, timenow); + int premax = infra_rate_max(entry->data, timenow, backoff); + int* cur = infra_rate_give_second(entry->data, timenow); (*cur)++; - max = infra_rate_max(entry->data, timenow); + max = infra_rate_max(entry->data, timenow, backoff); lock_rw_unlock(&entry->lock); - if(premax < lim && max >= lim) { + if(premax <= lim && max > lim) { char buf[257], qnm[257], ts[12], cs[12], ip[128]; dname_str(name, buf); dname_str(qinfo->qname, qnm); @@ -960,19 +1079,19 @@ int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs)); ip[0]=0; if(replylist) { - addr_to_str((struct sockaddr_storage *)&replylist->addr, - replylist->addrlen, ip, sizeof(ip)); + addr_to_str((struct sockaddr_storage *)&replylist->remote_addr, + replylist->remote_addrlen, ip, sizeof(ip)); verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip); } else { verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts); } } - return (max < lim); + return (max <= lim); } /* create */ infra_create_ratedata(infra, name, namelen, timenow); - return (1 < lim); + return (1 <= lim); } void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, @@ -984,14 +1103,19 @@ void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, return; /* not enabled */ entry = infra_find_ratedata(infra, name, namelen, 1); if(!entry) return; /* not cached */ - cur = infra_rate_find_second(entry->data, timenow); + cur = infra_rate_get_second(entry->data, timenow); + if(cur == NULL) { + /* our timenow is not available anymore; nothing to decrease */ + lock_rw_unlock(&entry->lock); + return; + } if((*cur) > 0) (*cur)--; lock_rw_unlock(&entry->lock); } int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, - size_t namelen, time_t timenow) + size_t namelen, time_t timenow, int backoff) { struct lruhash_entry* entry; int lim, max; @@ -1007,10 +1131,10 @@ int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, entry = infra_find_ratedata(infra, name, namelen, 0); if(!entry) return 0; /* not cached */ - max = infra_rate_max(entry->data, timenow); + max = infra_rate_max(entry->data, timenow, backoff); lock_rw_unlock(&entry->lock); - return (max >= lim); + return (max > lim); } size_t @@ -1023,8 +1147,50 @@ infra_get_mem(struct infra_cache* infra) return s; } +/* Returns 1 if the limit has not been exceeded, 0 otherwise. */ +static int +check_ip_ratelimit(struct sockaddr_storage* addr, socklen_t addrlen, + struct sldns_buffer* buffer, int premax, int max, int has_cookie) +{ + int limit; + + if(has_cookie) limit = infra_ip_ratelimit_cookie; + else limit = infra_ip_ratelimit; + + /* Disabled */ + if(limit == 0) return 1; + + if(premax <= limit && max > limit) { + char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12]; + addr_to_str(addr, addrlen, client_ip, sizeof(client_ip)); + qnm[0]=0; + if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE && + LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) { + (void)sldns_wire2str_rrquestion_buf( + sldns_buffer_at(buffer, LDNS_HEADER_SIZE), + sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE, + qnm, sizeof(qnm)); + if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n') + qnm[strlen(qnm)-1] = 0; /*remove newline*/ + if(strchr(qnm, '\t')) + *strchr(qnm, '\t') = ' '; + if(strchr(qnm, '\t')) + *strchr(qnm, '\t') = ' '; + verbose(VERB_OPS, "ip_ratelimit exceeded %s %d%s %s", + client_ip, limit, + has_cookie?"(cookie)":"", qnm); + } else { + verbose(VERB_OPS, "ip_ratelimit exceeded %s %d%s (no query name)", + client_ip, limit, + has_cookie?"(cookie)":""); + } + } + return (max <= limit); +} + int infra_ip_ratelimit_inc(struct infra_cache* infra, - struct comm_reply* repinfo, time_t timenow, struct sldns_buffer* buffer) + struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow, + int has_cookie, int backoff, struct sldns_buffer* buffer) { int max; struct lruhash_entry* entry; @@ -1034,42 +1200,93 @@ int infra_ip_ratelimit_inc(struct infra_cache* infra, return 1; } /* find or insert ratedata */ - entry = infra_find_ip_ratedata(infra, repinfo, 1); + entry = infra_find_ip_ratedata(infra, addr, addrlen, 1); if(entry) { - int premax = infra_rate_max(entry->data, timenow); - int* cur = infra_rate_find_second(entry->data, timenow); + int premax = infra_rate_max(entry->data, timenow, backoff); + int* cur = infra_rate_give_second(entry->data, timenow); (*cur)++; - max = infra_rate_max(entry->data, timenow); + max = infra_rate_max(entry->data, timenow, backoff); lock_rw_unlock(&entry->lock); - - if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) { - char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12]; - addr_to_str((struct sockaddr_storage *)&repinfo->addr, - repinfo->addrlen, client_ip, sizeof(client_ip)); - qnm[0]=0; - if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE && - LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) { - (void)sldns_wire2str_rrquestion_buf( - sldns_buffer_at(buffer, LDNS_HEADER_SIZE), - sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE, - qnm, sizeof(qnm)); - if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n') - qnm[strlen(qnm)-1] = 0; /*remove newline*/ - if(strchr(qnm, '\t')) - *strchr(qnm, '\t') = ' '; - if(strchr(qnm, '\t')) - *strchr(qnm, '\t') = ' '; - verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s", - client_ip, infra_ip_ratelimit, qnm); - } else { - verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)", - client_ip, infra_ip_ratelimit); - } - } - return (max <= infra_ip_ratelimit); + return check_ip_ratelimit(addr, addrlen, buffer, premax, max, + has_cookie); } /* create */ - infra_ip_create_ratedata(infra, repinfo, timenow); + infra_ip_create_ratedata(infra, addr, addrlen, timenow, 0); + return 1; +} + +int infra_wait_limit_allowed(struct infra_cache* infra, struct comm_reply* rep, + int cookie_valid, struct config_file* cfg) +{ + struct lruhash_entry* entry; + if(cfg->wait_limit == 0) + return 1; + + entry = infra_find_ip_ratedata(infra, &rep->client_addr, + rep->client_addrlen, 0); + if(entry) { + rbtree_type* tree; + struct wait_limit_netblock_info* w; + struct rate_data* d = (struct rate_data*)entry->data; + int mesh_wait = d->mesh_wait; + lock_rw_unlock(&entry->lock); + + /* have the wait amount, check how much is allowed */ + if(cookie_valid) + tree = &infra->wait_limits_cookie_netblock; + else tree = &infra->wait_limits_netblock; + w = (struct wait_limit_netblock_info*)addr_tree_lookup(tree, + &rep->client_addr, rep->client_addrlen); + if(w) { + if(w->limit != -1 && mesh_wait > w->limit) + return 0; + } else { + /* if there is no IP netblock specific information, + * use the configured value. */ + if(mesh_wait > (cookie_valid?cfg->wait_limit_cookie: + cfg->wait_limit)) + return 0; + } + } return 1; } + +void infra_wait_limit_inc(struct infra_cache* infra, struct comm_reply* rep, + time_t timenow, struct config_file* cfg) +{ + struct lruhash_entry* entry; + if(cfg->wait_limit == 0) + return; + + /* Find it */ + entry = infra_find_ip_ratedata(infra, &rep->client_addr, + rep->client_addrlen, 1); + if(entry) { + struct rate_data* d = (struct rate_data*)entry->data; + d->mesh_wait++; + lock_rw_unlock(&entry->lock); + return; + } + + /* Create it */ + infra_ip_create_ratedata(infra, &rep->client_addr, + rep->client_addrlen, timenow, 1); +} + +void infra_wait_limit_dec(struct infra_cache* infra, struct comm_reply* rep, + struct config_file* cfg) +{ + struct lruhash_entry* entry; + if(cfg->wait_limit == 0) + return; + + entry = infra_find_ip_ratedata(infra, &rep->client_addr, + rep->client_addrlen, 1); + if(entry) { + struct rate_data* d = (struct rate_data*)entry->data; + if(d->mesh_wait > 0) + d->mesh_wait--; + lock_rw_unlock(&entry->lock); + } +} diff --git a/contrib/unbound/services/cache/infra.h b/contrib/unbound/services/cache/infra.h index 14f97c4c64d3..ee6f384de345 100644 --- a/contrib/unbound/services/cache/infra.h +++ b/contrib/unbound/services/cache/infra.h @@ -122,6 +122,10 @@ struct infra_cache { rbtree_type domain_limits; /** hash table with query rates per client ip: ip_rate_key, ip_rate_data */ struct slabhash* client_ip_rates; + /** tree of addr_tree_node, with wait_limit_netblock_info information */ + rbtree_type wait_limits_netblock; + /** tree of addr_tree_node, with wait_limit_netblock_info information */ + rbtree_type wait_limits_cookie_netblock; }; /** ratelimit, unless overridden by domain_limits, 0 is off */ @@ -153,6 +157,8 @@ struct rate_key { /** ip ratelimit, 0 is off */ extern int infra_ip_ratelimit; +/** ip ratelimit for DNS Cookie clients, 0 is off */ +extern int infra_ip_ratelimit_cookie; /** * key for ip_ratelimit lookups, a source IP. @@ -182,10 +188,22 @@ struct rate_data { /** what the timestamp is of the qps array members, counter is * valid for that timestamp. Usually now and now-1. */ time_t timestamp[RATE_WINDOW]; + /** the number of queries waiting in the mesh */ + int mesh_wait; }; #define ip_rate_data rate_data +/** + * Data to store the configuration per netblock for the wait limit + */ +struct wait_limit_netblock_info { + /** The addr tree node, this must be first. */ + struct addr_tree_node node; + /** the limit on the amount */ + int limit; +}; + /** infra host cache default hash lookup size */ #define INFRA_HOST_STARTSIZE 32 /** bytes per zonename reserved in the hostcache, dnamelen(zonename.com.) */ @@ -368,6 +386,7 @@ long long infra_get_host_rto(struct infra_cache* infra, * @param name: zone name * @param namelen: zone name length * @param timenow: what time it is now. + * @param backoff: if backoff is enabled. * @param qinfo: for logging, query name. * @param replylist: for logging, querier's address (if any). * @return 1 if it could be incremented. 0 if the increment overshot the @@ -375,7 +394,7 @@ long long infra_get_host_rto(struct infra_cache* infra, * Failures like alloc failures are not returned (probably as 1). */ int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, - size_t namelen, time_t timenow, struct query_info* qinfo, + size_t namelen, time_t timenow, int backoff, struct query_info* qinfo, struct comm_reply* replylist); /** @@ -398,13 +417,15 @@ void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, * @param name: zone name * @param namelen: zone name length * @param timenow: what time it is now. + * @param backoff: if backoff is enabled. * @return true if exceeded. */ int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, - size_t namelen, time_t timenow); + size_t namelen, time_t timenow, int backoff); -/** find the maximum rate stored, not too old. 0 if no information. */ -int infra_rate_max(void* data, time_t now); +/** find the maximum rate stored. 0 if no information. + * When backoff is enabled look for the maximum in the whole RATE_WINDOW. */ +int infra_rate_max(void* data, time_t now, int backoff); /** find the ratelimit in qps for a domain. 0 if no limit for domain. */ int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name, @@ -413,14 +434,17 @@ int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name, /** Update query ratelimit hash and decide * whether or not a query should be dropped. * @param infra: infra cache - * @param repinfo: information about client + * @param addr: client address + * @param addrlen: client address length * @param timenow: what time it is now. + * @param has_cookie: if the request came with a DNS Cookie. + * @param backoff: if backoff is enabled. * @param buffer: with query for logging. * @return 1 if it could be incremented. 0 if the increment overshot the * ratelimit and the query should be dropped. */ int infra_ip_ratelimit_inc(struct infra_cache* infra, - struct comm_reply* repinfo, time_t timenow, - struct sldns_buffer* buffer); + struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow, + int has_cookie, int backoff, struct sldns_buffer* buffer); /** * Get memory used by the infra cache. @@ -466,4 +490,16 @@ void ip_rate_delkeyfunc(void* d, void* arg); /* delete data */ #define ip_rate_deldatafunc rate_deldatafunc +/** See if the IP address can have another reply in the wait limit */ +int infra_wait_limit_allowed(struct infra_cache* infra, struct comm_reply* rep, + int cookie_valid, struct config_file* cfg); + +/** Increment number of waiting replies for IP */ +void infra_wait_limit_inc(struct infra_cache* infra, struct comm_reply* rep, + time_t timenow, struct config_file* cfg); + +/** Decrement number of waiting replies for IP */ +void infra_wait_limit_dec(struct infra_cache* infra, struct comm_reply* rep, + struct config_file* cfg); + #endif /* SERVICES_CACHE_INFRA_H */ diff --git a/contrib/unbound/services/cache/rrset.c b/contrib/unbound/services/cache/rrset.c index 4e3d08bdaaf5..2c03214c8fe2 100644 --- a/contrib/unbound/services/cache/rrset.c +++ b/contrib/unbound/services/cache/rrset.c @@ -46,6 +46,7 @@ #include "util/data/packed_rrset.h" #include "util/data/msgreply.h" #include "util/data/msgparse.h" +#include "util/data/dname.h" #include "util/regional.h" #include "util/alloc.h" #include "util/net_help.h" @@ -127,6 +128,9 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns) { struct packed_rrset_data* newd = (struct packed_rrset_data*)nd; struct packed_rrset_data* cached = (struct packed_rrset_data*)cd; + /* o if new data is expired, current data is better */ + if( newd->ttl < timenow && cached->ttl >= timenow) + return 0; /* o store if rrset has been validated * everything better than bogus data * secure is preferred */ @@ -440,6 +444,89 @@ rrset_check_sec_status(struct rrset_cache* r, lock_rw_unlock(&e->lock); } +void +rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, size_t* + qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t* + qnametop, size_t qnametoplen) +{ + struct ub_packed_rrset_key *rrset; + uint8_t lablen; + + while(*qnamelen > 0) { + /* look one label higher */ + lablen = **qname; + *qname += lablen + 1; + *qnamelen -= lablen + 1; + if(*qnamelen <= 0) + return; + + /* stop at qnametop */ + if(qnametop && *qnamelen == qnametoplen && + query_dname_compare(*qname, qnametop)==0) + return; + + if(verbosity >= VERB_ALGO) { + /* looks up with a time of 0, to see expired entries */ + if((rrset = rrset_cache_lookup(r, *qname, + *qnamelen, searchtype, qclass, 0, 0, 0))) { + struct packed_rrset_data* data = + (struct packed_rrset_data*)rrset->entry.data; + int expired = (now > data->ttl); + lock_rw_unlock(&rrset->entry.lock); + if(expired) + log_nametypeclass(verbosity, "this " + "(grand)parent rrset will be " + "removed (expired)", + *qname, searchtype, qclass); + else log_nametypeclass(verbosity, "this " + "(grand)parent rrset will be " + "removed", + *qname, searchtype, qclass); + } + } + rrset_cache_remove(r, *qname, *qnamelen, searchtype, qclass, 0); + } +} + +int +rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, size_t* + qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t* + qnametop, size_t qnametoplen) +{ + struct ub_packed_rrset_key *rrset; + uint8_t lablen; + + while(*qnamelen > 0) { + /* look one label higher */ + lablen = **qname; + *qname += lablen + 1; + *qnamelen -= lablen + 1; + if(*qnamelen <= 0) + break; + + /* looks up with a time of 0, to see expired entries */ + if((rrset = rrset_cache_lookup(r, *qname, + *qnamelen, searchtype, qclass, 0, 0, 0))) { + struct packed_rrset_data* data = + (struct packed_rrset_data*)rrset->entry.data; + if(now > data->ttl) { + /* it is expired, this is not wanted */ + lock_rw_unlock(&rrset->entry.lock); + log_nametypeclass(VERB_ALGO, "this rrset is expired", *qname, searchtype, qclass); + return 1; + } + /* it is not expired, continue looking */ + lock_rw_unlock(&rrset->entry.lock); + } + + /* do not look above the qnametop. */ + if(qnametop && *qnamelen == qnametoplen && + query_dname_compare(*qname, qnametop)==0) + break; + } + return 0; +} + void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen, uint16_t type, uint16_t dclass, uint32_t flags) { diff --git a/contrib/unbound/services/cache/rrset.h b/contrib/unbound/services/cache/rrset.h index 35a0d732b048..6db79d90f532 100644 --- a/contrib/unbound/services/cache/rrset.h +++ b/contrib/unbound/services/cache/rrset.h @@ -120,7 +120,7 @@ void rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key, * the new rrset. The reference may be changed if the cached rrset is * superior. * Before calling the rrset is presumed newly allocated and changeable. - * Afer calling you do not hold a lock, and the rrset is inserted in + * After calling you do not hold a lock, and the rrset is inserted in * the hashtable so you need a lock to change it. * @param alloc: how to allocate (and deallocate) the special rrset key. * @param timenow: current time (to see if ttl in cache is expired). @@ -143,7 +143,7 @@ int rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref, * @param rrset: which rrset to cache as wildcard. This rrset is left * untouched. * @param ce: the closest encloser, will be uses to generate the wildcard dname. - * @param ce_len: the closest encloser lenght. + * @param ce_len: the closest encloser length. * @param alloc: how to allocate (and deallocate) the special rrset key. * @param timenow: current time (to see if ttl in cache is expired). */ @@ -232,6 +232,37 @@ void rrset_check_sec_status(struct rrset_cache* r, struct ub_packed_rrset_key* rrset, time_t now); /** + * Removes rrsets above the qname, returns upper qname. + * @param r: the rrset cache. + * @param qname: the start qname, also used as the output. + * @param qnamelen: length of qname, updated when it returns. + * @param searchtype: qtype to search for. + * @param qclass: qclass to search for. + * @param now: current time. + * @param qnametop: the top qname to stop removal (it is not removed). + * @param qnametoplen: length of qnametop. + */ +void rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, + size_t* qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, + uint8_t* qnametop, size_t qnametoplen); + +/** + * Sees if an rrset is expired above the qname, returns upper qname. + * @param r: the rrset cache. + * @param qname: the start qname, also used as the output. + * @param qnamelen: length of qname, updated when it returns. + * @param searchtype: qtype to search for. + * @param qclass: qclass to search for. + * @param now: current time. + * @param qnametop: the top qname, don't look farther than that. + * @param qnametoplen: length of qnametop. + * @return true if there is an expired rrset above, false otherwise. + */ +int rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, + size_t* qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, + uint8_t* qnametop, size_t qnametoplen); + +/** * Remove an rrset from the cache, by name and type and flags * @param r: rrset cache * @param nm: name of rrset diff --git a/contrib/unbound/services/listen_dnsport.c b/contrib/unbound/services/listen_dnsport.c index b790660f2396..7eb59a1618a1 100644 --- a/contrib/unbound/services/listen_dnsport.c +++ b/contrib/unbound/services/listen_dnsport.c @@ -4,22 +4,22 @@ * Copyright (c) 2007, NLnet Labs. All rights reserved. * * This software is open source. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * Neither the name of the NLNET LABS nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -47,6 +47,7 @@ #ifdef USE_TCP_FASTOPEN #include <netinet/tcp.h> #endif +#include <ctype.h> #include "services/listen_dnsport.h" #include "services/outside_network.h" #include "util/netevent.h" @@ -78,9 +79,11 @@ #ifdef HAVE_NET_IF_H #include <net/if.h> #endif - +#ifdef HAVE_LINUX_NET_TSTAMP_H +#include <linux/net_tstamp.h> +#endif /** number of queued TCP connections for listen() */ -#define TCP_BACKLOG 256 +#define TCP_BACKLOG 256 #ifndef THREADS_DISABLED /** lock on the counter of stream buffer memory */ @@ -123,16 +126,30 @@ verbose_print_addr(struct addrinfo *addr) (void)strlcpy(buf, "(null)", sizeof(buf)); } buf[sizeof(buf)-1] = 0; - verbose(VERB_ALGO, "creating %s%s socket %s %d", + verbose(VERB_ALGO, "creating %s%s socket %s %d", addr->ai_socktype==SOCK_DGRAM?"udp": addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto", addr->ai_family==AF_INET?"4": addr->ai_family==AF_INET6?"6": - "_otherfam", buf, + "_otherfam", buf, ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port)); } } +void +verbose_print_unbound_socket(struct unbound_socket* ub_sock) +{ + if(verbosity >= VERB_ALGO) { + char buf[256]; + log_info("listing of unbound_socket structure:"); + addr_to_str((void*)ub_sock->addr, ub_sock->addrlen, buf, + sizeof(buf)); + log_info("%s s is: %d, fam is: %s, acl: %s", buf, ub_sock->s, + ub_sock->fam == AF_INET?"AF_INET":"AF_INET6", + ub_sock->acl?"yes":"no"); + } +} + #ifdef HAVE_SYSTEMD static int systemd_get_activated(int family, int socktype, int listen, @@ -174,7 +191,7 @@ systemd_get_activated(int family, int socktype, int listen, log_err("systemd sd_listen_fds(): %s", strerror(-r)); return -1; } - + for(i = 0; i < r; i++) { if(sd_is_socket(SD_LISTEN_FDS_START + i, family, socktype, listen)) { s = SD_LISTEN_FDS_START + i; @@ -240,7 +257,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } #else - if(WSAGetLastError() == WSAEAFNOSUPPORT || + if(WSAGetLastError() == WSAEAFNOSUPPORT || WSAGetLastError() == WSAEPROTONOSUPPORT) { *noproto = 1; return -1; @@ -257,7 +274,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, #endif if(listen) { #ifdef SO_REUSEADDR - if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, + if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, (socklen_t)sizeof(on)) < 0) { log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", sock_strerror(errno)); @@ -355,9 +372,9 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, socklen_t slen = (socklen_t)sizeof(got); # ifdef SO_RCVBUFFORCE /* Linux specific: try to use root permission to override - * system limits on rcvbuf. The limit is stored in + * system limits on rcvbuf. The limit is stored in * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */ - if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, + if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, (socklen_t)sizeof(rcv)) < 0) { if(errno != EPERM) { log_err("setsockopt(..., SO_RCVBUFFORCE, " @@ -368,7 +385,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } # endif /* SO_RCVBUFFORCE */ - if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, + if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, (socklen_t)sizeof(rcv)) < 0) { log_err("setsockopt(..., SO_RCVBUF, " "...) failed: %s", sock_strerror(errno)); @@ -379,7 +396,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, } /* check if we got the right thing or if system * reduced to some system max. Warn if so */ - if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, + if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, &slen) >= 0 && got < rcv/2) { log_warn("so-rcvbuf %u was not granted. " "Got %u. To fix: start with " @@ -400,9 +417,9 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, socklen_t slen = (socklen_t)sizeof(got); # ifdef SO_SNDBUFFORCE /* Linux specific: try to use root permission to override - * system limits on sndbuf. The limit is stored in + * system limits on sndbuf. The limit is stored in * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */ - if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, + if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, (socklen_t)sizeof(snd)) < 0) { if(errno != EPERM) { log_err("setsockopt(..., SO_SNDBUFFORCE, " @@ -413,7 +430,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } # endif /* SO_SNDBUFFORCE */ - if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, + if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, (socklen_t)sizeof(snd)) < 0) { log_err("setsockopt(..., SO_SNDBUF, " "...) failed: %s", sock_strerror(errno)); @@ -424,7 +441,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, } /* check if we got the right thing or if system * reduced to some system max. Warn if so */ - if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, + if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, &slen) >= 0 && got < snd/2) { log_warn("so-sndbuf %u was not granted. " "Got %u. To fix: start with " @@ -442,10 +459,21 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, if(err != NULL) log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); if(family == AF_INET6) { +# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) + int omit6_set = 0; + int action; +# endif # if defined(IPV6_V6ONLY) - if(v6only) { + if(v6only +# ifdef HAVE_SYSTEMD + /* Systemd wants to control if the socket is v6 only + * or both, with BindIPv6Only=default, ipv6-only or + * both in systemd.socket, so it is not set here. */ + && !got_fd_from_systemd +# endif + ) { int val=(v6only==2)?0:1; - if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&val, (socklen_t)sizeof(val)) < 0) { log_err("setsockopt(..., IPV6_V6ONLY" ", ...) failed: %s", sock_strerror(errno)); @@ -475,6 +503,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } # elif defined(IPV6_MTU) +# ifndef USE_WINSOCK /* * On Linux, to send no larger than 1280, the PMTUD is * disabled by default for datagrams anyway, so we set @@ -482,14 +511,63 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, */ if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU, (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { - log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", + log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", sock_strerror(errno)); sock_close(s); *noproto = 0; *inuse = 0; return -1; } +# elif defined(IPV6_USER_MTU) + /* As later versions of the mingw crosscompiler define + * IPV6_MTU, do the same for windows but use IPV6_USER_MTU + * instead which is writable; IPV6_MTU is readonly there. */ + if (setsockopt(s, IPPROTO_IPV6, IPV6_USER_MTU, + (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) { + if (WSAGetLastError() != WSAENOPROTOOPT) { + log_err("setsockopt(..., IPV6_USER_MTU, ...) failed: %s", + wsa_strerror(WSAGetLastError())); + sock_close(s); + *noproto = 0; + *inuse = 0; + return -1; + } + } +# endif /* USE_WINSOCK */ # endif /* IPv6 MTU */ +# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) +# if defined(IP_PMTUDISC_OMIT) + action = IP_PMTUDISC_OMIT; + if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &action, (socklen_t)sizeof(action)) < 0) { + + if (errno != EINVAL) { + log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", + strerror(errno)); + sock_close(s); + *noproto = 0; + *inuse = 0; + return -1; + } + } + else + { + omit6_set = 1; + } +# endif + if (omit6_set == 0) { + action = IP_PMTUDISC_DONT; + if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &action, (socklen_t)sizeof(action)) < 0) { + log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", + strerror(errno)); + sock_close(s); + *noproto = 0; + *inuse = 0; + return -1; + } + } +# endif /* IPV6_MTU_DISCOVER */ } else if(family == AF_INET) { # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that @@ -502,7 +580,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, int action; # if defined(IP_PMTUDISC_OMIT) action = IP_PMTUDISC_OMIT; - if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, + if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, &action, (socklen_t)sizeof(action)) < 0) { if (errno != EINVAL) { @@ -534,8 +612,10 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, # elif defined(IP_DONTFRAG) && !defined(__APPLE__) /* the IP_DONTFRAG option if defined in the 11.0 OSX headers, * but does not work on that version, so we exclude it */ - int off = 0; - if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, + /* a nonzero value disables fragmentation, according to + * docs.oracle.com for ip(4). */ + int off = 1; + if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, &off, (socklen_t)sizeof(off)) < 0) { log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s", strerror(errno)); @@ -573,7 +653,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, if(WSAGetLastError() != WSAEADDRINUSE && WSAGetLastError() != WSAEADDRNOTAVAIL && !(WSAGetLastError() == WSAEACCES && verbosity < 4 && !listen)) { - log_err_addr("can't bind socket", + log_err_addr("can't bind socket", wsa_strerror(WSAGetLastError()), (struct sockaddr_storage*)addr, addrlen); } @@ -675,7 +755,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, } #endif #ifdef SO_REUSEADDR - if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, + if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, (socklen_t)sizeof(on)) < 0) { log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s", sock_strerror(errno)); @@ -711,8 +791,15 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, (void)reuseport; #endif /* defined(SO_REUSEPORT) */ #if defined(IPV6_V6ONLY) - if(addr->ai_family == AF_INET6 && v6only) { - if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, + if(addr->ai_family == AF_INET6 && v6only +# ifdef HAVE_SYSTEMD + /* Systemd wants to control if the socket is v6 only + * or both, with BindIPv6Only=default, ipv6-only or + * both in systemd.socket, so it is not set here. */ + && !got_fd_from_systemd +# endif + ) { + if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&on, (socklen_t)sizeof(on)) < 0) { log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s", sock_strerror(errno)); @@ -764,7 +851,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, addr->ai_addrlen); } #else - log_err_addr("can't bind socket", + log_err_addr("can't bind socket", wsa_strerror(WSAGetLastError()), (struct sockaddr_storage*)addr->ai_addr, addr->ai_addrlen); @@ -792,7 +879,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, /* 5 is recommended on linux */ qlen = 5; #endif - if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, + if ((setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen))) == -1 ) { #ifdef ENOPROTOOPT /* squelch ENOPROTOOPT: freebsd server mode with kernel support @@ -822,9 +909,14 @@ set_ip_dscp(int socket, int addrfamily, int dscp) ds = dscp << 2; switch(addrfamily) { case AF_INET6: - if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, sizeof(ds)) < 0) + #ifdef IPV6_TCLASS + if(setsockopt(socket, IPPROTO_IPV6, IPV6_TCLASS, (void*)&ds, + sizeof(ds)) < 0) return sock_strerror(errno); break; + #else + return "IPV6_TCLASS not defined on this system"; + #endif default: if(setsockopt(socket, IPPROTO_IP, IP_TOS, (void*)&ds, sizeof(ds)) < 0) return sock_strerror(errno); @@ -913,10 +1005,10 @@ err: * Create socket from getaddrinfo results */ static int -make_sock(int stype, const char* ifname, const char* port, +make_sock(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, struct unbound_socket* ub_sock) { struct addrinfo *res = NULL; int r, s, inuse, noproto; @@ -929,10 +1021,10 @@ make_sock(int stype, const char* ifname, const char* port, return -1; } #endif - log_err("node %s:%s getaddrinfo: %s %s", + log_err("node %s:%s getaddrinfo: %s %s", ifname?ifname:"default", port, gai_strerror(r), #ifdef EAI_SYSTEM - r==EAI_SYSTEM?(char*)strerror(errno):"" + (r==EAI_SYSTEM?(char*)strerror(errno):"") #else "" #endif @@ -958,16 +1050,36 @@ make_sock(int stype, const char* ifname, const char* port, *noip6 = 1; } } + + if(!res->ai_addr) { + log_err("getaddrinfo returned no address"); + freeaddrinfo(res); + sock_close(s); + return -1; + } + ub_sock->addr = memdup(res->ai_addr, res->ai_addrlen); + ub_sock->addrlen = res->ai_addrlen; + if(!ub_sock->addr) { + log_err("out of memory: allocate listening address"); + freeaddrinfo(res); + sock_close(s); + return -1; + } freeaddrinfo(res); + + ub_sock->s = s; + ub_sock->fam = hints->ai_family; + ub_sock->acl = NULL; + return s; } /** make socket and first see if ifname contains port override info */ static int -make_sock_port(int stype, const char* ifname, const char* port, +make_sock_port(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, struct unbound_socket* ub_sock) { char* s = strchr(ifname, '@'); if(s) { @@ -990,11 +1102,11 @@ make_sock_port(int stype, const char* ifname, const char* port, p[strlen(s+1)]=0; return make_sock(stype, newif, p, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, - use_systemd, dscp); + use_systemd, dscp, ub_sock); } return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp); + dscp, ub_sock); } /** @@ -1002,10 +1114,13 @@ make_sock_port(int stype, const char* ifname, const char* port, * @param list: list head. changed. * @param s: fd. * @param ftype: if fd is UDP. + * @param pp2_enabled: if PROXYv2 is enabled for this port. + * @param ub_sock: socket with address. * @return false on failure. list in unchanged then. */ static int -port_insert(struct listen_port** list, int s, enum listen_type ftype) +port_insert(struct listen_port** list, int s, enum listen_type ftype, + int pp2_enabled, struct unbound_socket* ub_sock) { struct listen_port* item = (struct listen_port*)malloc( sizeof(struct listen_port)); @@ -1014,13 +1129,34 @@ port_insert(struct listen_port** list, int s, enum listen_type ftype) item->next = *list; item->fd = s; item->ftype = ftype; + item->pp2_enabled = pp2_enabled; + item->socket = ub_sock; *list = item; return 1; } +/** set fd to receive software timestamps */ +static int +set_recvtimestamp(int s) +{ +#ifdef HAVE_LINUX_NET_TSTAMP_H + int opt = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; + if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMPNS, (void*)&opt, (socklen_t)sizeof(opt)) < 0) { + log_err("setsockopt(..., SO_TIMESTAMPNS, ...) failed: %s", + strerror(errno)); + return 0; + } + return 1; +#else + log_err("packets timestamping is not supported on this platform"); + (void)s; + return 0; +#endif +} + /** set fd to receive source address packet info */ static int -set_recvpktinfo(int s, int family) +set_recvpktinfo(int s, int family) { #if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO) int on = 1; @@ -1043,7 +1179,7 @@ set_recvpktinfo(int s, int family) return 0; } # else - log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please " + log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please " "disable interface-automatic or do-ip6 in config"); return 0; # endif /* defined IPV6_RECVPKTINFO */ @@ -1093,25 +1229,13 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port, return 0; } -/** see if interface is https, its port number == the https port number */ -static int -if_is_https(const char* ifname, const char* port, int https_port) -{ - char* p = strchr(ifname, '@'); - if(!p && atoi(port) == https_port) - return 1; - if(p && atoi(p+1) == https_port) - return 1; - return 0; -} - /** * Helper for ports_open. Creates one interface (or NULL for default). * @param ifname: The interface ip address. * @param do_auto: use automatic interface detection. * If enabled, then ifname must be the wildcard name. * @param do_udp: if udp should be used. - * @param do_tcp: if udp should be used. + * @param do_tcp: if tcp should be used. * @param hints: for getaddrinfo. family and flags have to be set by caller. * @param port: Port number to use (as string). * @param list: list of open ports, appended to, changed to point to list head. @@ -1120,6 +1244,7 @@ if_is_https(const char* ifname, const char* port, int https_port) * @param ssl_port: ssl service port number * @param tls_additional_port: list of additional ssl service port numbers. * @param https_port: DoH service port number + * @param proxy_protocol_port: list of PROXYv2 port numbers. * @param reuseport: try to set SO_REUSEPORT if nonNULL and true. * set to false on exit if reuseport failed due to no kernel support. * @param transparent: set IP_TRANSPARENT socket option. @@ -1129,34 +1254,50 @@ if_is_https(const char* ifname, const char* port, int https_port) * @param use_systemd: if true, fetch sockets from systemd. * @param dnscrypt_port: dnscrypt service port number * @param dscp: DSCP to use. + * @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to + * wait to discard if UDP packets have waited for long in the socket + * buffer. * @return: returns false on error. */ static int -ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, +ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, struct addrinfo *hints, const char* port, struct listen_port** list, size_t rcv, size_t snd, int ssl_port, struct config_strlist* tls_additional_port, int https_port, + struct config_strlist* proxy_protocol_port, int* reuseport, int transparent, int tcp_mss, int freebind, - int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp) + int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp, + int sock_queue_timeout) { int s, noip6=0; int is_https = if_is_https(ifname, port, https_port); + int is_dnscrypt = if_is_dnscrypt(ifname, port, dnscrypt_port); + int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port); int nodelay = is_https && http2_nodelay; -#ifdef USE_DNSCRYPT - int is_dnscrypt = ((strchr(ifname, '@') && - atoi(strchr(ifname, '@')+1) == dnscrypt_port) || - (!strchr(ifname, '@') && atoi(port) == dnscrypt_port)); -#else - int is_dnscrypt = 0; - (void)dnscrypt_port; -#endif + struct unbound_socket* ub_sock; if(!do_udp && !do_tcp) return 0; + + if(is_pp2) { + if(is_dnscrypt) { + fatal_exit("PROXYv2 and DNSCrypt combination not " + "supported!"); + } else if(is_https) { + fatal_exit("PROXYv2 and DoH combination not " + "supported!"); + } + } + if(do_auto) { - if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; + if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + free(ub_sock->addr); + free(ub_sock); if(noip6) { log_warn("IPv6 protocol not available"); return 1; @@ -1166,27 +1307,48 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, /* getting source addr packet info is highly non-portable */ if(!set_recvpktinfo(s, hints->ai_family)) { sock_close(s); + free(ub_sock->addr); + free(ub_sock); return 0; } - if(!port_insert(list, s, - is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) { + if (sock_queue_timeout && !set_recvtimestamp(s)) { + log_warn("socket timestamping is not available"); + } + if(!port_insert(list, s, is_dnscrypt + ?listen_type_udpancil_dnscrypt:listen_type_udpancil, + is_pp2, ub_sock)) { sock_close(s); + free(ub_sock->addr); + free(ub_sock); return 0; } } else if(do_udp) { + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; /* regular udp socket */ - if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, + if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + free(ub_sock->addr); + free(ub_sock); if(noip6) { log_warn("IPv6 protocol not available"); return 1; } return 0; } - if(!port_insert(list, s, - is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) { + if (sock_queue_timeout && !set_recvtimestamp(s)) { + log_warn("socket timestamping is not available"); + } + if(!port_insert(list, s, is_dnscrypt + ?listen_type_udp_dnscrypt : + (sock_queue_timeout ? + listen_type_udpancil:listen_type_udp), + is_pp2, ub_sock)) { sock_close(s); + free(ub_sock->addr); + free(ub_sock); return 0; } } @@ -1194,6 +1356,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port); enum listen_type port_type; + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; if(is_ssl) port_type = listen_type_ssl; else if(is_https) @@ -1202,9 +1367,11 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, port_type = listen_type_tcp_dnscrypt; else port_type = listen_type_tcp; - if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, + if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, - freebind, use_systemd, dscp)) == -1) { + freebind, use_systemd, dscp, ub_sock)) == -1) { + free(ub_sock->addr); + free(ub_sock); if(noip6) { /*log_warn("IPv6 protocol not available");*/ return 1; @@ -1213,15 +1380,17 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } if(is_ssl) verbose(VERB_ALGO, "setup TCP for SSL service"); - if(!port_insert(list, s, port_type)) { + if(!port_insert(list, s, port_type, is_pp2, ub_sock)) { sock_close(s); + free(ub_sock->addr); + free(ub_sock); return 0; } } return 1; } -/** +/** * Add items to commpoint list in front. * @param c: commpoint to add. * @param front: listen struct. @@ -1240,7 +1409,39 @@ listen_cp_insert(struct comm_point* c, struct listen_dnsport* front) return 1; } -struct listen_dnsport* +void listen_setup_locks(void) +{ + if(!stream_wait_lock_inited) { + lock_basic_init(&stream_wait_count_lock); + stream_wait_lock_inited = 1; + } + if(!http2_query_buffer_lock_inited) { + lock_basic_init(&http2_query_buffer_count_lock); + http2_query_buffer_lock_inited = 1; + } + if(!http2_response_buffer_lock_inited) { + lock_basic_init(&http2_response_buffer_count_lock); + http2_response_buffer_lock_inited = 1; + } +} + +void listen_desetup_locks(void) +{ + if(stream_wait_lock_inited) { + stream_wait_lock_inited = 0; + lock_basic_destroy(&stream_wait_count_lock); + } + if(http2_query_buffer_lock_inited) { + http2_query_buffer_lock_inited = 0; + lock_basic_destroy(&http2_query_buffer_count_lock); + } + if(http2_response_buffer_lock_inited) { + http2_response_buffer_lock_inited = 0; + lock_basic_destroy(&http2_response_buffer_count_lock); + } +} + +struct listen_dnsport* listen_create(struct comm_base* base, struct listen_port* ports, size_t bufsize, int tcp_accept_count, int tcp_idle_timeout, int harden_large_queries, uint32_t http_max_streams, @@ -1261,57 +1462,47 @@ listen_create(struct comm_base* base, struct listen_port* ports, free(front); return NULL; } - if(!stream_wait_lock_inited) { - lock_basic_init(&stream_wait_count_lock); - stream_wait_lock_inited = 1; - } - if(!http2_query_buffer_lock_inited) { - lock_basic_init(&http2_query_buffer_count_lock); - http2_query_buffer_lock_inited = 1; - } - if(!http2_response_buffer_lock_inited) { - lock_basic_init(&http2_response_buffer_count_lock); - http2_response_buffer_lock_inited = 1; - } /* create comm points as needed */ while(ports) { struct comm_point* cp = NULL; if(ports->ftype == listen_type_udp || - ports->ftype == listen_type_udp_dnscrypt) - cp = comm_point_create_udp(base, ports->fd, - front->udp_buff, cb, cb_arg); - else if(ports->ftype == listen_type_tcp || - ports->ftype == listen_type_tcp_dnscrypt) - cp = comm_point_create_tcp(base, ports->fd, + ports->ftype == listen_type_udp_dnscrypt) { + cp = comm_point_create_udp(base, ports->fd, + front->udp_buff, ports->pp2_enabled, cb, + cb_arg, ports->socket); + } else if(ports->ftype == listen_type_tcp || + ports->ftype == listen_type_tcp_dnscrypt) { + cp = comm_point_create_tcp(base, ports->fd, tcp_accept_count, tcp_idle_timeout, harden_large_queries, 0, NULL, tcp_conn_limit, bufsize, front->udp_buff, - ports->ftype, cb, cb_arg); - else if(ports->ftype == listen_type_ssl || + ports->ftype, ports->pp2_enabled, cb, cb_arg, + ports->socket); + } else if(ports->ftype == listen_type_ssl || ports->ftype == listen_type_http) { - cp = comm_point_create_tcp(base, ports->fd, + cp = comm_point_create_tcp(base, ports->fd, tcp_accept_count, tcp_idle_timeout, harden_large_queries, http_max_streams, http_endpoint, tcp_conn_limit, bufsize, front->udp_buff, - ports->ftype, cb, cb_arg); - if(http_notls && ports->ftype == listen_type_http) - cp->ssl = NULL; - else - cp->ssl = sslctx; + ports->ftype, ports->pp2_enabled, cb, cb_arg, + ports->socket); if(ports->ftype == listen_type_http) { if(!sslctx && !http_notls) { - log_warn("HTTPS port configured, but no TLS " - "tls-service-key or tls-service-pem " - "set"); + log_warn("HTTPS port configured, but " + "no TLS tls-service-key or " + "tls-service-pem set"); } #ifndef HAVE_SSL_CTX_SET_ALPN_SELECT_CB - if(!http_notls) - log_warn("Unbound is not compiled with an " - "OpenSSL version supporting ALPN " - " (OpenSSL >= 1.0.2). This is required " - "to use DNS-over-HTTPS"); + if(!http_notls) { + log_warn("Unbound is not compiled " + "with an OpenSSL version " + "supporting ALPN " + "(OpenSSL >= 1.0.2). This " + "is required to use " + "DNS-over-HTTPS"); + } #endif #ifndef HAVE_NGHTTP2_NGHTTP2_H log_warn("Unbound is not compiled with " @@ -1320,14 +1511,30 @@ listen_create(struct comm_base* base, struct listen_port* ports, #endif } } else if(ports->ftype == listen_type_udpancil || - ports->ftype == listen_type_udpancil_dnscrypt) - cp = comm_point_create_udp_ancil(base, ports->fd, - front->udp_buff, cb, cb_arg); + ports->ftype == listen_type_udpancil_dnscrypt) { +#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG) + cp = comm_point_create_udp_ancil(base, ports->fd, + front->udp_buff, ports->pp2_enabled, cb, + cb_arg, ports->socket); +#else + log_warn("This system does not support UDP ancilliary data."); +#endif + } if(!cp) { - log_err("can't create commpoint"); + log_err("can't create commpoint"); listen_delete(front); return NULL; } + if((http_notls && ports->ftype == listen_type_http) || + (ports->ftype == listen_type_tcp) || + (ports->ftype == listen_type_udp) || + (ports->ftype == listen_type_udpancil) || + (ports->ftype == listen_type_tcp_dnscrypt) || + (ports->ftype == listen_type_udp_dnscrypt) || + (ports->ftype == listen_type_udpancil_dnscrypt)) + cp->ssl = NULL; + else + cp->ssl = sslctx; cp->dtenv = dtenv; cp->do_not_close = 1; #ifdef USE_DNSCRYPT @@ -1374,10 +1581,10 @@ listen_list_delete(struct listen_list* list) } } -void +void listen_delete(struct listen_dnsport* front) { - if(!front) + if(!front) return; listen_list_delete(front->cps); #ifdef USE_DNSCRYPT @@ -1388,18 +1595,6 @@ listen_delete(struct listen_dnsport* front) #endif sldns_buffer_free(front->udp_buff); free(front); - if(stream_wait_lock_inited) { - stream_wait_lock_inited = 0; - lock_basic_destroy(&stream_wait_count_lock); - } - if(http2_query_buffer_lock_inited) { - http2_query_buffer_lock_inited = 0; - lock_basic_destroy(&http2_query_buffer_count_lock); - } - if(http2_response_buffer_lock_inited) { - http2_response_buffer_lock_inited = 0; - lock_basic_destroy(&http2_response_buffer_count_lock); - } } #ifdef HAVE_GETIFADDRS @@ -1506,13 +1701,12 @@ resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addres } #endif /* HAVE_GETIFADDRS */ -int resolve_interface_names(struct config_file* cfg, char*** resif, - int* num_resif) +int resolve_interface_names(char** ifs, int num_ifs, + struct config_strlist* list, char*** resif, int* num_resif) { #ifdef HAVE_GETIFADDRS - int i; struct ifaddrs *addrs = NULL; - if(cfg->num_ifs == 0) { + if(num_ifs == 0 && list == NULL) { *resif = NULL; *num_resif = 0; return 1; @@ -1523,45 +1717,80 @@ int resolve_interface_names(struct config_file* cfg, char*** resif, freeifaddrs(addrs); return 0; } - for(i=0; i<cfg->num_ifs; i++) { - if(!resolve_ifa_name(addrs, cfg->ifs[i], resif, num_resif)) { - freeifaddrs(addrs); - config_del_strarray(*resif, *num_resif); - *resif = NULL; - *num_resif = 0; - return 0; + if(ifs) { + int i; + for(i=0; i<num_ifs; i++) { + if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { + freeifaddrs(addrs); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } } } + if(list) { + struct config_strlist* p; + for(p = list; p; p = p->next) { + if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { + freeifaddrs(addrs); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } +} + } freeifaddrs(addrs); return 1; #else - int i; - if(cfg->num_ifs == 0) { + struct config_strlist* p; + if(num_ifs == 0 && list == NULL) { *resif = NULL; *num_resif = 0; return 1; } - *num_resif = cfg->num_ifs; + *num_resif = num_ifs; + for(p = list; p; p = p->next) { + (*num_resif)++; + } *resif = calloc(*num_resif, sizeof(**resif)); if(!*resif) { log_err("out of memory"); return 0; } - for(i=0; i<*num_resif; i++) { - (*resif)[i] = strdup(cfg->ifs[i]); - if(!((*resif)[i])) { - log_err("out of memory"); - config_del_strarray(*resif, *num_resif); - *resif = NULL; - *num_resif = 0; - return 0; + if(ifs) { + int i; + for(i=0; i<num_ifs; i++) { + (*resif)[i] = strdup(ifs[i]); + if(!((*resif)[i])) { + log_err("out of memory"); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } + } + } + if(list) { + int idx = num_ifs; + for(p = list; p; p = p->next) { + (*resif)[idx] = strdup(p->str); + if(!((*resif)[idx])) { + log_err("out of memory"); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } + idx++; } } return 1; #endif /* HAVE_GETIFADDRS */ } -struct listen_port* +struct listen_port* listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, int* reuseport) { @@ -1593,32 +1822,95 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, } /* create ip4 and ip6 ports so that return addresses are nice. */ if(do_auto || num_ifs == 0) { + if(do_auto && cfg->if_automatic_ports && + cfg->if_automatic_ports[0]!=0) { + char* now = cfg->if_automatic_ports; + while(now && *now) { + char* after; + int extraport; + while(isspace((unsigned char)*now)) + now++; + if(!*now) + break; + after = now; + extraport = (int)strtol(now, &after, 10); + if(extraport < 0 || extraport > 65535) { + log_err("interface-automatic-ports port number out of range, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); + listening_ports_free(list); + return NULL; + } + if(extraport == 0 && now == after) { + log_err("interface-automatic-ports could not be parsed, at position %d of '%s'", (int)(now-cfg->if_automatic_ports)+1, cfg->if_automatic_ports); + listening_ports_free(list); + return NULL; + } + now = after; + snprintf(portbuf, sizeof(portbuf), "%d", extraport); + if(do_ip6) { + hints.ai_family = AF_INET6; + if(!ports_create_if("::0", + do_auto, cfg->do_udp, do_tcp, + &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, cfg->tls_additional_port, + cfg->https_port, + cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, + cfg->tcp_mss, cfg->ip_freebind, + cfg->http_nodelay, cfg->use_systemd, + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + listening_ports_free(list); + return NULL; + } + } + if(do_ip4) { + hints.ai_family = AF_INET; + if(!ports_create_if("0.0.0.0", + do_auto, cfg->do_udp, do_tcp, + &hints, portbuf, &list, + cfg->so_rcvbuf, cfg->so_sndbuf, + cfg->ssl_port, cfg->tls_additional_port, + cfg->https_port, + cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, + cfg->tcp_mss, cfg->ip_freebind, + cfg->http_nodelay, cfg->use_systemd, + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { + listening_ports_free(list); + return NULL; + } + } + } + return list; + } if(do_ip6) { hints.ai_family = AF_INET6; - if(!ports_create_if(do_auto?"::0":"::1", - do_auto, cfg->do_udp, do_tcp, + if(!ports_create_if(do_auto?"::0":"::1", + do_auto, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, cfg->ssl_port, cfg->tls_additional_port, - cfg->https_port, reuseport, cfg->ip_transparent, + cfg->https_port, cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp)) { + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } } if(do_ip4) { hints.ai_family = AF_INET; - if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", - do_auto, cfg->do_udp, do_tcp, + if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", + do_auto, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, cfg->ssl_port, cfg->tls_additional_port, - cfg->https_port, reuseport, cfg->ip_transparent, + cfg->https_port, cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp)) { + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1629,13 +1921,14 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, continue; hints.ai_family = AF_INET6; if(!ports_create_if(ifs[i], 0, cfg->do_udp, - do_tcp, &hints, portbuf, &list, + do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, cfg->ssl_port, cfg->tls_additional_port, - cfg->https_port, reuseport, cfg->ip_transparent, + cfg->https_port, cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp)) { + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } @@ -1644,18 +1937,20 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, continue; hints.ai_family = AF_INET; if(!ports_create_if(ifs[i], 0, cfg->do_udp, - do_tcp, &hints, portbuf, &list, + do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, cfg->ssl_port, cfg->tls_additional_port, - cfg->https_port, reuseport, cfg->ip_transparent, + cfg->https_port, cfg->proxy_protocol_port, + reuseport, cfg->ip_transparent, cfg->tcp_mss, cfg->ip_freebind, cfg->http_nodelay, cfg->use_systemd, - cfg->dnscrypt_port, cfg->ip_dscp)) { + cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) { listening_ports_free(list); return NULL; } } } + return list; } @@ -1667,6 +1962,11 @@ void listening_ports_free(struct listen_port* list) if(list->fd != -1) { sock_close(list->fd); } + /* rc_ports don't have ub_socket */ + if(list->socket) { + free(list->socket->addr); + free(list->socket); + } free(list); list = nx; } @@ -1675,8 +1975,8 @@ void listening_ports_free(struct listen_port* list) size_t listen_get_mem(struct listen_dnsport* listen) { struct listen_list* p; - size_t s = sizeof(*listen) + sizeof(*listen->base) + - sizeof(*listen->udp_buff) + + size_t s = sizeof(*listen) + sizeof(*listen->base) + + sizeof(*listen->udp_buff) + sldns_buffer_capacity(listen->udp_buff); #ifdef USE_DNSCRYPT s += sizeof(*listen->dnscrypt_udp_buff); @@ -1757,7 +2057,7 @@ void tcp_req_info_clear(struct tcp_req_info* req) } req->open_req_list = NULL; req->num_open_req = 0; - + /* free pending writable result packets */ item = req->done_req_list; while(item) { @@ -1816,7 +2116,7 @@ tcp_req_info_setup_listen(struct tcp_req_info* req) wr = 1; if(!req->read_is_closed) rd = 1; - + if(wr) { req->cp->tcp_is_reading = 0; comm_point_stop_listening(req->cp); @@ -1952,7 +2252,7 @@ tcp_req_info_handle_readdone(struct tcp_req_info* req) } req->in_worker_handle = 0; /* it should be waiting in the mesh for recursion. - * If mesh failed to add a new entry and called commpoint_drop_reply. + * If mesh failed to add a new entry and called commpoint_drop_reply. * Then the mesh state has been cleared. */ if(req->is_drop) { /* the reply has been dropped, stream has been closed. */ @@ -2012,7 +2312,7 @@ tcp_req_info_add_result(struct tcp_req_info* req, uint8_t* buf, size_t len) last = req->done_req_list; while(last && last->next) last = last->next; - + /* create new element */ item = (struct tcp_req_done_item*)malloc(sizeof(*item)); if(!item) { @@ -2371,6 +2671,10 @@ static int http2_query_read_done(struct http2_session* h2_session, "buffer already assigned to stream"); return -1; } + + /* the c->buffer might be used by mesh_send_reply and no be cleard + * need to be cleared before use */ + sldns_buffer_clear(h2_session->c->buffer); if(sldns_buffer_remaining(h2_session->c->buffer) < sldns_buffer_remaining(h2_stream->qbuffer)) { /* qbuffer will be free'd in frame close cb */ @@ -2500,7 +2804,7 @@ static int http2_req_begin_headers_cb(nghttp2_session* session, int ret; if(frame->hd.type != NGHTTP2_HEADERS || frame->headers.cat != NGHTTP2_HCAT_REQUEST) { - /* only interrested in request headers */ + /* only interested in request headers */ return 0; } if(!(h2_stream = http2_stream_create(frame->hd.stream_id))) { @@ -2572,18 +2876,45 @@ static int http2_buffer_uri_query(struct http2_session* h2_session, return 0; } - if(!(b64len = sldns_b64url_pton( - (char const *)start, length, - sldns_buffer_current(h2_stream->qbuffer), - expectb64len)) || b64len < 0) { - lock_basic_lock(&http2_query_buffer_count_lock); - http2_query_buffer_count -= expectb64len; - lock_basic_unlock(&http2_query_buffer_count_lock); - sldns_buffer_free(h2_stream->qbuffer); - h2_stream->qbuffer = NULL; - /* return without error, method can be an - * unknown POST */ - return 1; + if(sldns_b64_contains_nonurl((char const*)start, length)) { + char buf[65536+4]; + verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); + /* copy to the scratch buffer temporarily to terminate the + * string with a zero */ + if(length+1 > sizeof(buf)) { + /* too long */ + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + return 1; + } + memmove(buf, start, length); + buf[length] = 0; + if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( + h2_stream->qbuffer), expectb64len)) || b64len < 0) { + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + return 1; + } + } else { + if(!(b64len = sldns_b64url_pton( + (char const *)start, length, + sldns_buffer_current(h2_stream->qbuffer), + expectb64len)) || b64len < 0) { + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + /* return without error, method can be an + * unknown POST */ + return 1; + } } sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); return 1; @@ -2601,7 +2932,7 @@ static int http2_req_header_cb(nghttp2_session* session, * the HEADER */ if(frame->hd.type != NGHTTP2_HEADERS || frame->headers.cat != NGHTTP2_HCAT_REQUEST) { - /* only interrested in request headers */ + /* only interested in request headers */ return 0; } if(!(h2_stream = nghttp2_session_get_stream_user_data(session, @@ -2697,7 +3028,7 @@ static int http2_req_header_cb(nghttp2_session* session, h2_stream->query_too_large = 1; return 0; } - /* guaranteed to only contian digits and be null terminated */ + /* guaranteed to only contain digits and be null terminated */ h2_stream->content_length = atoi((const char*)value); if(h2_stream->content_length > h2_session->c->http2_stream_max_qbuffer_size) { @@ -2737,7 +3068,7 @@ static int http2_req_data_chunk_recv_cb(nghttp2_session* ATTR_UNUSED(session), /* setting this to msg-buffer-size can result in a lot * of memory consuption. Most queries should fit in a * single DATA frame, and most POST queries will - * containt content-length which does not impose this + * contain content-length which does not impose this * limit. */ qlen = len; } diff --git a/contrib/unbound/services/listen_dnsport.h b/contrib/unbound/services/listen_dnsport.h index f438ff4580f7..84ac4b068b1b 100644 --- a/contrib/unbound/services/listen_dnsport.h +++ b/contrib/unbound/services/listen_dnsport.h @@ -43,6 +43,7 @@ #define LISTEN_DNSPORT_H #include "util/netevent.h" +#include "daemon/acl_list.h" #ifdef HAVE_NGHTTP2_NGHTTP2_H #include <nghttp2/nghttp2.h> #endif @@ -102,6 +103,22 @@ enum listen_type { listen_type_http }; +/* + * socket properties (just like NSD nsd_socket structure definition) + */ +struct unbound_socket { + /** the address of the socket */ + struct sockaddr* addr; + /** length of the address */ + socklen_t addrlen; + /** socket descriptor returned by socket() syscall */ + int s; + /** address family (AF_INET/AF_INET6) */ + int fam; + /** ACL on the socket (listening interface) */ + struct acl_addr* acl; +}; + /** * Single linked list to store shared ports that have been * opened for use by all threads. @@ -113,6 +130,11 @@ struct listen_port { int fd; /** type of file descriptor, udp or tcp */ enum listen_type ftype; + /** if the port should support PROXYv2 */ + int pp2_enabled; + /** fill in unbound_socket structure for every opened socket at + * Unbound startup */ + struct unbound_socket* socket; }; /** @@ -136,16 +158,19 @@ struct listen_port* listening_ports_open(struct config_file* cfg, */ void listening_ports_free(struct listen_port* list); +struct config_strlist; /** * Resolve interface names in config and store result IP addresses - * @param cfg: config + * @param ifs: array of interfaces. The list of interface names, if not NULL. + * @param num_ifs: length of ifs array. + * @param list: if not NULL, this is used as the list of interface names. * @param resif: string array (malloced array of malloced strings) with * result. NULL if cfg has none. * @param num_resif: length of resif. Zero if cfg has zero num_ifs. * @return 0 on failure. */ -int resolve_interface_names(struct config_file* cfg, char*** resif, - int* num_resif); +int resolve_interface_names(char** ifs, int num_ifs, + struct config_strlist* list, char*** resif, int* num_resif); /** * Create commpoints with for this thread for the shared ports. @@ -182,6 +207,11 @@ listen_create(struct comm_base* base, struct listen_port* ports, */ void listen_delete(struct listen_dnsport* listen); +/** setup the locks for the listen ports */ +void listen_setup_locks(void); +/** desetup the locks for the listen ports */ +void listen_desetup_locks(void); + /** * delete listen_list of commpoints. Calls commpointdelete() on items. * This may close the fds or not depending on flags. @@ -424,4 +454,9 @@ int http2_submit_dns_response(void* v); char* set_ip_dscp(int socket, int addrfamily, int ds); +/** for debug and profiling purposes only + * @param ub_sock: the structure containing created socket info we want to print or log for + */ +void verbose_print_unbound_socket(struct unbound_socket* ub_sock); + #endif /* LISTEN_DNSPORT_H */ diff --git a/contrib/unbound/services/localzone.c b/contrib/unbound/services/localzone.c index fd2ff2bb67f7..51056c8ffef4 100644 --- a/contrib/unbound/services/localzone.c +++ b/contrib/unbound/services/localzone.c @@ -56,6 +56,44 @@ * with 16 bytes for an A record, a 64K packet has about 4000 max */ #define LOCALZONE_RRSET_COUNT_MAX 4096 +/** print all RRsets in local zone */ +static void +local_zone_out(struct local_zone* z) +{ + struct local_data* d; + struct local_rrset* p; + RBTREE_FOR(d, struct local_data*, &z->data) { + for(p = d->rrsets; p; p = p->next) { + log_nametypeclass(NO_VERBOSE, "rrset", d->name, + ntohs(p->rrset->rk.type), + ntohs(p->rrset->rk.rrset_class)); + } + } +} + +static void +local_zone_print(struct local_zone* z) +{ + char buf[64]; + lock_rw_rdlock(&z->lock); + snprintf(buf, sizeof(buf), "%s zone", + local_zone_type2str(z->type)); + log_nametypeclass(NO_VERBOSE, buf, z->name, 0, z->dclass); + local_zone_out(z); + lock_rw_unlock(&z->lock); +} + +void local_zones_print(struct local_zones* zones) +{ + struct local_zone* z; + lock_rw_rdlock(&zones->lock); + log_info("number of auth zones %u", (unsigned)zones->ztree.count); + RBTREE_FOR(z, struct local_zone*, &zones->ztree) { + local_zone_print(z); + } + lock_rw_unlock(&zones->lock); +} + struct local_zones* local_zones_create(void) { @@ -292,14 +330,16 @@ get_rr_nameclass(const char* str, uint8_t** nm, uint16_t* dclass, static struct local_rrset* local_data_find_type(struct local_data* data, uint16_t type, int alias_ok) { - struct local_rrset* p; + struct local_rrset* p, *cname = NULL; type = htons(type); for(p = data->rrsets; p; p = p->next) { if(p->rrset->rk.type == type) return p; if(alias_ok && p->rrset->rk.type == htons(LDNS_RR_TYPE_CNAME)) - return p; + cname = p; } + if(alias_ok) + return cname; return NULL; } @@ -465,7 +505,7 @@ lz_find_create_node(struct local_zone* z, uint8_t* nm, size_t nmlen, /* Mark the SOA record for the zone. This only marks the SOA rrset; the data * for the RR is entered later on local_zone_enter_rr() as with the other - * records. An artifical soa_negative record with a modified TTL (minimum of + * records. An artificial soa_negative record with a modified TTL (minimum of * the TTL and the SOA.MINIMUM) is also created and marked for usage with * negative answers and to avoid allocations during those answers. */ static int @@ -745,9 +785,15 @@ static int lz_enter_zones(struct local_zones* zones, struct config_file* cfg) { struct config_str2list* p; +#ifndef THREADS_DISABLED struct local_zone* z; +#endif for(p = cfg->local_zones; p; p = p->next) { - if(!(z=lz_enter_zone(zones, p->str, p->str2, + if(!( +#ifndef THREADS_DISABLED + z= +#endif + lz_enter_zone(zones, p->str, p->str2, LDNS_RR_CLASS_IN))) return 0; lock_rw_unlock(&z->lock); @@ -892,6 +938,11 @@ int local_zone_enter_defaults(struct local_zones* zones, struct config_file* cfg } lock_rw_unlock(&z->lock); } + /* home.arpa. zone (RFC 8375) */ + if(!add_empty_default(zones, cfg, "home.arpa.")) { + log_err("out of memory adding default zone"); + return 0; + } /* onion. zone (RFC 7686) */ if(!add_empty_default(zones, cfg, "onion.")) { log_err("out of memory adding default zone"); @@ -999,6 +1050,38 @@ lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) lock_rw_rdlock(&zones->lock); if(!local_zones_lookup(zones, rr_name, len, labs, rr_class, rr_type)) { + /* Check if there is a zone that this could go + * under but for different class; created zones are + * always for LDNS_RR_CLASS_IN. Create the zone with + * a different class but the same configured + * local_zone_type. */ + struct local_zone* z = local_zones_lookup(zones, + rr_name, len, labs, LDNS_RR_CLASS_IN, rr_type); + if(z) { + uint8_t* name = memdup(z->name, z->namelen); + size_t znamelen = z->namelen; + int znamelabs = z->namelabs; + enum localzone_type ztype = z->type; + lock_rw_unlock(&zones->lock); + if(!name) { + log_err("out of memory"); + free(rr_name); + return 0; + } + if(!( +#ifndef THREADS_DISABLED + z = +#endif + lz_enter_zone_dname(zones, name, + znamelen, znamelabs, + ztype, rr_class))) { + free(rr_name); + return 0; + } + lock_rw_unlock(&z->lock); + free(rr_name); + continue; + } if(!have_name) { dclass = rr_class; nm = rr_name; @@ -1027,7 +1110,9 @@ lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) } if(have_name) { uint8_t* n2; +#ifndef THREADS_DISABLED struct local_zone* z; +#endif /* allocate zone of smallest shared topdomain to contain em */ n2 = nm; dname_remove_labels(&n2, &nmlen, nmlabs - match); @@ -1039,7 +1124,11 @@ lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) } log_nametypeclass(VERB_ALGO, "implicit transparent local-zone", n2, 0, dclass); - if(!(z=lz_enter_zone_dname(zones, n2, nmlen, match, + if(!( +#ifndef THREADS_DISABLED + z= +#endif + lz_enter_zone_dname(zones, n2, nmlen, match, local_zone_transparent, dclass))) { return 0; } @@ -1203,38 +1292,6 @@ local_zones_find_le(struct local_zones* zones, return (struct local_zone*)node; } -/** print all RRsets in local zone */ -static void -local_zone_out(struct local_zone* z) -{ - struct local_data* d; - struct local_rrset* p; - RBTREE_FOR(d, struct local_data*, &z->data) { - for(p = d->rrsets; p; p = p->next) { - log_nametypeclass(NO_VERBOSE, "rrset", d->name, - ntohs(p->rrset->rk.type), - ntohs(p->rrset->rk.rrset_class)); - } - } -} - -void local_zones_print(struct local_zones* zones) -{ - struct local_zone* z; - lock_rw_rdlock(&zones->lock); - log_info("number of auth zones %u", (unsigned)zones->ztree.count); - RBTREE_FOR(z, struct local_zone*, &zones->ztree) { - char buf[64]; - lock_rw_rdlock(&z->lock); - snprintf(buf, sizeof(buf), "%s zone", - local_zone_type2str(z->type)); - log_nametypeclass(NO_VERBOSE, buf, z->name, 0, z->dclass); - local_zone_out(z); - lock_rw_unlock(&z->lock); - } - lock_rw_unlock(&zones->lock); -} - /** encode answer consisting of 1 rrset */ static int local_encode(struct query_info* qinfo, struct module_env* env, @@ -1253,6 +1310,7 @@ local_encode(struct query_info* qinfo, struct module_env* env, else rep.ns_numrrsets = 1; rep.rrset_count = 1; rep.rrsets = &rrset; + rep.reason_bogus = LDNS_EDE_NONE; udpsize = edns->udp_size; edns->edns_version = EDNS_ADVERTISED_VERSION; edns->udp_size = EDNS_ADVERTISED_SIZE; @@ -1273,7 +1331,8 @@ local_encode(struct query_info* qinfo, struct module_env* env, static void local_error_encode(struct query_info* qinfo, struct module_env* env, struct edns_data* edns, struct comm_reply* repinfo, sldns_buffer* buf, - struct regional* temp, int rcode, int r) + struct regional* temp, int rcode, int r, int ede_code, + const char* ede_txt) { edns->edns_version = EDNS_ADVERTISED_VERSION; edns->udp_size = EDNS_ADVERTISED_SIZE; @@ -1282,7 +1341,13 @@ local_error_encode(struct query_info* qinfo, struct module_env* env, if(!inplace_cb_reply_local_call(env, qinfo, NULL, NULL, rcode, edns, repinfo, temp, env->now_tv)) - edns->opt_list = NULL; + edns->opt_list_inplace_cb_out = NULL; + + if(ede_code != LDNS_EDE_NONE && env->cfg->ede) { + edns_opt_list_append_ede(&edns->opt_list_out, temp, + ede_code, ede_txt); + } + error_encode(buf, r, qinfo, *(uint16_t*)sldns_buffer_begin(buf), sldns_buffer_read_u16_at(buf, 2), edns); } @@ -1469,7 +1534,7 @@ local_data_answer(struct local_zone* z, struct module_env* env, return 0; /* invalid cname */ if(dname_is_wild(ctarget)) { /* synthesize cname target */ - struct packed_rrset_data* d; + struct packed_rrset_data* d, *lr_d; /* -3 for wildcard label and root label from qname */ size_t newtargetlen = qinfo->qname_len + ctargetlen - 3; @@ -1480,7 +1545,9 @@ local_data_answer(struct local_zone* z, struct module_env* env, qinfo->local_alias = NULL; local_error_encode(qinfo, env, edns, repinfo, buf, temp, LDNS_RCODE_YXDOMAIN, - (LDNS_RCODE_YXDOMAIN|BIT_AA)); + (LDNS_RCODE_YXDOMAIN|BIT_AA), + LDNS_EDE_OTHER, + "DNAME expansion became too large"); return 1; } memset(&qinfo->local_alias->rrset->entry, 0, @@ -1495,8 +1562,10 @@ local_data_answer(struct local_zone* z, struct module_env* env, + newtargetlen); if(!d) return 0; /* out of memory */ + lr_d = (struct packed_rrset_data*)lr->rrset->entry.data; qinfo->local_alias->rrset->entry.data = d; - d->ttl = 0; /* 0 for synthesized CNAME TTL */ + d->ttl = lr_d->rr_ttl[0]; /* RFC6672-like behavior: + synth CNAME TTL uses original TTL*/ d->count = 1; d->rrsig_count = 0; d->trust = rrset_trust_ans_noAA; @@ -1509,7 +1578,7 @@ local_data_answer(struct local_zone* z, struct module_env* env, /* write qname */ memmove(d->rr_data[0] + sizeof(uint16_t), qinfo->qname, qinfo->qname_len - 1); - /* write cname target wilcard wildcard label */ + /* write cname target wildcard label */ memmove(d->rr_data[0] + sizeof(uint16_t) + qinfo->qname_len - 1, ctarget + 2, ctargetlen - 2); @@ -1539,7 +1608,7 @@ local_zone_does_not_cover(struct local_zone* z, struct query_info* qinfo, struct local_data key; struct local_data* ld = NULL; struct local_rrset* lr = NULL; - if(z->type == local_zone_always_transparent) + if(z->type == local_zone_always_transparent || z->type == local_zone_block_a) return 1; if(z->type != local_zone_transparent && z->type != local_zone_typetransparent @@ -1558,6 +1627,15 @@ local_zone_does_not_cover(struct local_zone* z, struct query_info* qinfo, return (lr == NULL); } +static inline int +local_zone_is_udp_query(struct comm_reply* repinfo) { + return repinfo != NULL + ? (repinfo->c != NULL + ? repinfo->c->type == comm_udp + : 0) + : 0; +} + int local_zones_zone_answer(struct local_zone* z, struct module_env* env, struct query_info* qinfo, struct edns_data* edns, @@ -1574,13 +1652,16 @@ local_zones_zone_answer(struct local_zone* z, struct module_env* env, } else if(lz_type == local_zone_refuse || lz_type == local_zone_always_refuse) { local_error_encode(qinfo, env, edns, repinfo, buf, temp, - LDNS_RCODE_REFUSED, (LDNS_RCODE_REFUSED|BIT_AA)); + LDNS_RCODE_REFUSED, (LDNS_RCODE_REFUSED|BIT_AA), + LDNS_EDE_NONE, NULL); return 1; } else if(lz_type == local_zone_static || lz_type == local_zone_redirect || lz_type == local_zone_inform_redirect || lz_type == local_zone_always_nxdomain || - lz_type == local_zone_always_nodata) { + lz_type == local_zone_always_nodata || + (lz_type == local_zone_truncate + && local_zone_is_udp_query(repinfo))) { /* for static, reply nodata or nxdomain * for redirect, reply nodata */ /* no additional section processing, @@ -1590,18 +1671,30 @@ local_zones_zone_answer(struct local_zone* z, struct module_env* env, */ int rcode = (ld || lz_type == local_zone_redirect || lz_type == local_zone_inform_redirect || - lz_type == local_zone_always_nodata)? + lz_type == local_zone_always_nodata || + lz_type == local_zone_truncate)? LDNS_RCODE_NOERROR:LDNS_RCODE_NXDOMAIN; - if(z->soa && z->soa_negative) + rcode = (lz_type == local_zone_truncate ? (rcode|BIT_TC) : rcode); + if(z != NULL && z->soa && z->soa_negative) return local_encode(qinfo, env, edns, repinfo, buf, temp, z->soa_negative, 0, rcode); - local_error_encode(qinfo, env, edns, repinfo, buf, temp, rcode, - (rcode|BIT_AA)); + local_error_encode(qinfo, env, edns, repinfo, buf, temp, + rcode, (rcode|BIT_AA), LDNS_EDE_NONE, NULL); return 1; } else if(lz_type == local_zone_typetransparent || lz_type == local_zone_always_transparent) { /* no NODATA or NXDOMAINS for this zone type */ return 0; + } else if(lz_type == local_zone_block_a) { + /* Return NODATA for all A queries */ + if(qinfo->qtype == LDNS_RR_TYPE_A) { + local_error_encode(qinfo, env, edns, repinfo, buf, temp, + LDNS_RCODE_NOERROR, (LDNS_RCODE_NOERROR|BIT_AA), + LDNS_EDE_NONE, NULL); + return 1; + } + + return 0; } else if(lz_type == local_zone_always_null) { /* 0.0.0.0 or ::0 or noerror/nodata for this zone type, * used for blocklists. */ @@ -1637,9 +1730,10 @@ local_zones_zone_answer(struct local_zone* z, struct module_env* env, return local_encode(qinfo, env, edns, repinfo, buf, temp, &lrr, 1, LDNS_RCODE_NOERROR); } else { + /* NODATA: No EDE needed */ local_error_encode(qinfo, env, edns, repinfo, buf, temp, LDNS_RCODE_NOERROR, - (LDNS_RCODE_NOERROR|BIT_AA)); + (LDNS_RCODE_NOERROR|BIT_AA), -1, NULL); } return 1; } @@ -1649,11 +1743,12 @@ local_zones_zone_answer(struct local_zone* z, struct module_env* env, * does not, then we should make this noerror/nodata */ if(ld && ld->rrsets) { int rcode = LDNS_RCODE_NOERROR; - if(z->soa && z->soa_negative) + if(z != NULL && z->soa && z->soa_negative) return local_encode(qinfo, env, edns, repinfo, buf, temp, z->soa_negative, 0, rcode); + /* NODATA: No EDE needed */ local_error_encode(qinfo, env, edns, repinfo, buf, temp, rcode, - (rcode|BIT_AA)); + (rcode|BIT_AA), LDNS_EDE_NONE, NULL); return 1; } @@ -1664,13 +1759,13 @@ local_zones_zone_answer(struct local_zone* z, struct module_env* env, /** print log information for an inform zone query */ static void lz_inform_print(struct local_zone* z, struct query_info* qinfo, - struct comm_reply* repinfo) + struct sockaddr_storage* addr, socklen_t addrlen) { char ip[128], txt[512]; char zname[LDNS_MAX_DOMAINLEN+1]; - uint16_t port = ntohs(((struct sockaddr_in*)&repinfo->addr)->sin_port); + uint16_t port = ntohs(((struct sockaddr_in*)addr)->sin_port); dname_str(z->name, zname); - addr_to_str(&repinfo->addr, repinfo->addrlen, ip, sizeof(ip)); + addr_to_str(addr, addrlen, ip, sizeof(ip)); snprintf(txt, sizeof(txt), "%s %s %s@%u", zname, local_zone_type2str(z->type), ip, (unsigned)port); log_nametypeclass(NO_VERBOSE, txt, qinfo->qname, qinfo->qtype, qinfo->qclass); @@ -1685,7 +1780,8 @@ lz_type(uint8_t *taglist, size_t taglen, uint8_t *taglist2, size_t taglen2, struct local_zone_override* lzo; if(repinfo && override_tree) { lzo = (struct local_zone_override*)addr_tree_lookup( - override_tree, &repinfo->addr, repinfo->addrlen); + override_tree, &repinfo->client_addr, + repinfo->client_addrlen); if(lzo && lzo->type) { verbose(VERB_ALGO, "local zone override to type %s", local_zone_type2str(lzo->type)); @@ -1765,7 +1861,8 @@ local_zones_answer(struct local_zones* zones, struct module_env* env, if(z && (lzt == local_zone_transparent || lzt == local_zone_typetransparent || lzt == local_zone_inform || - lzt == local_zone_always_transparent) && + lzt == local_zone_always_transparent || + lzt == local_zone_block_a) && local_zone_does_not_cover(z, qinfo, labs)) { lock_rw_unlock(&z->lock); z = NULL; @@ -1808,10 +1905,12 @@ local_zones_answer(struct local_zones* zones, struct module_env* env, lzt == local_zone_inform_deny || lzt == local_zone_inform_redirect) && repinfo) - lz_inform_print(z, qinfo, repinfo); + lz_inform_print(z, qinfo, &repinfo->client_addr, + repinfo->client_addrlen); if(lzt != local_zone_always_refuse && lzt != local_zone_always_transparent + && lzt != local_zone_block_a && lzt != local_zone_always_nxdomain && lzt != local_zone_always_nodata && lzt != local_zone_always_deny @@ -1842,12 +1941,14 @@ const char* local_zone_type2str(enum localzone_type t) case local_zone_inform_deny: return "inform_deny"; case local_zone_inform_redirect: return "inform_redirect"; case local_zone_always_transparent: return "always_transparent"; + case local_zone_block_a: return "block_a"; case local_zone_always_refuse: return "always_refuse"; case local_zone_always_nxdomain: return "always_nxdomain"; case local_zone_always_nodata: return "always_nodata"; case local_zone_always_deny: return "always_deny"; case local_zone_always_null: return "always_null"; case local_zone_noview: return "noview"; + case local_zone_truncate: return "truncate"; case local_zone_invalid: return "invalid"; } return "badtyped"; @@ -1875,6 +1976,8 @@ int local_zone_str2type(const char* type, enum localzone_type* t) *t = local_zone_inform_redirect; else if(strcmp(type, "always_transparent") == 0) *t = local_zone_always_transparent; + else if(strcmp(type, "block_a") == 0) + *t = local_zone_block_a; else if(strcmp(type, "always_refuse") == 0) *t = local_zone_always_refuse; else if(strcmp(type, "always_nxdomain") == 0) @@ -1887,6 +1990,8 @@ int local_zone_str2type(const char* type, enum localzone_type* t) *t = local_zone_always_null; else if(strcmp(type, "noview") == 0) *t = local_zone_noview; + else if(strcmp(type, "truncate") == 0) + *t = local_zone_truncate; else if(strcmp(type, "nodefault") == 0) *t = local_zone_nodefault; else return 0; diff --git a/contrib/unbound/services/localzone.h b/contrib/unbound/services/localzone.h index 3da5c8754bf3..4456893ee112 100644 --- a/contrib/unbound/services/localzone.h +++ b/contrib/unbound/services/localzone.h @@ -88,6 +88,8 @@ enum localzone_type { local_zone_inform_redirect, /** resolve normally, even when there is local data */ local_zone_always_transparent, + /** resolve normally, even when there is local data but return NODATA for A queries */ + local_zone_block_a, /** answer with error, even when there is local data */ local_zone_always_refuse, /** answer with nxdomain, even when there is local data */ @@ -101,6 +103,8 @@ enum localzone_type { local_zone_always_null, /** answer not from the view, but global or no-answer */ local_zone_noview, + /** truncate the response; client should retry via tcp */ + local_zone_truncate, /** Invalid type, cannot be used to generate answer */ local_zone_invalid }; @@ -158,7 +162,7 @@ struct local_zone { rbtree_type data; /** if data contains zone apex SOA data, this is a ptr to it. */ struct ub_packed_rrset_key* soa; - /** if data contains zone apex SOA data, this is a prt to an + /** if data contains zone apex SOA data, this is a ptr to an * artificial negative SOA rrset (TTL is the minimum of the TTL and the * SOA.MINIMUM). */ struct ub_packed_rrset_key* soa_negative; @@ -255,7 +259,7 @@ void local_zone_delete(struct local_zone* z); * @param dclass: class to lookup. * @param dtype: type to lookup, if type DS a zone higher is used for zonecuts. * @param taglist: taglist to lookup. - * @param taglen: lenth of taglist. + * @param taglen: length of taglist. * @param ignoretags: lookup zone by name and class, regardless the * local-zone's tags. * @return closest local_zone or NULL if no covering zone is found. @@ -563,6 +567,8 @@ enum respip_action { respip_always_nodata = local_zone_always_nodata, /** answer with nodata response */ respip_always_deny = local_zone_always_deny, + /** RPZ: truncate answer in order to force switch to tcp */ + respip_truncate = local_zone_truncate, /* The rest of the values are only possible as * access-control-tag-action */ diff --git a/contrib/unbound/services/mesh.c b/contrib/unbound/services/mesh.c index 91d23debf351..e886c4b92c84 100644 --- a/contrib/unbound/services/mesh.c +++ b/contrib/unbound/services/mesh.c @@ -4,22 +4,22 @@ * Copyright (c) 2007, NLnet Labs. All rights reserved. * * This software is open source. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * Neither the name of the NLNET LABS nor the names of its contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -47,6 +47,7 @@ #include "services/outbound_list.h" #include "services/cache/dns.h" #include "services/cache/rrset.h" +#include "services/cache/infra.h" #include "util/log.h" #include "util/net_help.h" #include "util/module.h" @@ -63,70 +64,18 @@ #include "util/data/dname.h" #include "respip/respip.h" #include "services/listen_dnsport.h" +#include "util/timeval_func.h" -/** subtract timers and the values do not overflow or become negative */ -static void -timeval_subtract(struct timeval* d, const struct timeval* end, const struct timeval* start) -{ -#ifndef S_SPLINT_S - time_t end_usec = end->tv_usec; - d->tv_sec = end->tv_sec - start->tv_sec; - if(end_usec < start->tv_usec) { - end_usec += 1000000; - d->tv_sec--; - } - d->tv_usec = end_usec - start->tv_usec; +#ifdef CLIENT_SUBNET +#include "edns-subnet/subnetmod.h" +#include "edns-subnet/edns-subnet.h" #endif -} - -/** add timers and the values do not overflow or become negative */ -static void -timeval_add(struct timeval* d, const struct timeval* add) -{ -#ifndef S_SPLINT_S - d->tv_sec += add->tv_sec; - d->tv_usec += add->tv_usec; - if(d->tv_usec >= 1000000 ) { - d->tv_usec -= 1000000; - d->tv_sec++; - } +#ifdef HAVE_SYS_TYPES_H +# include <sys/types.h> #endif -} - -/** divide sum of timers to get average */ -static void -timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d) -{ -#ifndef S_SPLINT_S - size_t leftover; - if(d == 0) { - avg->tv_sec = 0; - avg->tv_usec = 0; - return; - } - avg->tv_sec = sum->tv_sec / d; - avg->tv_usec = sum->tv_usec / d; - /* handle fraction from seconds divide */ - leftover = sum->tv_sec - avg->tv_sec*d; - avg->tv_usec += (leftover*1000000)/d; +#ifdef HAVE_NETDB_H +#include <netdb.h> #endif -} - -/** histogram compare of time values */ -static int -timeval_smaller(const struct timeval* x, const struct timeval* y) -{ -#ifndef S_SPLINT_S - if(x->tv_sec < y->tv_sec) - return 1; - else if(x->tv_sec == y->tv_sec) { - if(x->tv_usec <= y->tv_usec) - return 1; - else return 0; - } - else return 0; -#endif -} /** * Compare two response-ip client info entries for the purpose of mesh state @@ -238,7 +187,7 @@ mesh_state_ref_compare(const void* ap, const void* bp) return mesh_state_compare(a->s, b->s); } -struct mesh_area* +struct mesh_area* mesh_create(struct module_stack* stack, struct module_env* env) { struct mesh_area* mesh = calloc(1, sizeof(struct mesh_area)); @@ -264,6 +213,7 @@ mesh_create(struct module_stack* stack, struct module_env* env) mesh->stats_jostled = 0; mesh->stats_dropped = 0; mesh->ans_expired = 0; + mesh->ans_cachedb = 0; mesh->max_reply_states = env->cfg->num_queries_per_thread; mesh->max_forever_states = (mesh->max_reply_states+1)/2; #ifndef S_SPLINT_S @@ -287,7 +237,7 @@ mesh_delete_helper(rbnode_type* n) * traversal and rbtree rebalancing do not work together */ } -void +void mesh_delete(struct mesh_area* mesh) { if(!mesh) @@ -330,7 +280,7 @@ int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf) if(m && m->reply_list && m->list_select == mesh_jostle_list) { /* how old is it? */ struct timeval age; - timeval_subtract(&age, mesh->env->now_tv, + timeval_subtract(&age, mesh->env->now_tv, &m->reply_list->start_time); if(timeval_smaller(&mesh->jostle_max, &age)) { /* its a goner */ @@ -433,10 +383,10 @@ mesh_serve_expired_init(struct mesh_state* mstate, int timeout) mstate->s.serve_expired_data->get_cached_answer = mstate->s.serve_expired_data->get_cached_answer? mstate->s.serve_expired_data->get_cached_answer: - mesh_serve_expired_lookup; + &mesh_serve_expired_lookup; /* In case this timer already popped, start it again */ - if(!mstate->s.serve_expired_data->timer) { + if(!mstate->s.serve_expired_data->timer && timeout != -1) { mstate->s.serve_expired_data->timer = comm_timer_create( mstate->s.env->worker_base, mesh_serve_expired_callback, mstate); if(!mstate->s.serve_expired_data->timer) @@ -452,10 +402,11 @@ mesh_serve_expired_init(struct mesh_state* mstate, int timeout) void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, struct respip_client_info* cinfo, uint16_t qflags, - struct edns_data* edns, struct comm_reply* rep, uint16_t qid) + struct edns_data* edns, struct comm_reply* rep, uint16_t qid, + int rpz_passthru) { struct mesh_state* s = NULL; - int unique = unique_mesh_state(edns->opt_list, mesh->env); + int unique = unique_mesh_state(edns->opt_list_in, mesh->env); int was_detached = 0; int was_noreply = 0; int added = 0; @@ -465,6 +416,14 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, if(rep->c->tcp_req_info) { r_buffer = rep->c->tcp_req_info->spool_buffer; } + if(!infra_wait_limit_allowed(mesh->env->infra_cache, rep, + edns->cookie_valid, mesh->env->cfg)) { + verbose(VERB_ALGO, "Too many queries waiting from the IP. " + "dropping incoming query."); + comm_point_drop_reply(rep); + mesh->stats_dropped++; + return; + } if(!unique) s = mesh_area_find(mesh, cinfo, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); /* does this create a new reply state? */ @@ -499,26 +458,30 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, log_err("mesh_state_create: out of memory; SERVFAIL"); if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL, NULL, LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch, mesh->env->now_tv)) - edns->opt_list = NULL; + edns->opt_list_inplace_cb_out = NULL; error_encode(r_buffer, LDNS_RCODE_SERVFAIL, qinfo, qid, qflags, edns); comm_point_send_reply(rep); return; } + /* set detached (it is now) */ + mesh->num_detached_states++; if(unique) mesh_state_make_unique(s); + s->s.rpz_passthru = rpz_passthru; /* copy the edns options we got from the front */ - if(edns->opt_list) { - s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list, + if(edns->opt_list_in) { + s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list_in, s->s.region); if(!s->s.edns_opts_front_in) { - log_err("mesh_state_create: out of memory; SERVFAIL"); + log_err("edns_opt_copy_region: out of memory; SERVFAIL"); if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, NULL, NULL, LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch, mesh->env->now_tv)) - edns->opt_list = NULL; + edns->opt_list_inplace_cb_out = NULL; error_encode(r_buffer, LDNS_RCODE_SERVFAIL, qinfo, qid, qflags, edns); comm_point_send_reply(rep); + mesh_state_delete(&s->s); return; } } @@ -530,8 +493,6 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, #endif rbtree_insert(&mesh->all, &s->node); log_assert(n != NULL); - /* set detached (it is now) */ - mesh->num_detached_states++; added = 1; } if(!s->reply_list && !s->cb_list) { @@ -559,6 +520,19 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, log_err("mesh_new_client: out of memory initializing serve expired"); goto servfail_mem; } +#ifdef USE_CACHEDB + if(!timeout && mesh->env->cfg->serve_expired && + !mesh->env->cfg->serve_expired_client_timeout && + (mesh->env->cachedb_enabled && + mesh->env->cfg->cachedb_check_when_serve_expired)) { + if(!mesh_serve_expired_init(s, -1)) { + log_err("mesh_new_client: out of memory initializing serve expired"); + goto servfail_mem; + } + } +#endif + infra_wait_limit_inc(mesh->env->infra_cache, rep, *mesh->env->now, + mesh->env->cfg); /* update statistics */ if(was_detached) { log_assert(mesh->num_detached_states > 0); @@ -572,11 +546,11 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, /* move to either the forever or the jostle_list */ if(mesh->num_forever_states < mesh->max_forever_states) { mesh->num_forever_states ++; - mesh_list_insert(s, &mesh->forever_first, + mesh_list_insert(s, &mesh->forever_first, &mesh->forever_last); s->list_select = mesh_forever_list; } else { - mesh_list_insert(s, &mesh->jostle_first, + mesh_list_insert(s, &mesh->jostle_first, &mesh->jostle_last); s->list_select = mesh_jostle_list; } @@ -588,7 +562,7 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, servfail_mem: if(!inplace_cb_reply_servfail_call(mesh->env, qinfo, &s->s, NULL, LDNS_RCODE_SERVFAIL, edns, rep, mesh->env->scratch, mesh->env->now_tv)) - edns->opt_list = NULL; + edns->opt_list_inplace_cb_out = NULL; error_encode(r_buffer, LDNS_RCODE_SERVFAIL, qinfo, qid, qflags, edns); comm_point_send_reply(rep); @@ -597,13 +571,13 @@ servfail_mem: return; } -int +int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, - uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, - uint16_t qid, mesh_cb_func_type cb, void* cb_arg) + uint16_t qflags, struct edns_data* edns, sldns_buffer* buf, + uint16_t qid, mesh_cb_func_type cb, void* cb_arg, int rpz_passthru) { struct mesh_state* s = NULL; - int unique = unique_mesh_state(edns->opt_list, mesh->env); + int unique = unique_mesh_state(edns->opt_list_in, mesh->env); int timeout = mesh->env->cfg->serve_expired? mesh->env->cfg->serve_expired_client_timeout:0; int was_detached = 0; @@ -624,12 +598,16 @@ mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, if(!s) { return 0; } + /* set detached (it is now) */ + mesh->num_detached_states++; if(unique) mesh_state_make_unique(s); - if(edns->opt_list) { - s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list, + s->s.rpz_passthru = rpz_passthru; + if(edns->opt_list_in) { + s->s.edns_opts_front_in = edns_opt_copy_region(edns->opt_list_in, s->s.region); if(!s->s.edns_opts_front_in) { + mesh_state_delete(&s->s); return 0; } } @@ -640,8 +618,6 @@ mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, #endif rbtree_insert(&mesh->all, &s->node); log_assert(n != NULL); - /* set detached (it is now) */ - mesh->num_detached_states++; added = 1; } if(!s->reply_list && !s->cb_list) { @@ -658,8 +634,22 @@ mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, } /* add serve expired timer if not already there */ if(timeout && !mesh_serve_expired_init(s, timeout)) { + if(added) + mesh_state_delete(&s->s); return 0; } +#ifdef USE_CACHEDB + if(!timeout && mesh->env->cfg->serve_expired && + !mesh->env->cfg->serve_expired_client_timeout && + (mesh->env->cachedb_enabled && + mesh->env->cfg->cachedb_check_when_serve_expired)) { + if(!mesh_serve_expired_init(s, -1)) { + if(added) + mesh_state_delete(&s->s); + return 0; + } + } +#endif /* update statistics */ if(was_detached) { log_assert(mesh->num_detached_states > 0); @@ -680,7 +670,8 @@ mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, * 0 (false), in which case the new state is only made runnable so it * will not be run recursively on top of the current state. */ static void mesh_schedule_prefetch(struct mesh_area* mesh, - struct query_info* qinfo, uint16_t qflags, time_t leeway, int run) + struct query_info* qinfo, uint16_t qflags, time_t leeway, int run, + int rpz_passthru) { struct mesh_state* s = mesh_area_find(mesh, NULL, qinfo, qflags&(BIT_RD|BIT_CD), 0, 0); @@ -726,15 +717,100 @@ static void mesh_schedule_prefetch(struct mesh_area* mesh, /* move to either the forever or the jostle_list */ if(mesh->num_forever_states < mesh->max_forever_states) { mesh->num_forever_states ++; - mesh_list_insert(s, &mesh->forever_first, + mesh_list_insert(s, &mesh->forever_first, + &mesh->forever_last); + s->list_select = mesh_forever_list; + } else { + mesh_list_insert(s, &mesh->jostle_first, + &mesh->jostle_last); + s->list_select = mesh_jostle_list; + } + } + s->s.rpz_passthru = rpz_passthru; + + if(!run) { +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->run, &s->run_node); + log_assert(n != NULL); + return; + } + + mesh_run(mesh, s, module_event_new, NULL); +} + +#ifdef CLIENT_SUBNET +/* Same logic as mesh_schedule_prefetch but tailored to the subnet module logic + * like passing along the comm_reply info. This will be faked into an EDNS + * option for processing by the subnet module if the client has not already + * attached its own ECS data. */ +static void mesh_schedule_prefetch_subnet(struct mesh_area* mesh, + struct query_info* qinfo, uint16_t qflags, time_t leeway, int run, + int rpz_passthru, struct sockaddr_storage* addr, struct edns_option* edns_list) +{ + struct mesh_state* s = NULL; + struct edns_option* opt = NULL; +#ifdef UNBOUND_DEBUG + struct rbnode_type* n; +#endif + if(!mesh_make_new_space(mesh, NULL)) { + verbose(VERB_ALGO, "Too many queries. dropped prefetch."); + mesh->stats_dropped ++; + return; + } + + s = mesh_state_create(mesh->env, qinfo, NULL, + qflags&(BIT_RD|BIT_CD), 0, 0); + if(!s) { + log_err("prefetch_subnet mesh_state_create: out of memory"); + return; + } + mesh_state_make_unique(s); + + opt = edns_opt_list_find(edns_list, mesh->env->cfg->client_subnet_opcode); + if(opt) { + /* Use the client's ECS data */ + if(!edns_opt_list_append(&s->s.edns_opts_front_in, opt->opt_code, + opt->opt_len, opt->opt_data, s->s.region)) { + log_err("prefetch_subnet edns_opt_list_append: out of memory"); + return; + } + } else { + /* Store the client's address. Later in the subnet module, + * it is decided whether to include an ECS option or not. + */ + s->s.client_addr = *addr; + } +#ifdef UNBOUND_DEBUG + n = +#else + (void) +#endif + rbtree_insert(&mesh->all, &s->node); + log_assert(n != NULL); + /* set detached (it is now) */ + mesh->num_detached_states++; + /* make it ignore the cache */ + sock_list_insert(&s->s.blacklist, NULL, 0, s->s.region); + s->s.prefetch_leeway = leeway; + + if(s->list_select == mesh_no_list) { + /* move to either the forever or the jostle_list */ + if(mesh->num_forever_states < mesh->max_forever_states) { + mesh->num_forever_states ++; + mesh_list_insert(s, &mesh->forever_first, &mesh->forever_last); s->list_select = mesh_forever_list; } else { - mesh_list_insert(s, &mesh->jostle_first, + mesh_list_insert(s, &mesh->jostle_first, &mesh->jostle_last); s->list_select = mesh_jostle_list; } } + s->s.rpz_passthru = rpz_passthru; if(!run) { #ifdef UNBOUND_DEBUG @@ -749,11 +825,22 @@ static void mesh_schedule_prefetch(struct mesh_area* mesh, mesh_run(mesh, s, module_event_new, NULL); } +#endif /* CLIENT_SUBNET */ void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo, - uint16_t qflags, time_t leeway) + uint16_t qflags, time_t leeway, int rpz_passthru, + struct sockaddr_storage* addr, struct edns_option* opt_list) { - mesh_schedule_prefetch(mesh, qinfo, qflags, leeway, 1); + (void)addr; + (void)opt_list; +#ifdef CLIENT_SUBNET + if(addr) + mesh_schedule_prefetch_subnet(mesh, qinfo, qflags, leeway, 1, + rpz_passthru, addr, opt_list); + else +#endif + mesh_schedule_prefetch(mesh, qinfo, qflags, leeway, 1, + rpz_passthru); } void mesh_report_reply(struct mesh_area* mesh, struct outbound_entry* e, @@ -779,7 +866,7 @@ mesh_state_create(struct module_env* env, struct query_info* qinfo, int i; if(!region) return NULL; - mstate = (struct mesh_state*)regional_alloc(region, + mstate = (struct mesh_state*)regional_alloc(region, sizeof(struct mesh_state)); if(!mstate) { alloc_reg_release(env->alloc, region); @@ -833,6 +920,7 @@ mesh_state_create(struct module_env* env, struct query_info* qinfo, mstate->s.no_cache_store = 0; mstate->s.need_refetch = 0; mstate->s.was_ratelimited = 0; + mstate->s.qstarttime = *env->now; /* init modules */ for(i=0; i<env->mesh->mods.num; i++) { @@ -848,19 +936,13 @@ mesh_state_create(struct module_env* env, struct query_info* qinfo, return mstate; } -int -mesh_state_is_unique(struct mesh_state* mstate) -{ - return mstate->unique != NULL; -} - void mesh_state_make_unique(struct mesh_state* mstate) { mstate->unique = mstate; } -void +void mesh_state_cleanup(struct mesh_state* mstate) { struct mesh_area* mesh; @@ -882,6 +964,8 @@ mesh_state_cleanup(struct mesh_state* mstate) * takes no time and also it does not do the mesh accounting */ mstate->reply_list = NULL; for(; rep; rep=rep->next) { + infra_wait_limit_dec(mesh->env->infra_cache, + &rep->query_reply, mesh->env->cfg); comm_point_drop_reply(&rep->query_reply); log_assert(mesh->num_reply_addrs > 0); mesh->num_reply_addrs--; @@ -906,7 +990,7 @@ mesh_state_cleanup(struct mesh_state* mstate) alloc_reg_release(mstate->s.env->alloc, mstate->s.region); } -void +void mesh_state_delete(struct module_qstate* qstate) { struct mesh_area* mesh; @@ -919,10 +1003,10 @@ mesh_state_delete(struct module_qstate* qstate) mesh_detach_subs(&mstate->s); if(mstate->list_select == mesh_forever_list) { mesh->num_forever_states --; - mesh_list_remove(mstate, &mesh->forever_first, + mesh_list_remove(mstate, &mesh->forever_first, &mesh->forever_last); } else if(mstate->list_select == mesh_jostle_list) { - mesh_list_remove(mstate, &mesh->jostle_first, + mesh_list_remove(mstate, &mesh->jostle_first, &mesh->jostle_last); } if(!mstate->reply_list && !mstate->cb_list @@ -994,7 +1078,7 @@ void mesh_detach_subs(struct module_qstate* qstate) if(!ref->s->reply_list && !ref->s->cb_list && ref->s->super_set.count == 0) { mesh->num_detached_states++; - log_assert(mesh->num_detached_states + + log_assert(mesh->num_detached_states + mesh->num_reply_states <= mesh->all.count); } } @@ -1059,7 +1143,7 @@ int mesh_attach_sub(struct module_qstate* qstate, struct query_info* qinfo, if(!mesh_state_attachment(qstate->mesh_info, sub)) return 0; /* if it was a duplicate attachment, the count was not zero before */ - if(!sub->reply_list && !sub->cb_list && was_detached && + if(!sub->reply_list && !sub->cb_list && was_detached && sub->super_set.count == 1) { /* it used to be detached, before this one got added */ log_assert(mesh->num_detached_states > 0); @@ -1129,9 +1213,9 @@ mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, else secure = 0; if(!rep && rcode == LDNS_RCODE_NOERROR) rcode = LDNS_RCODE_SERVFAIL; - if(!rcode && (rep->security == sec_status_bogus || + if(!rcode && rep && (rep->security == sec_status_bogus || rep->security == sec_status_secure_sentinel_fail)) { - if(!(reason = errinf_to_str_bogus(&m->s))) + if(!(reason = errinf_to_str_bogus(&m->s, NULL))) rcode = LDNS_RCODE_SERVFAIL; } /* send the reply */ @@ -1139,11 +1223,11 @@ mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, if(rcode == LDNS_RCODE_SERVFAIL) { if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode, &r->edns, NULL, m->s.region, start_time)) - r->edns.opt_list = NULL; + r->edns.opt_list_inplace_cb_out = NULL; } else { if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode, &r->edns, NULL, m->s.region, start_time)) - r->edns.opt_list = NULL; + r->edns.opt_list_inplace_cb_out = NULL; } fptr_ok(fptr_whitelist_mesh_cb(r->cb)); (*r->cb)(r->cb_arg, rcode, r->buf, sec_status_unchecked, NULL, @@ -1155,13 +1239,15 @@ mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, r->edns.udp_size = EDNS_ADVERTISED_SIZE; r->edns.ext_rcode = 0; r->edns.bits &= EDNS_DO; + if(m->s.env->cfg->disable_edns_do && (r->edns.bits&EDNS_DO)) + r->edns.edns_present = 0; if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, LDNS_RCODE_NOERROR, &r->edns, NULL, m->s.region, start_time) || - !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, - r->qflags, r->buf, 0, 1, - m->s.env->scratch, udp_size, &r->edns, - (int)(r->edns.bits & EDNS_DO), secure)) + !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, + r->qflags, r->buf, 0, 1, + m->s.env->scratch, udp_size, &r->edns, + (int)(r->edns.bits & EDNS_DO), secure)) { fptr_ok(fptr_whitelist_mesh_cb(r->cb)); (*r->cb)(r->cb_arg, LDNS_RCODE_SERVFAIL, r->buf, @@ -1169,7 +1255,8 @@ mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, } else { fptr_ok(fptr_whitelist_mesh_cb(r->cb)); (*r->cb)(r->cb_arg, LDNS_RCODE_NOERROR, r->buf, - rep->security, reason, was_ratelimited); + (rep?rep->security:sec_status_unchecked), + reason, was_ratelimited); } } free(reason); @@ -1177,6 +1264,49 @@ mesh_do_callback(struct mesh_state* m, int rcode, struct reply_info* rep, m->s.env->mesh->num_reply_addrs--; } +static inline int +mesh_is_rpz_respip_tcponly_action(struct mesh_state const* m) +{ + struct respip_action_info const* respip_info = m->s.respip_action_info; + return (respip_info == NULL + ? 0 + : (respip_info->rpz_used + && !respip_info->rpz_disabled + && respip_info->action == respip_truncate)) + || m->s.tcp_required; +} + +static inline int +mesh_is_udp(struct mesh_reply const* r) +{ + return r->query_reply.c->type == comm_udp; +} + +static inline void +mesh_find_and_attach_ede_and_reason(struct mesh_state* m, + struct reply_info* rep, struct mesh_reply* r) +{ + /* OLD note: + * During validation the EDE code can be received via two + * code paths. One code path fills the reply_info EDE, and + * the other fills it in the errinf_strlist. These paths + * intersect at some points, but where is opaque due to + * the complexity of the validator. At the time of writing + * we make the choice to prefer the EDE from errinf_strlist + * but a compelling reason to do otherwise is just as valid + * NEW note: + * The compelling reason is that with caching support, the value + * in the reply_info is cached. + * The reason members of the reply_info struct should be + * updated as they are already cached. No reason to + * try and find the EDE information in errinf anymore. + */ + if(rep->reason_bogus != LDNS_EDE_NONE) { + edns_opt_list_append_ede(&r->edns.opt_list_out, + m->s.region, rep->reason_bogus, rep->reason_bogus_str); + } +} + /** * Send reply to mesh reply entry * @param m: mesh state to send it for. @@ -1195,22 +1325,24 @@ mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, struct timeval end_time; struct timeval duration; int secure; - /* Copy the client's EDNS for later restore, to make sure the edns - * compare is with the correct edns options. */ - struct edns_data edns_bak = r->edns; /* briefly set the replylist to null in case the * meshsendreply calls tcpreqinfo sendreply that * comm_point_drops because of size, and then the * null stops the mesh state remove and thus * reply_list modification and accounting */ struct mesh_reply* rlist = m->reply_list; + + /* rpz: apply actions */ + rcode = mesh_is_udp(r) && mesh_is_rpz_respip_tcponly_action(m) + ? (rcode|BIT_TC) : rcode; + /* examine security status */ if(m->s.env->need_to_validate && (!(r->qflags&BIT_CD) || - m->s.env->cfg->ignore_cd) && rep && + m->s.env->cfg->ignore_cd) && rep && (rep->security <= sec_status_bogus || rep->security == sec_status_secure_sentinel_fail)) { rcode = LDNS_RCODE_SERVFAIL; - if(m->s.env->cfg->stat_extended) + if(m->s.env->cfg->stat_extended) m->s.env->mesh->ans_bogus++; } if(rep && rep->security == sec_status_secure) @@ -1242,8 +1374,9 @@ mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, prev->edns.edns_present == r->edns.edns_present && prev->edns.bits == r->edns.bits && prev->edns.udp_size == r->edns.udp_size && - edns_opt_list_compare(prev->edns.opt_list, r->edns.opt_list) - == 0) { + edns_opt_list_compare(prev->edns.opt_list_out, r->edns.opt_list_out) == 0 && + edns_opt_list_compare(prev->edns.opt_list_inplace_cb_out, r->edns.opt_list_inplace_cb_out) == 0 + ) { /* if the previous reply is identical to this one, fix ID */ if(prev_buffer != r_buffer) sldns_buffer_copy(r_buffer, prev_buffer); @@ -1259,11 +1392,18 @@ mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, if(rcode == LDNS_RCODE_SERVFAIL) { if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode, &r->edns, &r->query_reply, m->s.region, &r->start_time)) - r->edns.opt_list = NULL; - } else { + r->edns.opt_list_inplace_cb_out = NULL; + } else { if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, rcode, &r->edns, &r->query_reply, m->s.region, &r->start_time)) - r->edns.opt_list = NULL; + r->edns.opt_list_inplace_cb_out = NULL; + } + /* Send along EDE EDNS0 option when SERVFAILing; usually + * DNSSEC validation failures */ + /* Since we are SERVFAILing here, CD bit and rep->security + * is already handled. */ + if(m->s.env->cfg->ede && rep) { + mesh_find_and_attach_ede_and_reason(m, rep, r); } error_encode(r_buffer, rcode, &m->s.qinfo, r->qid, r->qflags, &r->edns); @@ -1276,29 +1416,41 @@ mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, r->edns.udp_size = EDNS_ADVERTISED_SIZE; r->edns.ext_rcode = 0; r->edns.bits &= EDNS_DO; + if(m->s.env->cfg->disable_edns_do && (r->edns.bits&EDNS_DO)) + r->edns.edns_present = 0; m->s.qinfo.qname = r->qname; m->s.qinfo.local_alias = r->local_alias; + + /* Attach EDE without SERVFAIL if the validation failed. + * Need to explicitly check for rep->security otherwise failed + * validation paths may attach to a secure answer. */ + if(m->s.env->cfg->ede && rep && + (rep->security <= sec_status_bogus || + rep->security == sec_status_secure_sentinel_fail)) { + mesh_find_and_attach_ede_and_reason(m, rep, r); + } + if(!inplace_cb_reply_call(m->s.env, &m->s.qinfo, &m->s, rep, LDNS_RCODE_NOERROR, &r->edns, &r->query_reply, m->s.region, &r->start_time) || - !apply_edns_options(&r->edns, &edns_bak, - m->s.env->cfg, r->query_reply.c, - m->s.region) || - !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, + !reply_info_answer_encode(&m->s.qinfo, rep, r->qid, r->qflags, r_buffer, 0, 1, m->s.env->scratch, udp_size, &r->edns, (int)(r->edns.bits & EDNS_DO), - secure)) + secure)) { if(!inplace_cb_reply_servfail_call(m->s.env, &m->s.qinfo, &m->s, rep, LDNS_RCODE_SERVFAIL, &r->edns, &r->query_reply, m->s.region, &r->start_time)) - r->edns.opt_list = NULL; + r->edns.opt_list_inplace_cb_out = NULL; + /* internal server error (probably malloc failure) so no + * EDE (RFC8914) needed */ error_encode(r_buffer, LDNS_RCODE_SERVFAIL, &m->s.qinfo, r->qid, r->qflags, &r->edns); } - r->edns = edns_bak; m->reply_list = NULL; comm_point_send_reply(&r->query_reply); m->reply_list = rlist; } + infra_wait_limit_dec(m->s.env->infra_cache, &r->query_reply, + m->s.env->cfg); /* account */ log_assert(m->s.env->mesh->num_reply_addrs > 0); m->s.env->mesh->num_reply_addrs--; @@ -1319,8 +1471,11 @@ mesh_send_reply(struct mesh_state* m, int rcode, struct reply_info* rep, } /* Log reply sent */ if(m->s.env->cfg->log_replies) { - log_reply_info(NO_VERBOSE, &m->s.qinfo, &r->query_reply.addr, - r->query_reply.addrlen, duration, 0, r_buffer); + log_reply_info(NO_VERBOSE, &m->s.qinfo, + &r->query_reply.client_addr, + r->query_reply.client_addrlen, duration, 0, r_buffer, + (m->s.env->cfg->log_destaddr?(void*)r->query_reply.c->socket->addr:NULL), + r->query_reply.c->type); } } @@ -1333,6 +1488,7 @@ void mesh_query_done(struct mesh_state* mstate) struct reply_info* rep = (mstate->s.return_msg? mstate->s.return_msg->rep:NULL); struct timeval tv = {0, 0}; + int i = 0; /* No need for the serve expired timer anymore; we are going to reply. */ if(mstate->s.serve_expired_data) { comm_timer_delete(mstate->s.serve_expired_data->timer); @@ -1340,18 +1496,39 @@ void mesh_query_done(struct mesh_state* mstate) } if(mstate->s.return_rcode == LDNS_RCODE_SERVFAIL || (rep && FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_SERVFAIL)) { - /* we are SERVFAILing; check for expired asnwer here */ + /* we are SERVFAILing; check for expired answer here */ mesh_serve_expired_callback(mstate); if((mstate->reply_list || mstate->cb_list) && mstate->s.env->cfg->log_servfail && !mstate->s.env->cfg->val_log_squelch) { char* err = errinf_to_str_servfail(&mstate->s); - if(err) - log_err("%s", err); - free(err); + if(err) { log_err("%s", err); } } } for(r = mstate->reply_list; r; r = r->next) { + struct timeval old; + timeval_subtract(&old, mstate->s.env->now_tv, &r->start_time); + if(mstate->s.env->cfg->discard_timeout != 0 && + ((int)old.tv_sec)*1000+((int)old.tv_usec)/1000 > + mstate->s.env->cfg->discard_timeout) { + /* Drop the reply, it is too old */ + /* briefly set the reply_list to NULL, so that the + * tcp req info cleanup routine that calls the mesh + * to deregister the meshstate for it is not done + * because the list is NULL and also accounting is not + * done there, but instead we do that here. */ + struct mesh_reply* reply_list = mstate->reply_list; + verbose(VERB_ALGO, "drop reply, it is older than discard-timeout"); + infra_wait_limit_dec(mstate->s.env->infra_cache, + &r->query_reply, mstate->s.env->cfg); + mstate->reply_list = NULL; + comm_point_drop_reply(&r->query_reply); + mstate->reply_list = reply_list; + mstate->s.env->mesh->stats_dropped++; + continue; + } + + i++; tv = r->start_time; /* if a response-ip address block has been stored the @@ -1361,17 +1538,8 @@ void mesh_query_done(struct mesh_state* mstate) respip_inform_print(mstate->s.respip_action_info, r->qname, mstate->s.qinfo.qtype, mstate->s.qinfo.qclass, r->local_alias, - &r->query_reply); - if(mstate->s.env->cfg->stat_extended && - mstate->s.respip_action_info->rpz_used) { - if(mstate->s.respip_action_info->rpz_disabled) - mstate->s.env->mesh->rpz_action[RPZ_DISABLED_ACTION]++; - if(mstate->s.respip_action_info->rpz_cname_override) - mstate->s.env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION]++; - else - mstate->s.env->mesh->rpz_action[respip_action_to_rpz_action( - mstate->s.respip_action_info->action)]++; - } + &r->query_reply.client_addr, + r->query_reply.client_addrlen); } /* if this query is determined to be dropped during the @@ -1383,6 +1551,8 @@ void mesh_query_done(struct mesh_state* mstate) * because the list is NULL and also accounting is not * done there, but instead we do that here. */ struct mesh_reply* reply_list = mstate->reply_list; + infra_wait_limit_dec(mstate->s.env->infra_cache, + &r->query_reply, mstate->s.env->cfg); mstate->reply_list = NULL; comm_point_drop_reply(&r->query_reply); mstate->reply_list = reply_list; @@ -1402,6 +1572,27 @@ void mesh_query_done(struct mesh_state* mstate) prev_buffer = r_buffer; } } + /* Account for each reply sent. */ + if(i > 0 && mstate->s.respip_action_info && + mstate->s.respip_action_info->addrinfo && + mstate->s.env->cfg->stat_extended && + mstate->s.respip_action_info->rpz_used) { + if(mstate->s.respip_action_info->rpz_disabled) + mstate->s.env->mesh->rpz_action[RPZ_DISABLED_ACTION] += i; + if(mstate->s.respip_action_info->rpz_cname_override) + mstate->s.env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION] += i; + else + mstate->s.env->mesh->rpz_action[respip_action_to_rpz_action( + mstate->s.respip_action_info->action)] += i; + } + if(!mstate->s.is_drop && i > 0) { + if(mstate->s.env->cfg->stat_extended + && mstate->s.is_cachedb_answer) { + mstate->s.env->mesh->ans_cachedb += i; + } + } + + /* Mesh area accounting */ if(mstate->reply_list) { mstate->reply_list = NULL; if(!mstate->reply_list && !mstate->cb_list) { @@ -1414,6 +1605,7 @@ void mesh_query_done(struct mesh_state* mstate) mstate->s.env->mesh->num_detached_states++; } mstate->replies_sent = 1; + while((c = mstate->cb_list) != NULL) { /* take this cb off the list; so that the list can be * changed, eg. by adds from the callback routine */ @@ -1440,7 +1632,7 @@ void mesh_walk_supers(struct mesh_area* mesh, struct mesh_state* mstate) /* callback the function to inform super of result */ fptr_ok(fptr_whitelist_mod_inform_super( mesh->mods.mod[ref->s->s.curmod]->inform_super)); - (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, + (*mesh->mods.mod[ref->s->s.curmod]->inform_super)(&mstate->s, ref->s->s.curmod, &ref->s->s); /* copy state that is always relevant to super */ copy_state_to_super(&mstate->s, ref->s->s.curmod, &ref->s->s); @@ -1464,7 +1656,7 @@ struct mesh_state* mesh_area_find(struct mesh_area* mesh, * desire aggregation).*/ key.unique = NULL; key.s.client_info = cinfo; - + result = (struct mesh_state*)rbtree_search(&mesh->all, &key); return result; } @@ -1473,7 +1665,7 @@ int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns, sldns_buffer* buf, mesh_cb_func_type cb, void* cb_arg, uint16_t qid, uint16_t qflags) { - struct mesh_cb* r = regional_alloc(s->s.region, + struct mesh_cb* r = regional_alloc(s->s.region, sizeof(struct mesh_cb)); if(!r) return 0; @@ -1482,12 +1674,15 @@ int mesh_state_add_cb(struct mesh_state* s, struct edns_data* edns, r->cb = cb; r->cb_arg = cb_arg; r->edns = *edns; - if(edns->opt_list) { - r->edns.opt_list = edns_opt_copy_region(edns->opt_list, - s->s.region); - if(!r->edns.opt_list) - return 0; - } + if(edns->opt_list_in && !(r->edns.opt_list_in = + edns_opt_copy_region(edns->opt_list_in, s->s.region))) + return 0; + if(edns->opt_list_out && !(r->edns.opt_list_out = + edns_opt_copy_region(edns->opt_list_out, s->s.region))) + return 0; + if(edns->opt_list_inplace_cb_out && !(r->edns.opt_list_inplace_cb_out = + edns_opt_copy_region(edns->opt_list_inplace_cb_out, s->s.region))) + return 0; r->qid = qid; r->qflags = qflags; r->next = s->cb_list; @@ -1500,18 +1695,21 @@ int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns, struct comm_reply* rep, uint16_t qid, uint16_t qflags, const struct query_info* qinfo) { - struct mesh_reply* r = regional_alloc(s->s.region, + struct mesh_reply* r = regional_alloc(s->s.region, sizeof(struct mesh_reply)); if(!r) return 0; r->query_reply = *rep; r->edns = *edns; - if(edns->opt_list) { - r->edns.opt_list = edns_opt_copy_region(edns->opt_list, - s->s.region); - if(!r->edns.opt_list) - return 0; - } + if(edns->opt_list_in && !(r->edns.opt_list_in = + edns_opt_copy_region(edns->opt_list_in, s->s.region))) + return 0; + if(edns->opt_list_out && !(r->edns.opt_list_out = + edns_opt_copy_region(edns->opt_list_out, s->s.region))) + return 0; + if(edns->opt_list_inplace_cb_out && !(r->edns.opt_list_inplace_cb_out = + edns_opt_copy_region(edns->opt_list_inplace_cb_out, s->s.region))) + return 0; r->qid = qid; r->qflags = qflags; r->start_time = *s->s.env->now_tv; @@ -1557,7 +1755,7 @@ int mesh_state_add_reply(struct mesh_state* s, struct edns_data* edns, return 0; /* the rrset is not packed, like in the cache, but it is - * individualy allocated with an allocator from localzone. */ + * individually allocated with an allocator from localzone. */ d = regional_alloc_zero(s->s.region, sizeof(*d)); if(!d) return 0; @@ -1599,7 +1797,7 @@ mesh_copy_qinfo(struct mesh_state* mstate, struct query_info** qinfop, * Handles module finished. * @param mesh: the mesh area. * @param mstate: currently active mesh state. - * Deleted if finished, calls _done and _supers to + * Deleted if finished, calls _done and _supers to * send replies to clients and inform other mesh states. * This in turn may create additional runnable mesh states. * @param s: state at which the current module exited. @@ -1633,7 +1831,7 @@ mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate, } if(s == module_restart_next) { int curmod = mstate->s.curmod; - for(; mstate->s.curmod < mesh->mods.num; + for(; mstate->s.curmod < mesh->mods.num; mstate->s.curmod++) { fptr_ok(fptr_whitelist_mod_clear( mesh->mods.mod[mstate->s.curmod]->clear)); @@ -1665,7 +1863,20 @@ mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate, if(s == module_finished) { if(mstate->s.curmod == 0) { struct query_info* qinfo = NULL; + struct edns_option* opt_list = NULL; + struct sockaddr_storage addr; uint16_t qflags; + int rpz_p = 0; + +#ifdef CLIENT_SUBNET + struct edns_option* ecs; + if(mstate->s.need_refetch && mstate->reply_list && + modstack_find(&mesh->mods, "subnetcache") != -1 && + mstate->s.env->unique_mesh) { + addr = mstate->reply_list->query_reply.client_addr; + } else +#endif + memset(&addr, 0, sizeof(addr)); mesh_query_done(mstate); mesh_walk_supers(mesh, mstate); @@ -1674,13 +1885,30 @@ mesh_continue(struct mesh_area* mesh, struct mesh_state* mstate, * from an external DNS server, we'll need to schedule * a prefetch after removing the current state, so * we need to make a copy of the query info here. */ - if(mstate->s.need_refetch) + if(mstate->s.need_refetch) { mesh_copy_qinfo(mstate, &qinfo, &qflags); +#ifdef CLIENT_SUBNET + /* Make also a copy of the ecs option if any */ + if((ecs = edns_opt_list_find( + mstate->s.edns_opts_front_in, + mstate->s.env->cfg->client_subnet_opcode)) != NULL) { + (void)edns_opt_list_append(&opt_list, + ecs->opt_code, ecs->opt_len, + ecs->opt_data, + mstate->s.env->scratch); + } +#endif + rpz_p = mstate->s.rpz_passthru; + } - mesh_state_delete(&mstate->s); if(qinfo) { - mesh_schedule_prefetch(mesh, qinfo, qflags, - 0, 1); + mesh_state_delete(&mstate->s); + mesh_new_prefetch(mesh, qinfo, qflags, 0, + rpz_p, + addr.ss_family!=AF_UNSPEC?&addr:NULL, + opt_list); + } else { + mesh_state_delete(&mstate->s); } return 0; } @@ -1708,7 +1936,7 @@ void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate, mstate->s.reply = NULL; regional_free_all(mstate->s.env->scratch); s = mstate->s.ext_state[mstate->s.curmod]; - verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", + verbose(VERB_ALGO, "mesh_run: %s module exit state is %s", mesh->mods.mod[mstate->s.curmod]->name, strextstate(s)); e = NULL; if(mesh_continue(mesh, mstate, s, &ev)) @@ -1728,14 +1956,14 @@ void mesh_run(struct mesh_area* mesh, struct mesh_state* mstate, } } -void +void mesh_log_list(struct mesh_area* mesh) { char buf[30]; struct mesh_state* m; int num = 0; RBTREE_FOR(m, struct mesh_state*, &mesh->all) { - snprintf(buf, sizeof(buf), "%d%s%s%s%s%s%s mod%d %s%s", + snprintf(buf, sizeof(buf), "%d%s%s%s%s%s%s mod%d %s%s", num++, (m->s.is_priming)?"p":"", /* prime */ (m->s.is_valrec)?"v":"", /* prime */ (m->s.query_flags&BIT_RD)?"RD":"", @@ -1744,18 +1972,18 @@ mesh_log_list(struct mesh_area* mesh) (m->sub_set.count!=0)?"c":"", /* children */ m->s.curmod, (m->reply_list)?"rep":"", /*hasreply*/ (m->cb_list)?"cb":"" /* callbacks */ - ); + ); log_query_info(VERB_ALGO, buf, &m->s.qinfo); } } -void +void mesh_stats(struct mesh_area* mesh, const char* str) { verbose(VERB_DETAIL, "%s %u recursion states (%u with reply, " "%u detached), %u waiting replies, %u recursion replies " - "sent, %d replies dropped, %d states jostled out", - str, (unsigned)mesh->all.count, + "sent, %d replies dropped, %d states jostled out", + str, (unsigned)mesh->all.count, (unsigned)mesh->num_reply_states, (unsigned)mesh->num_detached_states, (unsigned)mesh->num_reply_addrs, @@ -1764,7 +1992,7 @@ mesh_stats(struct mesh_area* mesh, const char* str) (unsigned)mesh->stats_jostled); if(mesh->replies_sent > 0) { struct timeval avg; - timeval_divide(&avg, &mesh->replies_sum_wait, + timeval_divide(&avg, &mesh->replies_sum_wait, mesh->replies_sent); log_info("average recursion processing time " ARG_LL "d.%6.6d sec", @@ -1774,7 +2002,7 @@ mesh_stats(struct mesh_area* mesh, const char* str) } } -void +void mesh_stats_clear(struct mesh_area* mesh) { if(!mesh) @@ -1788,12 +2016,13 @@ mesh_stats_clear(struct mesh_area* mesh) mesh->ans_secure = 0; mesh->ans_bogus = 0; mesh->ans_expired = 0; + mesh->ans_cachedb = 0; memset(&mesh->ans_rcode[0], 0, sizeof(size_t)*UB_STATS_RCODE_NUM); memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM); mesh->ans_nodata = 0; } -size_t +size_t mesh_get_mem(struct mesh_area* mesh) { struct mesh_state* m; @@ -1807,14 +2036,13 @@ mesh_get_mem(struct mesh_area* mesh) return s; } -int +int mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo, uint16_t flags, int prime, int valrec) { struct mesh_area* mesh = qstate->env->mesh; struct mesh_state* dep_m = NULL; - if(!mesh_state_is_unique(qstate->mesh_info)) - dep_m = mesh_area_find(mesh, NULL, qinfo, flags, prime, valrec); + dep_m = mesh_area_find(mesh, NULL, qinfo, flags, prime, valrec); return mesh_detect_cycle_found(qstate, dep_m); } @@ -1857,6 +2085,8 @@ void mesh_state_remove_reply(struct mesh_area* mesh, struct mesh_state* m, /* delete it, but allocated in m region */ log_assert(mesh->num_reply_addrs > 0); mesh->num_reply_addrs--; + infra_wait_limit_dec(mesh->env->infra_cache, + &n->query_reply, mesh->env->cfg); /* prev = prev; */ n = n->next; @@ -1891,7 +2121,7 @@ apply_respip_action(struct module_qstate* qstate, return 1; if(!respip_rewrite_reply(qinfo, cinfo, rep, encode_repp, actinfo, - alias_rrset, 0, qstate->region, az)) + alias_rrset, 0, qstate->region, az, NULL)) return 0; /* xxx_deny actions mean dropping the reply, unless the original reply @@ -1925,6 +2155,7 @@ mesh_serve_expired_callback(void* arg) struct timeval tv = {0, 0}; int must_validate = (!(qstate->query_flags&BIT_CD) || qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate; + int i = 0; if(!qstate->serve_expired_data) return; verbose(VERB_ALGO, "Serve expired: Trying to reply with expired data"); comm_timer_delete(qstate->serve_expired_data->timer); @@ -1941,7 +2172,7 @@ mesh_serve_expired_callback(void* arg) while(1) { fptr_ok(fptr_whitelist_serve_expired_lookup( qstate->serve_expired_data->get_cached_answer)); - msg = qstate->serve_expired_data->get_cached_answer(qstate, + msg = (*qstate->serve_expired_data->get_cached_answer)(qstate, lookup_qinfo); if(!msg) return; @@ -1996,6 +2227,29 @@ mesh_serve_expired_callback(void* arg) log_dns_msg("Serve expired lookup", &qstate->qinfo, msg->rep); for(r = mstate->reply_list; r; r = r->next) { + struct timeval old; + timeval_subtract(&old, mstate->s.env->now_tv, &r->start_time); + if(mstate->s.env->cfg->discard_timeout != 0 && + ((int)old.tv_sec)*1000+((int)old.tv_usec)/1000 > + mstate->s.env->cfg->discard_timeout) { + /* Drop the reply, it is too old */ + /* briefly set the reply_list to NULL, so that the + * tcp req info cleanup routine that calls the mesh + * to deregister the meshstate for it is not done + * because the list is NULL and also accounting is not + * done there, but instead we do that here. */ + struct mesh_reply* reply_list = mstate->reply_list; + verbose(VERB_ALGO, "drop reply, it is older than discard-timeout"); + infra_wait_limit_dec(mstate->s.env->infra_cache, + &r->query_reply, mstate->s.env->cfg); + mstate->reply_list = NULL; + comm_point_drop_reply(&r->query_reply); + mstate->reply_list = reply_list; + mstate->s.env->mesh->stats_dropped++; + continue; + } + + i++; tv = r->start_time; /* If address info is returned, it means the action should be an @@ -2003,17 +2257,16 @@ mesh_serve_expired_callback(void* arg) if(actinfo.addrinfo) { respip_inform_print(&actinfo, r->qname, qstate->qinfo.qtype, qstate->qinfo.qclass, - r->local_alias, &r->query_reply); - - if(qstate->env->cfg->stat_extended && actinfo.rpz_used) { - if(actinfo.rpz_disabled) - qstate->env->mesh->rpz_action[RPZ_DISABLED_ACTION]++; - if(actinfo.rpz_cname_override) - qstate->env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION]++; - else - qstate->env->mesh->rpz_action[ - respip_action_to_rpz_action(actinfo.action)]++; - } + r->local_alias, &r->query_reply.client_addr, + r->query_reply.client_addrlen); + } + + /* Add EDE Stale Answer (RCF8914). Ignore global ede as this is + * warning instead of an error */ + if (r->edns.edns_present && qstate->env->cfg->ede_serve_expired && + qstate->env->cfg->ede) { + edns_opt_list_append_ede(&r->edns.opt_list_out, + mstate->s.region, LDNS_EDE_STALE_ANSWER, NULL); } r_buffer = r->query_reply.c->buffer; @@ -2023,13 +2276,27 @@ mesh_serve_expired_callback(void* arg) r, r_buffer, prev, prev_buffer); if(r->query_reply.c->tcp_req_info) tcp_req_info_remove_mesh_state(r->query_reply.c->tcp_req_info, mstate); + infra_wait_limit_dec(mstate->s.env->infra_cache, + &r->query_reply, mstate->s.env->cfg); prev = r; prev_buffer = r_buffer; - - /* Account for each reply sent. */ - mesh->ans_expired++; - } + /* Account for each reply sent. */ + if(i > 0) { + mesh->ans_expired += i; + if(actinfo.addrinfo && qstate->env->cfg->stat_extended && + actinfo.rpz_used) { + if(actinfo.rpz_disabled) + qstate->env->mesh->rpz_action[RPZ_DISABLED_ACTION] += i; + if(actinfo.rpz_cname_override) + qstate->env->mesh->rpz_action[RPZ_CNAME_OVERRIDE_ACTION] += i; + else + qstate->env->mesh->rpz_action[ + respip_action_to_rpz_action(actinfo.action)] += i; + } + } + + /* Mesh area accounting */ if(mstate->reply_list) { mstate->reply_list = NULL; if(!mstate->reply_list && !mstate->cb_list) { @@ -2040,6 +2307,7 @@ mesh_serve_expired_callback(void* arg) } } } + while((c = mstate->cb_list) != NULL) { /* take this cb off the list; so that the list can be * changed, eg. by adds from the callback routine */ @@ -2055,3 +2323,18 @@ mesh_serve_expired_callback(void* arg) mesh_do_callback(mstate, LDNS_RCODE_NOERROR, msg->rep, c, &tv); } } + +void +mesh_respond_serve_expired(struct mesh_state* mstate) +{ + if(!mstate->s.serve_expired_data) + mesh_serve_expired_init(mstate, -1); + mesh_serve_expired_callback(mstate); +} + +int mesh_jostle_exceeded(struct mesh_area* mesh) +{ + if(mesh->all.count < mesh->max_reply_states) + return 0; + return 1; +} diff --git a/contrib/unbound/services/mesh.h b/contrib/unbound/services/mesh.h index d0a4b5fb3d0d..5bd53e065e8f 100644 --- a/contrib/unbound/services/mesh.h +++ b/contrib/unbound/services/mesh.h @@ -114,6 +114,8 @@ struct mesh_area { size_t stats_dropped; /** stats, number of expired replies sent */ size_t ans_expired; + /** stats, number of cached replies from cachedb */ + size_t ans_cachedb; /** number of replies sent */ size_t replies_sent; /** sum of waiting times for the replies */ @@ -296,10 +298,13 @@ void mesh_delete(struct mesh_area* mesh); * @param edns: edns data from client query. * @param rep: where to reply to. * @param qid: query id to reply with. + * @param rpz_passthru: if true, the rpz passthru was previously found and + * further rpz processing is stopped. */ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, struct respip_client_info* cinfo, uint16_t qflags, - struct edns_data* edns, struct comm_reply* rep, uint16_t qid); + struct edns_data* edns, struct comm_reply* rep, uint16_t qid, + int rpz_passthru); /** * New query with callback. Create new query state if needed, and @@ -314,11 +319,13 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo, * @param qid: query id to reply with. * @param cb: callback function. * @param cb_arg: callback user arg. + * @param rpz_passthru: if true, the rpz passthru was previously found and + * further rpz processing is stopped. * @return 0 on error. */ int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, uint16_t qflags, struct edns_data* edns, struct sldns_buffer* buf, - uint16_t qid, mesh_cb_func_type cb, void* cb_arg); + uint16_t qid, mesh_cb_func_type cb, void* cb_arg, int rpz_passthru); /** * New prefetch message. Create new query state if needed. @@ -328,9 +335,15 @@ int mesh_new_callback(struct mesh_area* mesh, struct query_info* qinfo, * @param qinfo: query from client. * @param qflags: flags from client query. * @param leeway: TTL leeway what to expire earlier for this update. + * @param rpz_passthru: if true, the rpz passthru was previously found and + * further rpz processing is stopped. + * @param addr: sockaddr_storage for the client; to be used with subnet. + * @param opt_list: edns opt_list from the client; to be used when subnet is + * enabled. */ void mesh_new_prefetch(struct mesh_area* mesh, struct query_info* qinfo, - uint16_t qflags, time_t leeway); + uint16_t qflags, time_t leeway, int rpz_passthru, + struct sockaddr_storage* addr, struct edns_option* opt_list); /** * Handle new event from the wire. A serviced query has returned. @@ -468,14 +481,6 @@ struct mesh_state* mesh_state_create(struct module_env* env, uint16_t qflags, int prime, int valrec); /** - * Check if the mesh state is unique. - * A unique mesh state uses it's unique member to point to itself, else NULL. - * @param mstate: mesh state to check. - * @return true if the mesh state is unique, false otherwise. - */ -int mesh_state_is_unique(struct mesh_state* mstate); - -/** * Make a mesh state unique. * A unique mesh state uses it's unique member to point to itself. * @param mstate: mesh state to check. @@ -674,4 +679,21 @@ struct dns_msg* mesh_serve_expired_lookup(struct module_qstate* qstate, struct query_info* lookup_qinfo); +/** + * See if the mesh has space for more queries. You can allocate queries + * anyway, but this checks for the allocated space. + * @param mesh: mesh area. + * @return true if the query list is full. + * It checks the number of all queries, not just number of reply states, + * that have a client address. So that spawned queries count too, + * that were created by the iterator, or other modules. + */ +int mesh_jostle_exceeded(struct mesh_area* mesh); + +/** + * Give the serve expired responses. + * @param mstate: mesh state for query that has serve_expired_data. + */ +void mesh_respond_serve_expired(struct mesh_state* mstate); + #endif /* SERVICES_MESH_H */ diff --git a/contrib/unbound/services/modstack.c b/contrib/unbound/services/modstack.c index a600549b16c3..a90d7178c410 100644 --- a/contrib/unbound/services/modstack.c +++ b/contrib/unbound/services/modstack.c @@ -88,57 +88,60 @@ count_modules(const char* s) return num; } -void +void modstack_init(struct module_stack* stack) { stack->num = 0; stack->mod = NULL; } -int +int modstack_config(struct module_stack* stack, const char* module_conf) { - int i; - verbose(VERB_QUERY, "module config: \"%s\"", module_conf); - stack->num = count_modules(module_conf); - if(stack->num == 0) { - log_err("error: no modules specified"); - return 0; - } - if(stack->num > MAX_MODULE) { - log_err("error: too many modules (%d max %d)", - stack->num, MAX_MODULE); - return 0; - } - stack->mod = (struct module_func_block**)calloc((size_t) - stack->num, sizeof(struct module_func_block*)); - if(!stack->mod) { - log_err("out of memory"); - return 0; - } - for(i=0; i<stack->num; i++) { - stack->mod[i] = module_factory(&module_conf); - if(!stack->mod[i]) { + int i; + verbose(VERB_QUERY, "module config: \"%s\"", module_conf); + stack->num = count_modules(module_conf); + if(stack->num == 0) { + log_err("error: no modules specified"); + return 0; + } + if(stack->num > MAX_MODULE) { + log_err("error: too many modules (%d max %d)", + stack->num, MAX_MODULE); + return 0; + } + stack->mod = (struct module_func_block**)calloc((size_t) + stack->num, sizeof(struct module_func_block*)); + if(!stack->mod) { + log_err("out of memory"); + return 0; + } + for(i=0; i<stack->num; i++) { + stack->mod[i] = module_factory(&module_conf); + if(!stack->mod[i]) { char md[256]; + char * s = md; snprintf(md, sizeof(md), "%s", module_conf); - if(strchr(md, ' ')) *(strchr(md, ' ')) = 0; - if(strchr(md, '\t')) *(strchr(md, '\t')) = 0; - log_err("Unknown value in module-config, module: '%s'." + /* Leading spaces are present on errors. */ + while (*s && isspace((unsigned char)*s)) + s++; + if(strchr(s, ' ')) *(strchr(s, ' ')) = 0; + if(strchr(s, '\t')) *(strchr(s, '\t')) = 0; + log_err("Unknown value in module-config, module: '%s'." " This module is not present (not compiled in)," - " See the list of linked modules with unbound -h", - md); - return 0; - } - } - return 1; + " See the list of linked modules with unbound -V", s); + return 0; + } + } + return 1; } /** The list of module names */ const char** module_list_avail(void) { - /* these are the modules available */ - static const char* names[] = { + /* these are the modules available */ + static const char* names[] = { "dns64", #ifdef WITH_PYTHONMODULE "python", @@ -156,7 +159,7 @@ module_list_avail(void) "subnetcache", #endif #ifdef USE_IPSET - "ipset", + "ipset", #endif "respip", "validator", diff --git a/contrib/unbound/services/outbound_list.h b/contrib/unbound/services/outbound_list.h index ad59e42d1929..73c137d50993 100644 --- a/contrib/unbound/services/outbound_list.h +++ b/contrib/unbound/services/outbound_list.h @@ -79,7 +79,7 @@ void outbound_list_init(struct outbound_list* list); * Clear the user owner outbound list structure. * Deletes serviced queries. * @param list: the list structure. It is cleared, but the list struct itself - * is callers responsability to delete. + * is callers responsibility to delete. */ void outbound_list_clear(struct outbound_list* list); diff --git a/contrib/unbound/services/outside_network.c b/contrib/unbound/services/outside_network.c index 6c6b42ccbdb8..1f89740da360 100644 --- a/contrib/unbound/services/outside_network.c +++ b/contrib/unbound/services/outside_network.c @@ -86,14 +86,20 @@ static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff) static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout); -/** remove waiting tcp from the outnet waiting list */ -static void waiting_list_remove(struct outside_network* outnet, - struct waiting_tcp* w); - -/** remove reused element from tree and lru list */ -static void reuse_tcp_remove_tree_list(struct outside_network* outnet, +/** select a DNS ID for a TCP stream */ +static uint16_t tcp_select_id(struct outside_network* outnet, struct reuse_tcp* reuse); +/** Perform serviced query UDP sending operation */ +static int serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff); + +/** Send serviced query over TCP return false on initial failure */ +static int serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff); + +/** call the callbacks for a serviced query */ +static void serviced_callbacks(struct serviced_query* sq, int error, + struct comm_point* c, struct comm_reply* rep); + int pending_cmp(const void* key1, const void* key2) { @@ -198,15 +204,17 @@ waiting_tcp_delete(struct waiting_tcp* w) * Pick random outgoing-interface of that family, and bind it. * port set to 0 so OS picks a port number for us. * if it is the ANY address, do not bind. + * @param pend: pending tcp structure, for storing the local address choice. * @param w: tcp structure with destination address. * @param s: socket fd. * @return false on error, socket closed. */ static int -pick_outgoing_tcp(struct waiting_tcp* w, int s) +pick_outgoing_tcp(struct pending_tcp* pend, struct waiting_tcp* w, int s) { struct port_if* pi = NULL; int num; + pend->pi = NULL; #ifdef INET6 if(addr_is_ip6(&w->addr, w->addrlen)) num = w->outnet->num_ip6; @@ -226,6 +234,7 @@ pick_outgoing_tcp(struct waiting_tcp* w, int s) #endif pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)]; log_assert(pi); + pend->pi = pi; if(addr_is_any(&pi->addr, pi->addrlen)) { /* binding to the ANY interface is for listening sockets */ return 1; @@ -235,7 +244,14 @@ pick_outgoing_tcp(struct waiting_tcp* w, int s) ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0; else ((struct sockaddr_in*)&pi->addr)->sin_port = 0; if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) { - log_err("outgoing tcp: bind: %s", sock_strerror(errno)); +#ifndef USE_WINSOCK +#ifdef EADDRNOTAVAIL + if(!(verbosity < 4 && errno == EADDRNOTAVAIL)) +#endif +#else /* USE_WINSOCK */ + if(!(verbosity < 4 && WSAGetLastError() == WSAEADDRNOTAVAIL)) +#endif + log_err("outgoing tcp: bind: %s", sock_strerror(errno)); sock_close(s); return 0; } @@ -251,7 +267,7 @@ outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, int tcp_mss, int s; int af; char* err; -#ifdef SO_REUSEADDR +#if defined(SO_REUSEADDR) || defined(IP_BIND_ADDRESS_NO_PORT) int on = 1; #endif #ifdef INET6 @@ -297,7 +313,13 @@ outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, int tcp_mss, " setsockopt(TCP_MAXSEG) unsupported"); #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */ } - +#ifdef IP_BIND_ADDRESS_NO_PORT + if(setsockopt(s, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (void*)&on, + (socklen_t)sizeof(on)) < 0) { + verbose(VERB_ALGO, "outgoing tcp:" + " setsockopt(.. IP_BIND_ADDRESS_NO_PORT ..) failed"); + } +#endif /* IP_BIND_ADDRESS_NO_PORT */ return s; } @@ -337,6 +359,8 @@ log_reuse_tcp(enum verbosity_value v, const char* msg, struct reuse_tcp* reuse) uint16_t port; char addrbuf[128]; if(verbosity < v) return; + if(!reuse || !reuse->pending || !reuse->pending->c) + return; addr_to_str(&reuse->addr, reuse->addrlen, addrbuf, sizeof(addrbuf)); port = ntohs(((struct sockaddr_in*)&reuse->addr)->sin_port); verbose(v, "%s %s#%u fd %d", msg, addrbuf, (unsigned)port, @@ -344,7 +368,8 @@ log_reuse_tcp(enum verbosity_value v, const char* msg, struct reuse_tcp* reuse) } /** pop the first element from the writewait list */ -static struct waiting_tcp* reuse_write_wait_pop(struct reuse_tcp* reuse) +struct waiting_tcp* +reuse_write_wait_pop(struct reuse_tcp* reuse) { struct waiting_tcp* w = reuse->write_wait_first; if(!w) @@ -356,13 +381,17 @@ static struct waiting_tcp* reuse_write_wait_pop(struct reuse_tcp* reuse) w->write_wait_next->write_wait_prev = NULL; else reuse->write_wait_last = NULL; w->write_wait_queued = 0; + w->write_wait_next = NULL; + w->write_wait_prev = NULL; return w; } /** remove the element from the writewait list */ -static void reuse_write_wait_remove(struct reuse_tcp* reuse, - struct waiting_tcp* w) +void +reuse_write_wait_remove(struct reuse_tcp* reuse, struct waiting_tcp* w) { + log_assert(w); + log_assert(w->write_wait_queued); if(!w) return; if(!w->write_wait_queued) @@ -370,24 +399,34 @@ static void reuse_write_wait_remove(struct reuse_tcp* reuse, if(w->write_wait_prev) w->write_wait_prev->write_wait_next = w->write_wait_next; else reuse->write_wait_first = w->write_wait_next; + log_assert(!w->write_wait_prev || + w->write_wait_prev->write_wait_next != w->write_wait_prev); if(w->write_wait_next) w->write_wait_next->write_wait_prev = w->write_wait_prev; else reuse->write_wait_last = w->write_wait_prev; + log_assert(!w->write_wait_next + || w->write_wait_next->write_wait_prev != w->write_wait_next); w->write_wait_queued = 0; + w->write_wait_next = NULL; + w->write_wait_prev = NULL; } /** push the element after the last on the writewait list */ -static void reuse_write_wait_push_back(struct reuse_tcp* reuse, - struct waiting_tcp* w) +void +reuse_write_wait_push_back(struct reuse_tcp* reuse, struct waiting_tcp* w) { if(!w) return; log_assert(!w->write_wait_queued); if(reuse->write_wait_last) { reuse->write_wait_last->write_wait_next = w; + log_assert(reuse->write_wait_last->write_wait_next != + reuse->write_wait_last); w->write_wait_prev = reuse->write_wait_last; } else { reuse->write_wait_first = w; + w->write_wait_prev = NULL; } + w->write_wait_next = NULL; reuse->write_wait_last = w; w->write_wait_queued = 1; } @@ -396,9 +435,18 @@ static void reuse_write_wait_push_back(struct reuse_tcp* reuse, void reuse_tree_by_id_insert(struct reuse_tcp* reuse, struct waiting_tcp* w) { +#ifdef UNBOUND_DEBUG + rbnode_type* added; +#endif log_assert(w->id_node.key == NULL); w->id_node.key = w; +#ifdef UNBOUND_DEBUG + added = +#else + (void) +#endif rbtree_insert(&reuse->tree_by_id, &w->id_node); + log_assert(added); /* should have been added */ } /** find element in tree by id */ @@ -424,34 +472,45 @@ tree_by_id_get_id(rbnode_type* node) } /** insert into reuse tcp tree and LRU, false on failure (duplicate) */ -static int +int reuse_tcp_insert(struct outside_network* outnet, struct pending_tcp* pend_tcp) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_insert", &pend_tcp->reuse); if(pend_tcp->reuse.item_on_lru_list) { if(!pend_tcp->reuse.node.key) - log_err("internal error: reuse_tcp_insert: on lru list without key"); + log_err("internal error: reuse_tcp_insert: " + "in lru list without key"); return 1; } pend_tcp->reuse.node.key = &pend_tcp->reuse; pend_tcp->reuse.pending = pend_tcp; if(!rbtree_insert(&outnet->tcp_reuse, &pend_tcp->reuse.node)) { - /* this is a duplicate connection, close this one */ - verbose(VERB_CLIENT, "reuse_tcp_insert: duplicate connection"); - pend_tcp->reuse.node.key = NULL; - return 0; + /* We are not in the LRU list but we are already in the + * tcp_reuse tree, strange. + * Continue to add ourselves to the LRU list. */ + log_err("internal error: reuse_tcp_insert: in lru list but " + "not in the tree"); } /* insert into LRU, first is newest */ pend_tcp->reuse.lru_prev = NULL; if(outnet->tcp_reuse_first) { pend_tcp->reuse.lru_next = outnet->tcp_reuse_first; + log_assert(pend_tcp->reuse.lru_next != &pend_tcp->reuse); outnet->tcp_reuse_first->lru_prev = &pend_tcp->reuse; + log_assert(outnet->tcp_reuse_first->lru_prev != + outnet->tcp_reuse_first); } else { pend_tcp->reuse.lru_next = NULL; outnet->tcp_reuse_last = &pend_tcp->reuse; } outnet->tcp_reuse_first = &pend_tcp->reuse; pend_tcp->reuse.item_on_lru_list = 1; + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); + log_assert(outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_prev); + log_assert(outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_prev); return 1; } @@ -491,9 +550,28 @@ reuse_tcp_find(struct outside_network* outnet, struct sockaddr_storage* addr, log_assert(&key_p.reuse != (struct reuse_tcp*)result); log_assert(&key_p != ((struct reuse_tcp*)result)->pending); } + + /* It is possible that we search for something before the first element + * in the tree. Replace a null pointer with the first element. + */ + if (!result) { + verbose(VERB_CLIENT, "reuse_tcp_find: taking first"); + result = rbtree_first(&outnet->tcp_reuse); + } + /* not found, return null */ if(!result || result == RBTREE_NULL) return NULL; + + /* It is possible that we got the previous address, but that the + * address we are looking for is in the tree. If the address we got + * is less than the address we are looking, then take the next entry. + */ + if (reuse_cmp_addrportssl(result->key, &key_p.reuse) < 0) { + verbose(VERB_CLIENT, "reuse_tcp_find: key too low"); + result = rbtree_next(result); + } + verbose(VERB_CLIENT, "reuse_tcp_find check inexact match"); /* inexact match, find one of possibly several connections to the * same destination address, with the correct port, ssl, and @@ -511,7 +589,7 @@ reuse_tcp_find(struct outside_network* outnet, struct sockaddr_storage* addr, while(result && result != RBTREE_NULL && reuse_cmp_addrportssl(result->key, &key_p.reuse) == 0) { if(((struct reuse_tcp*)result)->tree_by_id.count < - MAX_REUSE_TCP_QUERIES) { + outnet->max_reuse_tcp_queries) { /* same address, port, ssl-yes-or-no, and has * space for another query */ return (struct reuse_tcp*)result; @@ -561,13 +639,22 @@ outnet_tcp_take_into_use(struct waiting_tcp* w) log_assert(w->addrlen > 0); pend->c->tcp_do_toggle_rw = 0; pend->c->tcp_do_close = 0; + + /* Consistency check, if we have ssl_upstream but no sslctx, then + * log an error and return failure. + */ + if (w->ssl_upstream && !w->outnet->sslctx) { + log_err("SSL upstream requested but no SSL context"); + return 0; + } + /* open socket */ s = outnet_get_tcp_fd(&w->addr, w->addrlen, w->outnet->tcp_mss, w->outnet->ip_dscp); if(s == -1) return 0; - if(!pick_outgoing_tcp(w, s)) + if(!pick_outgoing_tcp(pend, w, s)) return 0; fd_set_nonblock(s); @@ -661,12 +748,12 @@ outnet_tcp_take_into_use(struct waiting_tcp* w) pend->next_free = NULL; pend->query = w; pend->reuse.outnet = w->outnet; - pend->c->repinfo.addrlen = w->addrlen; + pend->c->repinfo.remote_addrlen = w->addrlen; pend->c->tcp_more_read_again = &pend->reuse.cp_more_read_again; pend->c->tcp_more_write_again = &pend->reuse.cp_more_write_again; pend->reuse.cp_more_read_again = 0; pend->reuse.cp_more_write_again = 0; - memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen); + memcpy(&pend->c->repinfo.remote_addr, &w->addr, w->addrlen); pend->reuse.pending = pend; /* Remove from tree in case the is_ssl will be different and causes the @@ -689,28 +776,156 @@ outnet_tcp_take_into_use(struct waiting_tcp* w) /** Touch the lru of a reuse_tcp element, it is in use. * This moves it to the front of the list, where it is not likely to * be closed. Items at the back of the list are closed to make space. */ -static void +void reuse_tcp_lru_touch(struct outside_network* outnet, struct reuse_tcp* reuse) { if(!reuse->item_on_lru_list) { log_err("internal error: we need to touch the lru_list but item not in list"); return; /* not on the list, no lru to modify */ } + log_assert(reuse->lru_prev || + (!reuse->lru_prev && outnet->tcp_reuse_first == reuse)); if(!reuse->lru_prev) return; /* already first in the list */ /* remove at current position */ /* since it is not first, there is a previous element */ reuse->lru_prev->lru_next = reuse->lru_next; + log_assert(reuse->lru_prev->lru_next != reuse->lru_prev); if(reuse->lru_next) reuse->lru_next->lru_prev = reuse->lru_prev; else outnet->tcp_reuse_last = reuse->lru_prev; + log_assert(!reuse->lru_next || reuse->lru_next->lru_prev != reuse->lru_next); + log_assert(outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_prev); /* insert at the front */ reuse->lru_prev = NULL; reuse->lru_next = outnet->tcp_reuse_first; + if(outnet->tcp_reuse_first) { + outnet->tcp_reuse_first->lru_prev = reuse; + } + log_assert(reuse->lru_next != reuse); /* since it is not first, it is not the only element and * lru_next is thus not NULL and thus reuse is now not the last in * the list, so outnet->tcp_reuse_last does not need to be modified */ outnet->tcp_reuse_first = reuse; + log_assert(outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_prev); + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); +} + +/** Snip the last reuse_tcp element off of the LRU list */ +struct reuse_tcp* +reuse_tcp_lru_snip(struct outside_network* outnet) +{ + struct reuse_tcp* reuse = outnet->tcp_reuse_last; + if(!reuse) return NULL; + /* snip off of LRU */ + log_assert(reuse->lru_next == NULL); + if(reuse->lru_prev) { + outnet->tcp_reuse_last = reuse->lru_prev; + reuse->lru_prev->lru_next = NULL; + } else { + outnet->tcp_reuse_last = NULL; + outnet->tcp_reuse_first = NULL; + } + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); + reuse->item_on_lru_list = 0; + reuse->lru_next = NULL; + reuse->lru_prev = NULL; + return reuse; +} + +/** remove waiting tcp from the outnet waiting list */ +void +outnet_waiting_tcp_list_remove(struct outside_network* outnet, struct waiting_tcp* w) +{ + struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL; + w->on_tcp_waiting_list = 0; + while(p) { + if(p == w) { + /* remove w */ + if(prev) + prev->next_waiting = w->next_waiting; + else outnet->tcp_wait_first = w->next_waiting; + if(outnet->tcp_wait_last == w) + outnet->tcp_wait_last = prev; + w->next_waiting = NULL; + return; + } + prev = p; + p = p->next_waiting; + } + /* outnet_waiting_tcp_list_remove is currently called only with items + * that are already in the waiting list. */ + log_assert(0); +} + +/** pop the first waiting tcp from the outnet waiting list */ +struct waiting_tcp* +outnet_waiting_tcp_list_pop(struct outside_network* outnet) +{ + struct waiting_tcp* w = outnet->tcp_wait_first; + if(!outnet->tcp_wait_first) return NULL; + log_assert(w->on_tcp_waiting_list); + outnet->tcp_wait_first = w->next_waiting; + if(outnet->tcp_wait_last == w) + outnet->tcp_wait_last = NULL; + w->on_tcp_waiting_list = 0; + w->next_waiting = NULL; + return w; +} + +/** add waiting_tcp element to the outnet tcp waiting list */ +void +outnet_waiting_tcp_list_add(struct outside_network* outnet, + struct waiting_tcp* w, int set_timer) +{ + struct timeval tv; + log_assert(!w->on_tcp_waiting_list); + if(w->on_tcp_waiting_list) + return; + w->next_waiting = NULL; + if(outnet->tcp_wait_last) + outnet->tcp_wait_last->next_waiting = w; + else outnet->tcp_wait_first = w; + outnet->tcp_wait_last = w; + w->on_tcp_waiting_list = 1; + if(set_timer) { +#ifndef S_SPLINT_S + tv.tv_sec = w->timeout/1000; + tv.tv_usec = (w->timeout%1000)*1000; +#endif + comm_timer_set(w->timer, &tv); + } +} + +/** add waiting_tcp element as first to the outnet tcp waiting list */ +void +outnet_waiting_tcp_list_add_first(struct outside_network* outnet, + struct waiting_tcp* w, int reset_timer) +{ + struct timeval tv; + log_assert(!w->on_tcp_waiting_list); + if(w->on_tcp_waiting_list) + return; + w->next_waiting = outnet->tcp_wait_first; + log_assert(w->next_waiting != w); + if(!outnet->tcp_wait_last) + outnet->tcp_wait_last = w; + outnet->tcp_wait_first = w; + w->on_tcp_waiting_list = 1; + if(reset_timer) { +#ifndef S_SPLINT_S + tv.tv_sec = w->timeout/1000; + tv.tv_usec = (w->timeout%1000)*1000; +#endif + comm_timer_set(w->timer, &tv); + } + log_assert( + (!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); } /** call callback on waiting_tcp, if not NULL */ @@ -718,7 +933,7 @@ static void waiting_tcp_callback(struct waiting_tcp* w, struct comm_point* c, int error, struct comm_reply* reply_info) { - if(w->cb) { + if(w && w->cb) { fptr_ok(fptr_whitelist_pending_tcp(w->cb)); (void)(*w->cb)(c, w->cb_arg, error, reply_info); } @@ -729,19 +944,26 @@ static void use_free_buffer(struct outside_network* outnet) { struct waiting_tcp* w; - while(outnet->tcp_free && outnet->tcp_wait_first - && !outnet->want_to_quit) { + while(outnet->tcp_wait_first && !outnet->want_to_quit) { +#ifdef USE_DNSTAP + struct pending_tcp* pend_tcp = NULL; +#endif struct reuse_tcp* reuse = NULL; - w = outnet->tcp_wait_first; - outnet->tcp_wait_first = w->next_waiting; - if(outnet->tcp_wait_last == w) - outnet->tcp_wait_last = NULL; - w->on_tcp_waiting_list = 0; + w = outnet_waiting_tcp_list_pop(outnet); + log_assert( + (!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); reuse = reuse_tcp_find(outnet, &w->addr, w->addrlen, w->ssl_upstream); + /* re-select an ID when moving to a new TCP buffer */ + w->id = tcp_select_id(outnet, reuse); + LDNS_ID_SET(w->pkt, w->id); if(reuse) { log_reuse_tcp(VERB_CLIENT, "use free buffer for waiting tcp: " "found reuse", reuse); +#ifdef USE_DNSTAP + pend_tcp = reuse->pending; +#endif reuse_tcp_lru_touch(outnet, reuse); comm_timer_disable(w->timer); w->next_waiting = (void*)reuse->pending; @@ -758,7 +980,7 @@ use_free_buffer(struct outside_network* outnet) reuse->pending->c->fd, reuse->pending, w); } - } else { + } else if(outnet->tcp_free) { struct pending_tcp* pend = w->outnet->tcp_free; rbtree_init(&pend->reuse.tree_by_id, reuse_id_cmp); pend->reuse.pending = pend; @@ -768,37 +990,47 @@ use_free_buffer(struct outside_network* outnet) waiting_tcp_callback(w, NULL, NETEVENT_CLOSED, NULL); waiting_tcp_delete(w); +#ifdef USE_DNSTAP + w = NULL; +#endif } +#ifdef USE_DNSTAP + pend_tcp = pend; +#endif + } else { + /* no reuse and no free buffer, put back at the start */ + outnet_waiting_tcp_list_add_first(outnet, w, 0); + break; + } +#ifdef USE_DNSTAP + if(outnet->dtenv && pend_tcp && w && w->sq && + (outnet->dtenv->log_resolver_query_messages || + outnet->dtenv->log_forwarder_query_messages)) { + sldns_buffer tmp; + sldns_buffer_init_frm_data(&tmp, w->pkt, w->pkt_len); + dt_msg_send_outside_query(outnet->dtenv, &w->sq->addr, + &pend_tcp->pi->addr, comm_tcp, NULL, w->sq->zone, + w->sq->zonelen, &tmp); } - } -} - -/** add waiting_tcp element to the outnet tcp waiting list */ -static void -outnet_add_tcp_waiting(struct outside_network* outnet, struct waiting_tcp* w) -{ - struct timeval tv; - if(w->on_tcp_waiting_list) - return; - w->next_waiting = NULL; - if(outnet->tcp_wait_last) - outnet->tcp_wait_last->next_waiting = w; - else outnet->tcp_wait_first = w; - outnet->tcp_wait_last = w; - w->on_tcp_waiting_list = 1; -#ifndef S_SPLINT_S - tv.tv_sec = w->timeout/1000; - tv.tv_usec = (w->timeout%1000)*1000; #endif - comm_timer_set(w->timer, &tv); + } } /** delete element from tree by id */ static void reuse_tree_by_id_delete(struct reuse_tcp* reuse, struct waiting_tcp* w) { +#ifdef UNBOUND_DEBUG + rbnode_type* rem; +#endif log_assert(w->id_node.key != NULL); +#ifdef UNBOUND_DEBUG + rem = +#else + (void) +#endif rbtree_delete(&reuse->tree_by_id, w); + log_assert(rem); /* should have been there */ w->id_node.key = NULL; } @@ -841,7 +1073,7 @@ reuse_move_writewait_away(struct outside_network* outnet, * fail the query */ w->error_count ++; reuse_tree_by_id_delete(&pend->reuse, w); - outnet_add_tcp_waiting(outnet, w); + outnet_waiting_tcp_list_add(outnet, w, 1); } while((w = reuse_write_wait_pop(&pend->reuse)) != NULL) { if(verbosity >= VERB_CLIENT && w->pkt_len > 12+2+2 && @@ -852,20 +1084,29 @@ reuse_move_writewait_away(struct outside_network* outnet, verbose(VERB_CLIENT, "reuse_move_writewait_away item %s", buf); } reuse_tree_by_id_delete(&pend->reuse, w); - outnet_add_tcp_waiting(outnet, w); + outnet_waiting_tcp_list_add(outnet, w, 1); } } /** remove reused element from tree and lru list */ -static void +void reuse_tcp_remove_tree_list(struct outside_network* outnet, struct reuse_tcp* reuse) { verbose(VERB_CLIENT, "reuse_tcp_remove_tree_list"); if(reuse->node.key) { /* delete it from reuse tree */ - (void)rbtree_delete(&outnet->tcp_reuse, reuse); + if(!rbtree_delete(&outnet->tcp_reuse, reuse)) { + /* should not be possible, it should be there */ + char buf[256]; + addr_to_str(&reuse->addr, reuse->addrlen, buf, + sizeof(buf)); + log_err("reuse tcp delete: node not present, internal error, %s ssl %d lru %d", buf, reuse->is_ssl, reuse->item_on_lru_list); + } reuse->node.key = NULL; + /* defend against loops on broken tree by zeroing the + * rbnode structure */ + memset(&reuse->node, 0, sizeof(reuse->node)); } /* delete from reuse list */ if(reuse->item_on_lru_list) { @@ -874,21 +1115,38 @@ reuse_tcp_remove_tree_list(struct outside_network* outnet, * and thus have a pending pointer to the struct */ log_assert(reuse->lru_prev->pending); reuse->lru_prev->lru_next = reuse->lru_next; + log_assert(reuse->lru_prev->lru_next != reuse->lru_prev); } else { log_assert(!reuse->lru_next || reuse->lru_next->pending); outnet->tcp_reuse_first = reuse->lru_next; + log_assert(!outnet->tcp_reuse_first || + (outnet->tcp_reuse_first != + outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != + outnet->tcp_reuse_first->lru_prev)); } if(reuse->lru_next) { /* assert that members of the lru list are waiting * and thus have a pending pointer to the struct */ log_assert(reuse->lru_next->pending); reuse->lru_next->lru_prev = reuse->lru_prev; + log_assert(reuse->lru_next->lru_prev != reuse->lru_next); } else { log_assert(!reuse->lru_prev || reuse->lru_prev->pending); outnet->tcp_reuse_last = reuse->lru_prev; - } + log_assert(!outnet->tcp_reuse_last || + (outnet->tcp_reuse_last != + outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != + outnet->tcp_reuse_last->lru_prev)); + } + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); reuse->item_on_lru_list = 0; + reuse->lru_next = NULL; + reuse->lru_prev = NULL; } + reuse->pending = NULL; } /** helper function that deletes an element from the tree of readwait @@ -915,8 +1173,12 @@ decommission_pending_tcp(struct outside_network* outnet, struct pending_tcp* pend) { verbose(VERB_CLIENT, "decommission_pending_tcp"); - pend->next_free = outnet->tcp_free; - outnet->tcp_free = pend; + /* A certain code path can lead here twice for the same pending_tcp + * creating a loop in the free pending_tcp list. */ + if(outnet->tcp_free != pend) { + pend->next_free = outnet->tcp_free; + outnet->tcp_free = pend; + } if(pend->reuse.node.key) { /* needs unlink from the reuse tree to get deleted */ reuse_tcp_remove_tree_list(outnet, &pend->reuse); @@ -956,6 +1218,22 @@ static void reuse_cb_readwait_for_failure(rbtree_type* tree_by_id, int err) } } +/** mark the entry for being in the cb_and_decommission stage */ +static void mark_for_cb_and_decommission(rbnode_type* node, + void* ATTR_UNUSED(arg)) +{ + struct waiting_tcp* w = (struct waiting_tcp*)node->key; + /* Mark the waiting_tcp to signal later code (serviced_delete) that + * this item is part of the backed up tree_by_id and will be deleted + * later. */ + w->in_cb_and_decommission = 1; + /* Mark the serviced_query for deletion so that later code through + * callbacks (iter_clear .. outnet_serviced_query_stop) won't + * prematurely delete it. */ + if(w->cb) + ((struct serviced_query*)w->cb_arg)->to_be_deleted = 1; +} + /** perform callbacks for failure and also decommission pending tcp. * the callbacks remove references in sq->pending to the waiting_tcp * members of the tree_by_id in the pending tcp. The pending_tcp is @@ -971,28 +1249,31 @@ static void reuse_cb_and_decommission(struct outside_network* outnet, pend->reuse.write_wait_first = NULL; pend->reuse.write_wait_last = NULL; decommission_pending_tcp(outnet, pend); + if(store.root != NULL && store.root != RBTREE_NULL) { + traverse_postorder(&store, &mark_for_cb_and_decommission, NULL); + } reuse_cb_readwait_for_failure(&store, error); reuse_del_readwait(&store); } /** set timeout on tcp fd and setup read event to catch incoming dns msgs */ static void -reuse_tcp_setup_timeout(struct pending_tcp* pend_tcp) +reuse_tcp_setup_timeout(struct pending_tcp* pend_tcp, int tcp_reuse_timeout) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_setup_timeout", &pend_tcp->reuse); - comm_point_start_listening(pend_tcp->c, -1, REUSE_TIMEOUT); + comm_point_start_listening(pend_tcp->c, -1, tcp_reuse_timeout); } /** set timeout on tcp fd and setup read event to catch incoming dns msgs */ static void -reuse_tcp_setup_read_and_timeout(struct pending_tcp* pend_tcp) +reuse_tcp_setup_read_and_timeout(struct pending_tcp* pend_tcp, int tcp_reuse_timeout) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_setup_readtimeout", &pend_tcp->reuse); sldns_buffer_clear(pend_tcp->c->buffer); pend_tcp->c->tcp_is_reading = 1; pend_tcp->c->tcp_byte_count = 0; comm_point_stop_listening(pend_tcp->c); - comm_point_start_listening(pend_tcp->c, -1, REUSE_TIMEOUT); + comm_point_start_listening(pend_tcp->c, -1, tcp_reuse_timeout); } int @@ -1002,6 +1283,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, struct pending_tcp* pend = (struct pending_tcp*)arg; struct outside_network* outnet = pend->reuse.outnet; struct waiting_tcp* w = NULL; + log_assert(pend->reuse.item_on_lru_list && pend->reuse.node.key); verbose(VERB_ALGO, "outnettcp cb"); if(error == NETEVENT_TIMEOUT) { if(pend->c->tcp_write_and_read) { @@ -1048,7 +1330,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, pend->reuse.cp_more_write_again = 0; pend->c->tcp_is_reading = 1; comm_point_stop_listening(pend->c); - reuse_tcp_setup_timeout(pend); + reuse_tcp_setup_timeout(pend, outnet->tcp_reuse_timeout); } return 0; } else if(error != NETEVENT_NOERROR) { @@ -1067,6 +1349,12 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, c->buffer)); /* find the query the reply is for */ w = reuse_tcp_by_id_find(&pend->reuse, id); + /* Make sure that the reply we got is at least for a + * sent query with the same ID; the waiting_tcp that + * gets a reply is assumed to not be waiting to be + * sent. */ + if(w && (w->on_tcp_waiting_list || w->write_wait_queued)) + w = NULL; } } if(error == NETEVENT_NOERROR && !w) { @@ -1084,6 +1372,8 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, } } if(w) { + log_assert(!w->on_tcp_waiting_list); + log_assert(!w->write_wait_queued); reuse_tree_by_id_delete(&pend->reuse, w); verbose(VERB_CLIENT, "outnet tcp callback query err %d buflen %d", error, (int)sldns_buffer_limit(c->buffer)); @@ -1101,7 +1391,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, * and there could be more bytes to read on the input */ if(pend->reuse.tree_by_id.count != 0) pend->reuse.cp_more_read_again = 1; - reuse_tcp_setup_read_and_timeout(pend); + reuse_tcp_setup_read_and_timeout(pend, outnet->tcp_reuse_timeout); return 0; } verbose(VERB_CLIENT, "outnet_tcp_cb reuse after cb: decommission it"); @@ -1143,7 +1433,7 @@ outnet_send_wait_udp(struct outside_network* outnet) { struct pending* pend; /* process waiting queries */ - while(outnet->udp_wait_first && outnet->unused_fds + while(outnet->udp_wait_first && outnet->unused_fds && !outnet->want_to_quit) { pend = outnet->udp_wait_first; outnet->udp_wait_first = pend->next_waiting; @@ -1152,8 +1442,10 @@ outnet_send_wait_udp(struct outside_network* outnet) sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len); sldns_buffer_flip(outnet->udp_buff); free(pend->pkt); /* freeing now makes get_mem correct */ - pend->pkt = NULL; + pend->pkt = NULL; pend->pkt_len = 0; + log_assert(!pend->sq->busy); + pend->sq->busy = 1; if(!randomize_and_send_udp(pend, outnet->udp_buff, pend->timeout)) { /* callback error on pending */ @@ -1163,6 +1455,8 @@ outnet_send_wait_udp(struct outside_network* outnet) NETEVENT_CLOSED, NULL); } pending_delete(outnet, pend); + } else { + pend->sq->busy = 0; } } } @@ -1188,11 +1482,11 @@ outnet_udp_cb(struct comm_point* c, void* arg, int error, /* setup lookup key */ key.id = (unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer)); - memcpy(&key.addr, &reply_info->addr, reply_info->addrlen); - key.addrlen = reply_info->addrlen; + memcpy(&key.addr, &reply_info->remote_addr, reply_info->remote_addrlen); + key.addrlen = reply_info->remote_addrlen; verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id); log_addr(VERB_ALGO, "Incoming reply addr =", - &reply_info->addr, reply_info->addrlen); + &reply_info->remote_addr, reply_info->remote_addrlen); /* find it, see if this thing is a valid query response */ verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count); @@ -1273,7 +1567,6 @@ calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6, (*num_ip4)++; } } - } void @@ -1369,7 +1662,8 @@ outside_network_create(struct comm_base *base, size_t bufsize, int numavailports, size_t unwanted_threshold, int tcp_mss, void (*unwanted_action)(void*), void* unwanted_param, int do_udp, void* sslctx, int delayclose, int tls_use_sni, struct dt_env* dtenv, - int udp_connect) + int udp_connect, int max_reuse_tcp_queries, int tcp_reuse_timeout, + int tcp_auth_query_timeout) { struct outside_network* outnet = (struct outside_network*) calloc(1, sizeof(struct outside_network)); @@ -1381,7 +1675,11 @@ outside_network_create(struct comm_base *base, size_t bufsize, comm_base_timept(base, &outnet->now_secs, &outnet->now_tv); outnet->base = base; outnet->num_tcp = num_tcp; + outnet->max_reuse_tcp_queries = max_reuse_tcp_queries; + outnet->tcp_reuse_timeout= tcp_reuse_timeout; + outnet->tcp_auth_query_timeout = tcp_auth_query_timeout; outnet->num_tcp_outgoing = 0; + outnet->num_udp_outgoing = 0; outnet->infra = infra; outnet->rnd = rnd; outnet->sslctx = sslctx; @@ -1457,7 +1755,7 @@ outside_network_create(struct comm_base *base, size_t bufsize, return NULL; } pc->cp = comm_point_create_udp(outnet->base, -1, - outnet->udp_buff, outnet_udp_cb, outnet); + outnet->udp_buff, 0, outnet_udp_cb, outnet, NULL); if(!pc->cp) { log_err("malloc failed"); free(pc); @@ -1523,16 +1821,9 @@ static void serviced_node_del(rbnode_type* node, void* ATTR_UNUSED(arg)) { struct serviced_query* sq = (struct serviced_query*)node; - struct service_callback* p = sq->cblist, *np; - free(sq->qbuf); - free(sq->zone); - free(sq->tls_auth_name); - edns_opt_list_free(sq->opt_list); - while(p) { - np = p->next; - free(p); - p = np; - } + alloc_reg_release(sq->alloc, sq->region); + if(sq->timer) + comm_timer_delete(sq->timer); free(sq); } @@ -1609,22 +1900,19 @@ outside_network_delete(struct outside_network* outnet) size_t i; for(i=0; i<outnet->num_tcp; i++) if(outnet->tcp_conns[i]) { - if(outnet->tcp_conns[i]->query && - !outnet->tcp_conns[i]->query-> - on_tcp_waiting_list) { + struct pending_tcp* pend; + pend = outnet->tcp_conns[i]; + if(pend->reuse.item_on_lru_list) { /* delete waiting_tcp elements that * the tcp conn is working on */ - struct pending_tcp* pend = - (struct pending_tcp*)outnet-> - tcp_conns[i]->query-> - next_waiting; decommission_pending_tcp(outnet, pend); } comm_point_delete(outnet->tcp_conns[i]->c); - waiting_tcp_delete(outnet->tcp_conns[i]->query); free(outnet->tcp_conns[i]); + outnet->tcp_conns[i] = NULL; } free(outnet->tcp_conns); + outnet->tcp_conns = NULL; } if(outnet->tcp_wait_first) { struct waiting_tcp* p = outnet->tcp_wait_first, *np; @@ -1742,18 +2030,18 @@ select_id(struct outside_network* outnet, struct pending* pend, sldns_buffer* packet) { int id_tries = 0; - pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + pend->id = GET_RANDOM_ID(outnet->rnd); LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); /* insert in tree */ pend->node.key = pend; while(!rbtree_insert(outnet->pending, &pend->node)) { /* change ID to avoid collision */ - pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + pend->id = GET_RANDOM_ID(outnet->rnd); LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); id_tries++; if(id_tries == MAX_ID_RETRY) { - pend->id=99999; /* non existant ID */ + pend->id=99999; /* non existent ID */ log_err("failed to generate unique ID, drop msg"); return 0; } @@ -1779,6 +2067,11 @@ static int udp_connect_needs_log(int err) # ifdef ENETDOWN case ENETDOWN: # endif +# ifdef EADDRNOTAVAIL + case EADDRNOTAVAIL: +# endif + case EPERM: + case EACCES: if(verbosity >= VERB_ALGO) return 1; return 0; @@ -1921,6 +2214,7 @@ randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout) portcomm_loweruse(outnet, pend->pc); return 0; } + outnet->num_udp_outgoing++; /* system calls to set timeout after sending UDP to make roundtrip smaller. */ @@ -1931,11 +2225,21 @@ randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout) comm_timer_set(pend->timer, &tv); #ifdef USE_DNSTAP + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + * There are no chances to get the src (local service) addr if unbound + * is not configured with specific outgoing IP-addresses. So we will + * pass 0.0.0.0 (::) to argument for + * dt_msg_send_outside_query()/dt_msg_send_outside_response() calls. + */ if(outnet->dtenv && (outnet->dtenv->log_resolver_query_messages || - outnet->dtenv->log_forwarder_query_messages)) - dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp, - pend->sq->zone, pend->sq->zonelen, packet); + outnet->dtenv->log_forwarder_query_messages)) { + log_addr(VERB_ALGO, "from local addr", &pend->pc->pif->addr, pend->pc->pif->addrlen); + log_addr(VERB_ALGO, "request to upstream", &pend->addr, pend->addrlen); + dt_msg_send_outside_query(outnet->dtenv, &pend->addr, &pend->pc->pif->addr, comm_udp, NULL, + pend->sq->zone, pend->sq->zonelen, packet); + } #endif return 1; } @@ -1980,10 +2284,13 @@ pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet, sq->outnet->udp_wait_last = pend; return pend; } + log_assert(!sq->busy); + sq->busy = 1; if(!randomize_and_send_udp(pend, packet, timeout)) { pending_delete(sq->outnet, pend); return NULL; } + sq->busy = 0; return pend; } @@ -1995,7 +2302,7 @@ outnet_tcptimer(void* arg) verbose(VERB_CLIENT, "outnet_tcptimer"); if(w->on_tcp_waiting_list) { /* it is on the waiting list */ - waiting_list_remove(outnet, w); + outnet_waiting_tcp_list_remove(outnet, w); waiting_tcp_callback(w, NULL, NETEVENT_TIMEOUT, NULL); waiting_tcp_delete(w); } else { @@ -2011,24 +2318,20 @@ outnet_tcptimer(void* arg) static void reuse_tcp_close_oldest(struct outside_network* outnet) { - struct pending_tcp* pend; + struct reuse_tcp* reuse; verbose(VERB_CLIENT, "reuse_tcp_close_oldest"); - if(!outnet->tcp_reuse_last) return; - pend = outnet->tcp_reuse_last->pending; - - /* snip off of LRU */ - log_assert(pend->reuse.lru_next == NULL); - if(pend->reuse.lru_prev) { - outnet->tcp_reuse_last = pend->reuse.lru_prev; - pend->reuse.lru_prev->lru_next = NULL; - } else { - outnet->tcp_reuse_last = NULL; - outnet->tcp_reuse_first = NULL; - } - pend->reuse.item_on_lru_list = 0; - + reuse = reuse_tcp_lru_snip(outnet); + if(!reuse) return; /* free up */ - reuse_cb_and_decommission(outnet, pend, NETEVENT_CLOSED); + reuse_cb_and_decommission(outnet, reuse->pending, NETEVENT_CLOSED); +} + +static uint16_t +tcp_select_id(struct outside_network* outnet, struct reuse_tcp* reuse) +{ + if(reuse) + return reuse_tcp_select_id(reuse, outnet); + return GET_RANDOM_ID(outnet->rnd); } /** find spare ID value for reuse tcp stream. That is random and also does @@ -2044,20 +2347,20 @@ reuse_tcp_select_id(struct reuse_tcp* reuse, struct outside_network* outnet) /* make really sure the tree is not empty */ if(reuse->tree_by_id.count == 0) { - id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + id = GET_RANDOM_ID(outnet->rnd); return id; } /* try to find random empty spots by picking them */ for(i = 0; i<try_random; i++) { - id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + id = GET_RANDOM_ID(outnet->rnd); if(!reuse_tcp_by_id_find(reuse, id)) { return id; } } /* equally pick a random unused element from the tree that is - * not in use. Pick a the n-th index of an ununused number, + * not in use. Pick a the n-th index of an unused number, * then loop over the empty spaces in the tree and find it */ log_assert(reuse->tree_by_id.count < 0xffff); select = ub_random_max(outnet->rnd, 0xffff - reuse->tree_by_id.count); @@ -2067,7 +2370,7 @@ reuse_tcp_select_id(struct reuse_tcp* reuse, struct outside_network* outnet) node = rbtree_first(&reuse->tree_by_id); log_assert(node && node != RBTREE_NULL); /* tree not empty */ /* see if select is before first node */ - if(select < tree_by_id_get_id(node)) + if(select < (unsigned)tree_by_id_get_id(node)) return select; count += tree_by_id_get_id(node); /* perhaps select is between nodes */ @@ -2126,6 +2429,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, reuse_tcp_lru_touch(sq->outnet, reuse); } + log_assert(!reuse || (reuse && pend)); /* if !pend but we have reuse streams, close a reuse stream * to be able to open a new one to this target, no use waiting * to reuse a file descriptor while another query needs to use @@ -2133,6 +2437,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, if(!pend) { reuse_tcp_close_oldest(sq->outnet); pend = sq->outnet->tcp_free; + log_assert(!reuse || (pend == reuse->pending)); } /* allocate space to store query */ @@ -2148,9 +2453,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp); w->pkt_len = sldns_buffer_limit(packet); memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len); - if(reuse) - w->id = reuse_tcp_select_id(reuse, sq->outnet); - else w->id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff; + w->id = tcp_select_id(sq->outnet, reuse); LDNS_ID_SET(w->pkt, w->id); memcpy(&w->addr, &sq->addr, sq->addrlen); w->addrlen = sq->addrlen; @@ -2167,9 +2470,14 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, w->write_wait_next = NULL; w->write_wait_queued = 0; w->error_count = 0; +#ifdef USE_DNSTAP + w->sq = NULL; +#endif + w->in_cb_and_decommission = 0; if(pend) { /* we have a buffer available right now */ if(reuse) { + log_assert(reuse == &pend->reuse); /* reuse existing fd, write query and continue */ /* store query in tree by id */ verbose(VERB_CLIENT, "pending_tcp_query: reuse, store"); @@ -2201,20 +2509,28 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, return NULL; } } +#ifdef USE_DNSTAP + if(sq->outnet->dtenv && + (sq->outnet->dtenv->log_resolver_query_messages || + sq->outnet->dtenv->log_forwarder_query_messages)) { + /* use w->pkt, because it has the ID value */ + sldns_buffer tmp; + sldns_buffer_init_frm_data(&tmp, w->pkt, w->pkt_len); + dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr, + &pend->pi->addr, comm_tcp, NULL, sq->zone, + sq->zonelen, &tmp); + } +#endif } else { /* queue up */ /* waiting for a buffer on the outside network buffer wait * list */ verbose(VERB_CLIENT, "pending_tcp_query: queue to wait"); - outnet_add_tcp_waiting(sq->outnet, w); - } #ifdef USE_DNSTAP - if(sq->outnet->dtenv && - (sq->outnet->dtenv->log_resolver_query_messages || - sq->outnet->dtenv->log_forwarder_query_messages)) - dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr, - comm_tcp, sq->zone, sq->zonelen, packet); + w->sq = sq; #endif + outnet_waiting_tcp_list_add(sq->outnet, w, 1); + } return w; } @@ -2254,30 +2570,64 @@ lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec, return (struct serviced_query*)rbtree_search(outnet->serviced, &key); } +void +serviced_timer_cb(void* arg) +{ + struct serviced_query* sq = (struct serviced_query*)arg; + struct outside_network* outnet = sq->outnet; + verbose(VERB_ALGO, "serviced send timer"); + /* By the time this cb is called, if we don't have any registered + * callbacks for this serviced_query anymore; do not send. */ + if(!sq->cblist) + goto delete; + /* perform first network action */ + if(outnet->do_udp && !(sq->tcp_upstream || sq->ssl_upstream)) { + if(!serviced_udp_send(sq, outnet->udp_buff)) + goto delete; + } else { + if(!serviced_tcp_send(sq, outnet->udp_buff)) + goto delete; + } + /* Maybe by this time we don't have callbacks attached anymore. Don't + * proactively try to delete; let it run and maybe another callback + * will get attached by the time we get an answer. */ + return; +delete: + serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL); +} + /** Create new serviced entry */ static struct serviced_query* serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream, char* tls_auth_name, struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, size_t zonelen, int qtype, struct edns_option* opt_list, - size_t pad_queries_block_size) + size_t pad_queries_block_size, struct alloc_cache* alloc, + struct regional* region) { struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq)); + struct timeval t; #ifdef UNBOUND_DEBUG rbnode_type* ins; #endif - if(!sq) + if(!sq) { + alloc_reg_release(alloc, region); return NULL; + } sq->node.key = sq; - sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff)); + sq->alloc = alloc; + sq->region = region; + sq->qbuf = regional_alloc_init(region, sldns_buffer_begin(buff), + sldns_buffer_limit(buff)); if(!sq->qbuf) { + alloc_reg_release(alloc, region); free(sq); return NULL; } sq->qbuflen = sldns_buffer_limit(buff); - sq->zone = memdup(zone, zonelen); + sq->zone = regional_alloc_init(region, zone, zonelen); if(!sq->zone) { - free(sq->qbuf); + alloc_reg_release(alloc, region); free(sq); return NULL; } @@ -2289,10 +2639,9 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, sq->tcp_upstream = tcp_upstream; sq->ssl_upstream = ssl_upstream; if(tls_auth_name) { - sq->tls_auth_name = strdup(tls_auth_name); + sq->tls_auth_name = regional_strdup(region, tls_auth_name); if(!sq->tls_auth_name) { - free(sq->zone); - free(sq->qbuf); + alloc_reg_release(alloc, region); free(sq); return NULL; } @@ -2301,17 +2650,16 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, } memcpy(&sq->addr, addr, addrlen); sq->addrlen = addrlen; - sq->opt_list = NULL; - if(opt_list) { - sq->opt_list = edns_opt_copy_alloc(opt_list); - if(!sq->opt_list) { - free(sq->tls_auth_name); - free(sq->zone); - free(sq->qbuf); - free(sq); - return NULL; - } + sq->opt_list = opt_list; + sq->busy = 0; + sq->timer = comm_timer_create(outnet->base, serviced_timer_cb, sq); + if(!sq->timer) { + alloc_reg_release(alloc, region); + free(sq); + return NULL; } + memset(&t, 0, sizeof(t)); + comm_timer_set(sq->timer, &t); sq->outnet = outnet; sq->cblist = NULL; sq->pending = NULL; @@ -2320,7 +2668,7 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, sq->to_be_deleted = 0; sq->padding_block_size = pad_queries_block_size; #ifdef UNBOUND_DEBUG - ins = + ins = #else (void) #endif @@ -2329,27 +2677,6 @@ serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec, return sq; } -/** remove waiting tcp from the outnet waiting list */ -static void -waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w) -{ - struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL; - w->on_tcp_waiting_list = 0; - while(p) { - if(p == w) { - /* remove w */ - if(prev) - prev->next_waiting = w->next_waiting; - else outnet->tcp_wait_first = w->next_waiting; - if(outnet->tcp_wait_last == w) - outnet->tcp_wait_last = prev; - return; - } - prev = p; - p = p->next_waiting; - } -} - /** reuse tcp stream, remove serviced query from stream, * return true if the stream is kept, false if it is to be closed */ static int @@ -2386,7 +2713,7 @@ reuse_tcp_remove_serviced_keep(struct waiting_tcp* w, if(!reuse_tcp_insert(sq->outnet, pend_tcp)) { return 0; } - reuse_tcp_setup_timeout(pend_tcp); + reuse_tcp_setup_timeout(pend_tcp, sq->outnet->tcp_reuse_timeout); return 1; } return 0; @@ -2415,29 +2742,38 @@ serviced_delete(struct serviced_query* sq) struct waiting_tcp* w = (struct waiting_tcp*) sq->pending; verbose(VERB_CLIENT, "serviced_delete: TCP"); + log_assert(!(w->write_wait_queued && w->on_tcp_waiting_list)); /* if on stream-write-waiting list then * remove from waiting list and waiting_tcp_delete */ if(w->write_wait_queued) { struct pending_tcp* pend = (struct pending_tcp*)w->next_waiting; verbose(VERB_CLIENT, "serviced_delete: writewait"); - reuse_tree_by_id_delete(&pend->reuse, w); + if(!w->in_cb_and_decommission) + reuse_tree_by_id_delete(&pend->reuse, w); reuse_write_wait_remove(&pend->reuse, w); - waiting_tcp_delete(w); + if(!w->in_cb_and_decommission) + waiting_tcp_delete(w); } else if(!w->on_tcp_waiting_list) { struct pending_tcp* pend = (struct pending_tcp*)w->next_waiting; verbose(VERB_CLIENT, "serviced_delete: tcpreusekeep"); + /* w needs to stay on tree_by_id to not assign + * the same ID; remove the callback since its + * serviced_query will be gone. */ + w->cb = NULL; if(!reuse_tcp_remove_serviced_keep(w, sq)) { - reuse_cb_and_decommission(sq->outnet, - pend, NETEVENT_CLOSED); + if(!w->in_cb_and_decommission) + reuse_cb_and_decommission(sq->outnet, + pend, NETEVENT_CLOSED); use_free_buffer(sq->outnet); } sq->pending = NULL; } else { verbose(VERB_CLIENT, "serviced_delete: tcpwait"); - waiting_list_remove(sq->outnet, w); - waiting_tcp_delete(w); + outnet_waiting_tcp_list_remove(sq->outnet, w); + if(!w->in_cb_and_decommission) + waiting_tcp_delete(w); } } } @@ -2484,6 +2820,25 @@ serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len) } } +static uint16_t +serviced_query_udp_size(struct serviced_query* sq, enum serviced_query_status status) { + uint16_t udp_size; + if(status == serviced_query_UDP_EDNS_FRAG) { + if(addr_is_ip6(&sq->addr, sq->addrlen)) { + if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE) + udp_size = EDNS_FRAG_SIZE_IP6; + else udp_size = EDNS_ADVERTISED_SIZE; + } else { + if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE) + udp_size = EDNS_FRAG_SIZE_IP4; + else udp_size = EDNS_ADVERTISED_SIZE; + } + } else { + udp_size = EDNS_ADVERTISED_SIZE; + } + return udp_size; +} + /** put serviced query into a buffer */ static void serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns) @@ -2504,20 +2859,10 @@ serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns) edns.edns_present = 1; edns.ext_rcode = 0; edns.edns_version = EDNS_ADVERTISED_VERSION; - edns.opt_list = sq->opt_list; - if(sq->status == serviced_query_UDP_EDNS_FRAG) { - if(addr_is_ip6(&sq->addr, sq->addrlen)) { - if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE) - edns.udp_size = EDNS_FRAG_SIZE_IP6; - else edns.udp_size = EDNS_ADVERTISED_SIZE; - } else { - if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE) - edns.udp_size = EDNS_FRAG_SIZE_IP4; - else edns.udp_size = EDNS_ADVERTISED_SIZE; - } - } else { - edns.udp_size = EDNS_ADVERTISED_SIZE; - } + edns.opt_list_in = NULL; + edns.opt_list_out = sq->opt_list; + edns.opt_list_inplace_cb_out = NULL; + edns.udp_size = serviced_query_udp_size(sq, sq->status); edns.bits = 0; if(sq->dnssec & EDNS_DO) edns.bits = EDNS_DO; @@ -2527,8 +2872,8 @@ serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns) padding_option.opt_code = LDNS_EDNS_PADDING; padding_option.opt_len = 0; padding_option.opt_data = NULL; - padding_option.next = edns.opt_list; - edns.opt_list = &padding_option; + padding_option.next = edns.opt_list_out; + edns.opt_list_out = &padding_option; edns.padding_block_size = sq->padding_block_size; } attach_edns_record(buff, &edns); @@ -2685,7 +3030,8 @@ serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c, * use secondary buffer to store the query. * This is a data copy, but faster than packet to server */ backlen = sldns_buffer_limit(c->buffer); - backup_p = memdup(sldns_buffer_begin(c->buffer), backlen); + backup_p = regional_alloc_init(sq->region, + sldns_buffer_begin(c->buffer), backlen); if(!backup_p) { log_err("malloc failure in serviced query callbacks"); error = NETEVENT_CLOSED; @@ -2703,10 +3049,8 @@ serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c, } fptr_ok(fptr_whitelist_serviced_query(p->cb)); (void)(*p->cb)(c, p->cb_arg, error, rep); - free(p); } if(backup_p) { - free(backup_p); sq->outnet->svcd_overhead = 0; } verbose(VERB_ALGO, "svcd callbacks end"); @@ -2720,6 +3064,15 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, { struct serviced_query* sq = (struct serviced_query*)arg; struct comm_reply r2; +#ifdef USE_DNSTAP + struct waiting_tcp* w = (struct waiting_tcp*)sq->pending; + struct pending_tcp* pend_tcp = NULL; + struct port_if* pi = NULL; + if(w && !w->on_tcp_waiting_list && w->next_waiting) { + pend_tcp = (struct pending_tcp*)w->next_waiting; + pi = pend_tcp->pi; + } +#endif sq->pending = NULL; /* removed after this callback */ if(error != NETEVENT_NOERROR) log_addr(VERB_QUERY, "tcp error for address", @@ -2728,12 +3081,19 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, infra_update_tcp_works(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone, sq->zonelen); #ifdef USE_DNSTAP - if(error==NETEVENT_NOERROR && sq->outnet->dtenv && + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + */ + if(error==NETEVENT_NOERROR && pi && sq->outnet->dtenv && (sq->outnet->dtenv->log_resolver_response_messages || - sq->outnet->dtenv->log_forwarder_response_messages)) + sq->outnet->dtenv->log_forwarder_response_messages)) { + log_addr(VERB_ALGO, "response from upstream", &sq->addr, sq->addrlen); + log_addr(VERB_ALGO, "to local addr", &pi->addr, pi->addrlen); dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr, - c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, - &sq->last_sent_time, sq->outnet->now_tv, c->buffer); + &pi->addr, c->type, c->ssl, sq->zone, sq->zonelen, sq->qbuf, + sq->qbuflen, &sq->last_sent_time, sq->outnet->now_tv, + c->buffer); + } #endif if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS && (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == @@ -2791,8 +3151,8 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, rep = &r2; r2.c = c; } - memcpy(&rep->addr, &sq->addr, sq->addrlen); - rep->addrlen = sq->addrlen; + memcpy(&rep->remote_addr, &sq->addr, sq->addrlen); + rep->remote_addrlen = sq->addrlen; serviced_callbacks(sq, error, c, rep); return 0; } @@ -2804,8 +3164,11 @@ serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff) sq->status==serviced_query_TCP_EDNS?"EDNS":""); serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS); sq->last_sent_time = *sq->outnet->now_tv; - sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT, + log_assert(!sq->busy); + sq->busy = 1; + sq->pending = pending_tcp_query(sq, buff, sq->outnet->tcp_auth_query_timeout, serviced_tcp_callback, sq); + sq->busy = 0; if(!sq->pending) { /* delete from tree so that a retry by above layer does not * clash with this entry */ @@ -2832,13 +3195,16 @@ serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff) sq->last_sent_time = *sq->outnet->now_tv; if(sq->tcp_upstream || sq->ssl_upstream) { timeout = rtt; - if(rtt >= UNKNOWN_SERVER_NICENESS && rtt < TCP_AUTH_QUERY_TIMEOUT) - timeout = TCP_AUTH_QUERY_TIMEOUT; + if(rtt >= UNKNOWN_SERVER_NICENESS && rtt < sq->outnet->tcp_auth_query_timeout) + timeout = sq->outnet->tcp_auth_query_timeout; } else { - timeout = TCP_AUTH_QUERY_TIMEOUT; + timeout = sq->outnet->tcp_auth_query_timeout; } + log_assert(!sq->busy); + sq->busy = 1; sq->pending = pending_tcp_query(sq, buff, timeout, serviced_tcp_callback, sq); + sq->busy = 0; return sq->pending != NULL; } @@ -2887,10 +3253,14 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, struct serviced_query* sq = (struct serviced_query*)arg; struct outside_network* outnet = sq->outnet; struct timeval now = *sq->outnet->now_tv; +#ifdef USE_DNSTAP + struct pending* p = (struct pending*)sq->pending; +#endif sq->pending = NULL; /* removed after callback */ if(error == NETEVENT_TIMEOUT) { - if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) { + if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000 && + (serviced_query_udp_size(sq, serviced_query_UDP_EDNS_FRAG) < serviced_query_udp_size(sq, serviced_query_UDP_EDNS))) { /* fallback to 1480/1280 */ sq->status = serviced_query_UDP_EDNS_FRAG; log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10, @@ -2924,12 +3294,20 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, return 0; } #ifdef USE_DNSTAP - if(error == NETEVENT_NOERROR && outnet->dtenv && - (outnet->dtenv->log_resolver_response_messages || - outnet->dtenv->log_forwarder_response_messages)) - dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type, - sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, - &sq->last_sent_time, sq->outnet->now_tv, c->buffer); + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + */ + if(error == NETEVENT_NOERROR && outnet->dtenv && p->pc && + (outnet->dtenv->log_resolver_response_messages || + outnet->dtenv->log_forwarder_response_messages)) { + log_addr(VERB_ALGO, "response from upstream", &sq->addr, sq->addrlen); + log_addr(VERB_ALGO, "to local addr", &p->pc->pif->addr, + p->pc->pif->addrlen); + dt_msg_send_outside_response(outnet->dtenv, &sq->addr, + &p->pc->pif->addr, c->type, c->ssl, sq->zone, sq->zonelen, + sq->qbuf, sq->qbuflen, &sq->last_sent_time, + sq->outnet->now_tv, c->buffer); + } #endif if( (sq->status == serviced_query_UDP_EDNS ||sq->status == serviced_query_UDP_EDNS_FRAG) @@ -3018,64 +3396,116 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, struct serviced_query* outnet_serviced_query(struct outside_network* outnet, struct query_info* qinfo, uint16_t flags, int dnssec, int want_dnssec, - int nocaps, int tcp_upstream, int ssl_upstream, char* tls_auth_name, - struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, - size_t zonelen, struct module_qstate* qstate, - comm_point_callback_type* callback, void* callback_arg, sldns_buffer* buff, - struct module_env* env) + int nocaps, int check_ratelimit, int tcp_upstream, int ssl_upstream, + char* tls_auth_name, struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* zone, size_t zonelen, struct module_qstate* qstate, + comm_point_callback_type* callback, void* callback_arg, + sldns_buffer* buff, struct module_env* env, int* was_ratelimited) { struct serviced_query* sq; struct service_callback* cb; struct edns_string_addr* client_string_addr; - - if(!inplace_cb_query_call(env, qinfo, flags, addr, addrlen, zone, zonelen, - qstate, qstate->region)) + struct regional* region; + struct edns_option* backed_up_opt_list = qstate->edns_opts_back_out; + struct edns_option* per_upstream_opt_list = NULL; + time_t timenow = 0; + + /* If we have an already populated EDNS option list make a copy since + * we may now add upstream specific EDNS options. */ + /* Use a region that could be attached to a serviced_query, if it needs + * to be created. If an existing one is found then this region will be + * destroyed here. */ + region = alloc_reg_obtain(env->alloc); + if(!region) return NULL; + if(qstate->edns_opts_back_out) { + per_upstream_opt_list = edns_opt_copy_region( + qstate->edns_opts_back_out, region); + if(!per_upstream_opt_list) { + alloc_reg_release(env->alloc, region); return NULL; + } + qstate->edns_opts_back_out = per_upstream_opt_list; + } + + if(!inplace_cb_query_call(env, qinfo, flags, addr, addrlen, zone, + zonelen, qstate, region)) { + alloc_reg_release(env->alloc, region); + return NULL; + } + /* Restore the option list; we can explicitly use the copied one from + * now on. */ + per_upstream_opt_list = qstate->edns_opts_back_out; + qstate->edns_opts_back_out = backed_up_opt_list; if((client_string_addr = edns_string_addr_lookup( &env->edns_strings->client_strings, addr, addrlen))) { - edns_opt_list_append(&qstate->edns_opts_back_out, + edns_opt_list_append(&per_upstream_opt_list, env->edns_strings->client_string_opcode, client_string_addr->string_len, - client_string_addr->string, qstate->region); + client_string_addr->string, region); } serviced_gen_query(buff, qinfo->qname, qinfo->qname_len, qinfo->qtype, qinfo->qclass, flags); sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen, - qstate->edns_opts_back_out); - /* duplicate entries are included in the callback list, because - * there is a counterpart registration by our caller that needs to - * be doubly-removed (with callbacks perhaps). */ - if(!(cb = (struct service_callback*)malloc(sizeof(*cb)))) - return NULL; + per_upstream_opt_list); if(!sq) { + /* Check ratelimit only for new serviced_query */ + if(check_ratelimit) { + timenow = *env->now; + if(!infra_ratelimit_inc(env->infra_cache, zone, + zonelen, timenow, env->cfg->ratelimit_backoff, + &qstate->qinfo, qstate->reply)) { + /* Can we pass through with slip factor? */ + if(env->cfg->ratelimit_factor == 0 || + ub_random_max(env->rnd, + env->cfg->ratelimit_factor) != 1) { + *was_ratelimited = 1; + alloc_reg_release(env->alloc, region); + return NULL; + } + log_nametypeclass(VERB_ALGO, + "ratelimit allowed through for " + "delegation point", zone, + LDNS_RR_TYPE_NS, LDNS_RR_CLASS_IN); + } + } /* make new serviced query entry */ sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps, tcp_upstream, ssl_upstream, tls_auth_name, addr, addrlen, zone, zonelen, (int)qinfo->qtype, - qstate->edns_opts_back_out, + per_upstream_opt_list, ( ssl_upstream && env->cfg->pad_queries - ? env->cfg->pad_queries_block_size : 0 )); + ? env->cfg->pad_queries_block_size : 0 ), + env->alloc, region); if(!sq) { - free(cb); + if(check_ratelimit) { + infra_ratelimit_dec(env->infra_cache, + zone, zonelen, timenow); + } return NULL; } - /* perform first network action */ - if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) { - if(!serviced_udp_send(sq, buff)) { - (void)rbtree_delete(outnet->serviced, sq); - serviced_node_del(&sq->node, NULL); - free(cb); - return NULL; - } - } else { - if(!serviced_tcp_send(sq, buff)) { - (void)rbtree_delete(outnet->serviced, sq); - serviced_node_del(&sq->node, NULL); - free(cb); - return NULL; + if(!(cb = (struct service_callback*)regional_alloc( + sq->region, sizeof(*cb)))) { + if(check_ratelimit) { + infra_ratelimit_dec(env->infra_cache, + zone, zonelen, timenow); } + (void)rbtree_delete(outnet->serviced, sq); + serviced_node_del(&sq->node, NULL); + return NULL; + } + /* No network action at this point; it will be invoked with the + * serviced_query timer instead to run outside of the mesh. */ + } else { + /* We don't need this region anymore. */ + alloc_reg_release(env->alloc, region); + /* duplicate entries are included in the callback list, because + * there is a counterpart registration by our caller that needs + * to be doubly-removed (with callbacks perhaps). */ + if(!(cb = (struct service_callback*)regional_alloc( + sq->region, sizeof(*cb)))) { + return NULL; } } /* add callback to list of callbacks */ @@ -3095,7 +3525,6 @@ callback_list_remove(struct serviced_query* sq, void* cb_arg) if((*pp)->cb_arg == cb_arg) { struct service_callback* del = *pp; *pp = del->next; - free(del); return; } pp = &(*pp)->next; @@ -3104,13 +3533,13 @@ callback_list_remove(struct serviced_query* sq, void* cb_arg) void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg) { - if(!sq) + if(!sq) return; callback_list_remove(sq, cb_arg); /* if callbacks() routine scheduled deletion, let it do that */ - if(!sq->cblist && !sq->to_be_deleted) { + if(!sq->cblist && !sq->busy && !sq->to_be_deleted) { (void)rbtree_delete(sq->outnet->serviced, sq); - serviced_delete(sq); + serviced_delete(sq); } } @@ -3202,8 +3631,8 @@ outnet_comm_point_for_udp(struct outside_network* outnet, if(fd == -1) { return NULL; } - cp = comm_point_create_udp(outnet->base, fd, outnet->udp_buff, - cb, cb_arg); + cp = comm_point_create_udp(outnet->base, fd, outnet->udp_buff, 0, + cb, cb_arg, NULL); if(!cp) { log_err("malloc failure"); close(fd); @@ -3290,8 +3719,8 @@ outnet_comm_point_for_tcp(struct outside_network* outnet, close(fd); return 0; } - cp->repinfo.addrlen = to_addrlen; - memcpy(&cp->repinfo.addr, to_addr, to_addrlen); + cp->repinfo.remote_addrlen = to_addrlen; + memcpy(&cp->repinfo.remote_addr, to_addr, to_addrlen); /* setup for SSL (if needed) */ if(ssl) { @@ -3309,15 +3738,28 @@ outnet_comm_point_for_tcp(struct outside_network* outnet, return cp; } +/** setup the User-Agent HTTP header based on http-user-agent configuration */ +static void +setup_http_user_agent(sldns_buffer* buf, struct config_file* cfg) +{ + if(cfg->hide_http_user_agent) return; + if(cfg->http_user_agent==NULL || cfg->http_user_agent[0] == 0) { + sldns_buffer_printf(buf, "User-Agent: %s/%s\r\n", PACKAGE_NAME, + PACKAGE_VERSION); + } else { + sldns_buffer_printf(buf, "User-Agent: %s\r\n", cfg->http_user_agent); + } +} + /** setup http request headers in buffer for sending query to destination */ static int -setup_http_request(sldns_buffer* buf, char* host, char* path) +setup_http_request(sldns_buffer* buf, char* host, char* path, + struct config_file* cfg) { sldns_buffer_clear(buf); sldns_buffer_printf(buf, "GET /%s HTTP/1.1\r\n", path); sldns_buffer_printf(buf, "Host: %s\r\n", host); - sldns_buffer_printf(buf, "User-Agent: unbound/%s\r\n", - PACKAGE_VERSION); + setup_http_user_agent(buf, cfg); /* We do not really do multiple queries per connection, * but this header setting is also not needed. * sldns_buffer_printf(buf, "Connection: close\r\n") */ @@ -3333,7 +3775,7 @@ struct comm_point* outnet_comm_point_for_http(struct outside_network* outnet, comm_point_callback_type* cb, void* cb_arg, struct sockaddr_storage* to_addr, socklen_t to_addrlen, int timeout, - int ssl, char* host, char* path) + int ssl, char* host, char* path, struct config_file* cfg) { /* cp calls cb with err=NETEVENT_DONE when transfer is done */ struct comm_point* cp; @@ -3353,8 +3795,8 @@ outnet_comm_point_for_http(struct outside_network* outnet, close(fd); return 0; } - cp->repinfo.addrlen = to_addrlen; - memcpy(&cp->repinfo.addr, to_addr, to_addrlen); + cp->repinfo.remote_addrlen = to_addrlen; + memcpy(&cp->repinfo.remote_addr, to_addr, to_addrlen); /* setup for SSL (if needed) */ if(ssl) { @@ -3369,7 +3811,7 @@ outnet_comm_point_for_http(struct outside_network* outnet, comm_point_start_listening(cp, fd, timeout); /* setup http request in cp->buffer */ - if(!setup_http_request(cp->buffer, host, path)) { + if(!setup_http_request(cp->buffer, host, path, cfg)) { log_err("error setting up http request"); comm_point_delete(cp); return NULL; diff --git a/contrib/unbound/services/outside_network.h b/contrib/unbound/services/outside_network.h index fe287af4fcce..467c81f60ca2 100644 --- a/contrib/unbound/services/outside_network.h +++ b/contrib/unbound/services/outside_network.h @@ -43,7 +43,9 @@ #ifndef OUTSIDE_NETWORK_H #define OUTSIDE_NETWORK_H +#include "util/alloc.h" #include "util/rbtree.h" +#include "util/regional.h" #include "util/netevent.h" #include "dnstap/dnstap_config.h" struct pending; @@ -63,6 +65,7 @@ struct edns_option; struct module_env; struct module_qstate; struct query_info; +struct config_file; /** * Send queries to outside servers and wait for answers from servers. @@ -110,6 +113,8 @@ struct outside_network { /** if we perform udp-connect, connect() for UDP socket to mitigate * ICMP side channel leakage */ int udp_connect; + /** number of udp packets sent. */ + size_t num_udp_outgoing; /** array of outgoing IP4 interfaces */ struct port_if* ip4_ifs; @@ -158,6 +163,12 @@ struct outside_network { size_t num_tcp; /** number of tcp communication points in use. */ size_t num_tcp_outgoing; + /** max number of queries on a reuse connection */ + size_t max_reuse_tcp_queries; + /** timeout for REUSE entries in milliseconds. */ + int tcp_reuse_timeout; + /** timeout in milliseconds for TCP queries to auth servers. */ + int tcp_auth_query_timeout; /** * tree of still-open and waiting tcp connections for reuse. * can be closed and reopened to get a new tcp connection. @@ -295,11 +306,6 @@ struct reuse_tcp { struct outside_network* outnet; }; -/** max number of queries on a reuse connection */ -#define MAX_REUSE_TCP_QUERIES 200 -/** timeout for REUSE entries in milliseconds. */ -#define REUSE_TIMEOUT 60000 - /** * A query that has an answer pending for it. */ @@ -344,6 +350,8 @@ struct pending { struct pending_tcp { /** next in list of free tcp comm points, or NULL. */ struct pending_tcp* next_free; + /** port for of the outgoing interface that is used */ + struct port_if* pi; /** tcp comm point it was sent on (and reply must come back on). */ struct comm_point* c; /** the query being serviced, NULL if the pending_tcp is unused. */ @@ -408,6 +416,12 @@ struct waiting_tcp { char* tls_auth_name; /** the packet was involved in an error, to stop looping errors */ int error_count; + /** if true, the item is at the cb_and_decommission stage */ + int in_cb_and_decommission; +#ifdef USE_DNSTAP + /** serviced query pointer for dnstap to get logging info, if nonNULL*/ + struct serviced_query* sq; +#endif }; /** @@ -504,6 +518,15 @@ struct serviced_query { void* pending; /** block size with which to pad encrypted queries (default: 128) */ size_t padding_block_size; + /** region for this serviced query. Will be cleared when this + * serviced_query will be deleted */ + struct regional* region; + /** allocation service for the region */ + struct alloc_cache* alloc; + /** flash timer to start the net I/O as a separate event */ + struct comm_timer* timer; + /** true if serviced_query is currently doing net I/O and may block */ + int busy; }; /** @@ -534,6 +557,9 @@ struct serviced_query { * @param tls_use_sni: if SNI is used for TLS connections. * @param dtenv: environment to send dnstap events with (if enabled). * @param udp_connect: if the udp_connect option is enabled. + * @param max_reuse_tcp_queries: max number of queries on a reuse connection. + * @param tcp_reuse_timeout: timeout for REUSE entries in milliseconds. + * @param tcp_auth_query_timeout: timeout in milliseconds for TCP queries to auth servers. * @return: the new structure (with no pending answers) or NULL on error. */ struct outside_network* outside_network_create(struct comm_base* base, @@ -543,7 +569,8 @@ struct outside_network* outside_network_create(struct comm_base* base, int numavailports, size_t unwanted_threshold, int tcp_mss, void (*unwanted_action)(void*), void* unwanted_param, int do_udp, void* sslctx, int delayclose, int tls_use_sni, struct dt_env *dtenv, - int udp_connect); + int udp_connect, int max_reuse_tcp_queries, int tcp_reuse_timeout, + int tcp_auth_query_timeout); /** * Delete outside_network structure. @@ -607,6 +634,7 @@ void pending_delete(struct outside_network* outnet, struct pending* p); * @param want_dnssec: signatures are needed, without EDNS the answer is * likely to be useless. * @param nocaps: ignore use_caps_for_id and use unperturbed qname. + * @param check_ratelimit: if set, will check ratelimit before sending out. * @param tcp_upstream: use TCP for upstream queries. * @param ssl_upstream: use SSL for upstream queries. * @param tls_auth_name: when ssl_upstream is true, use this name to check @@ -623,16 +651,18 @@ void pending_delete(struct outside_network* outnet, struct pending* p); * @param callback_arg: user argument to callback function. * @param buff: scratch buffer to create query contents in. Empty on exit. * @param env: the module environment. + * @param was_ratelimited: it will signal back if the query failed to pass the + * ratelimit check. * @return 0 on error, or pointer to serviced query that is used to answer * this serviced query may be shared with other callbacks as well. */ struct serviced_query* outnet_serviced_query(struct outside_network* outnet, struct query_info* qinfo, uint16_t flags, int dnssec, int want_dnssec, - int nocaps, int tcp_upstream, int ssl_upstream, char* tls_auth_name, - struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone, - size_t zonelen, struct module_qstate* qstate, + int nocaps, int check_ratelimit, int tcp_upstream, int ssl_upstream, + char* tls_auth_name, struct sockaddr_storage* addr, socklen_t addrlen, + uint8_t* zone, size_t zonelen, struct module_qstate* qstate, comm_point_callback_type* callback, void* callback_arg, - struct sldns_buffer* buff, struct module_env* env); + struct sldns_buffer* buff, struct module_env* env, int* was_ratelimited); /** * Remove service query callback. @@ -670,12 +700,52 @@ struct waiting_tcp* reuse_tcp_by_id_find(struct reuse_tcp* reuse, uint16_t id); /** insert element in tree by id */ void reuse_tree_by_id_insert(struct reuse_tcp* reuse, struct waiting_tcp* w); +/** insert element in tcp_reuse tree and LRU list */ +int reuse_tcp_insert(struct outside_network* outnet, + struct pending_tcp* pend_tcp); + +/** touch the LRU of the element */ +void reuse_tcp_lru_touch(struct outside_network* outnet, + struct reuse_tcp* reuse); + +/** remove element from tree and LRU list */ +void reuse_tcp_remove_tree_list(struct outside_network* outnet, + struct reuse_tcp* reuse); + +/** snip the last reuse_tcp element off of the LRU list if any */ +struct reuse_tcp* reuse_tcp_lru_snip(struct outside_network* outnet); + /** delete readwait waiting_tcp elements, deletes the elements in the list */ void reuse_del_readwait(rbtree_type* tree_by_id); +/** remove waiting tcp from the outnet waiting list */ +void outnet_waiting_tcp_list_remove(struct outside_network* outnet, + struct waiting_tcp* w); + +/** pop the first waiting tcp from the outnet waiting list */ +struct waiting_tcp* outnet_waiting_tcp_list_pop(struct outside_network* outnet); + +/** add waiting_tcp element to the outnet tcp waiting list */ +void outnet_waiting_tcp_list_add(struct outside_network* outnet, + struct waiting_tcp* w, int set_timer); + +/** add waiting_tcp element as first to the outnet tcp waiting list */ +void outnet_waiting_tcp_list_add_first(struct outside_network* outnet, + struct waiting_tcp* w, int reset_timer); + +/** pop the first element from the writewait list */ +struct waiting_tcp* reuse_write_wait_pop(struct reuse_tcp* reuse); + +/** remove the element from the writewait list */ +void reuse_write_wait_remove(struct reuse_tcp* reuse, struct waiting_tcp* w); + +/** push the element after the last on the writewait list */ +void reuse_write_wait_push_back(struct reuse_tcp* reuse, struct waiting_tcp* w); + /** get TCP file descriptor for address, returns -1 on failure, * tcp_mss is 0 or maxseg size to set for TCP packets. */ -int outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, int tcp_mss, int dscp); +int outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, + int tcp_mss, int dscp); /** * Create udp commpoint suitable for sending packets to the destination. @@ -729,12 +799,13 @@ struct comm_point* outnet_comm_point_for_tcp(struct outside_network* outnet, * @param ssl: set to true for https. * @param host: hostname to use for the destination. part of http request. * @param path: pathname to lookup, eg. name of the file on the destination. + * @param cfg: running configuration for User-Agent setup. * @return http_out commpoint, or NULL. */ struct comm_point* outnet_comm_point_for_http(struct outside_network* outnet, comm_point_callback_type* cb, void* cb_arg, struct sockaddr_storage* to_addr, socklen_t to_addrlen, int timeout, - int ssl, char* host, char* path); + int ssl, char* host, char* path, struct config_file* cfg); /** connect tcp connection to addr, 0 on failure */ int outnet_tcp_connect(int s, struct sockaddr_storage* addr, socklen_t addrlen); @@ -756,6 +827,9 @@ void pending_udp_timer_delay_cb(void *arg); /** callback for outgoing TCP timer event */ void outnet_tcptimer(void* arg); +/** callback to send serviced queries */ +void serviced_timer_cb(void *arg); + /** callback for serviced query UDP answers */ int serviced_udp_callback(struct comm_point* c, void* arg, int error, struct comm_reply* rep); diff --git a/contrib/unbound/services/rpz.c b/contrib/unbound/services/rpz.c index 2b6b0ac3fccf..f036cc5fd649 100644 --- a/contrib/unbound/services/rpz.c +++ b/contrib/unbound/services/rpz.c @@ -50,45 +50,50 @@ #include "util/data/dname.h" #include "util/locks.h" #include "util/regional.h" +#include "util/data/msgencode.h" +#include "services/cache/dns.h" +#include "iterator/iterator.h" +#include "iterator/iter_delegpt.h" +#include "daemon/worker.h" + +typedef struct resp_addr rpz_aclnode_type; + +struct matched_delegation_point { + uint8_t* dname; + size_t dname_len; +}; /** string for RPZ action enum */ const char* rpz_action_to_string(enum rpz_action a) { switch(a) { - case RPZ_NXDOMAIN_ACTION: return "nxdomain"; - case RPZ_NODATA_ACTION: return "nodata"; - case RPZ_PASSTHRU_ACTION: return "passthru"; - case RPZ_DROP_ACTION: return "drop"; - case RPZ_TCP_ONLY_ACTION: return "tcp_only"; - case RPZ_INVALID_ACTION: return "invalid"; - case RPZ_LOCAL_DATA_ACTION: return "local_data"; - case RPZ_DISABLED_ACTION: return "disabled"; - case RPZ_CNAME_OVERRIDE_ACTION: return "cname_override"; - case RPZ_NO_OVERRIDE_ACTION: return "no_override"; + case RPZ_NXDOMAIN_ACTION: return "rpz-nxdomain"; + case RPZ_NODATA_ACTION: return "rpz-nodata"; + case RPZ_PASSTHRU_ACTION: return "rpz-passthru"; + case RPZ_DROP_ACTION: return "rpz-drop"; + case RPZ_TCP_ONLY_ACTION: return "rpz-tcp-only"; + case RPZ_INVALID_ACTION: return "rpz-invalid"; + case RPZ_LOCAL_DATA_ACTION: return "rpz-local-data"; + case RPZ_DISABLED_ACTION: return "rpz-disabled"; + case RPZ_CNAME_OVERRIDE_ACTION: return "rpz-cname-override"; + case RPZ_NO_OVERRIDE_ACTION: return "rpz-no-override"; + default: return "rpz-unknown-action"; } - return "unknown"; } /** RPZ action enum for config string */ static enum rpz_action rpz_config_to_action(char* a) { - if(strcmp(a, "nxdomain") == 0) - return RPZ_NXDOMAIN_ACTION; - else if(strcmp(a, "nodata") == 0) - return RPZ_NODATA_ACTION; - else if(strcmp(a, "passthru") == 0) - return RPZ_PASSTHRU_ACTION; - else if(strcmp(a, "drop") == 0) - return RPZ_DROP_ACTION; - else if(strcmp(a, "tcp_only") == 0) - return RPZ_TCP_ONLY_ACTION; - else if(strcmp(a, "cname") == 0) - return RPZ_CNAME_OVERRIDE_ACTION; - else if(strcmp(a, "disabled") == 0) - return RPZ_DISABLED_ACTION; - return RPZ_INVALID_ACTION; + if(strcmp(a, "nxdomain") == 0) return RPZ_NXDOMAIN_ACTION; + else if(strcmp(a, "nodata") == 0) return RPZ_NODATA_ACTION; + else if(strcmp(a, "passthru") == 0) return RPZ_PASSTHRU_ACTION; + else if(strcmp(a, "drop") == 0) return RPZ_DROP_ACTION; + else if(strcmp(a, "tcp_only") == 0) return RPZ_TCP_ONLY_ACTION; + else if(strcmp(a, "cname") == 0) return RPZ_CNAME_OVERRIDE_ACTION; + else if(strcmp(a, "disabled") == 0) return RPZ_DISABLED_ACTION; + else return RPZ_INVALID_ACTION; } /** string for RPZ trigger enum */ @@ -96,14 +101,14 @@ static const char* rpz_trigger_to_string(enum rpz_trigger r) { switch(r) { - case RPZ_QNAME_TRIGGER: return "qname"; - case RPZ_CLIENT_IP_TRIGGER: return "client_ip"; - case RPZ_RESPONSE_IP_TRIGGER: return "response_ip"; - case RPZ_NSDNAME_TRIGGER: return "nsdname"; - case RPZ_NSIP_TRIGGER: return "nsip"; - case RPZ_INVALID_TRIGGER: return "invalid"; + case RPZ_QNAME_TRIGGER: return "rpz-qname"; + case RPZ_CLIENT_IP_TRIGGER: return "rpz-client-ip"; + case RPZ_RESPONSE_IP_TRIGGER: return "rpz-response-ip"; + case RPZ_NSDNAME_TRIGGER: return "rpz-nsdname"; + case RPZ_NSIP_TRIGGER: return "rpz-nsip"; + case RPZ_INVALID_TRIGGER: return "rpz-invalid"; + default: return "rpz-unknown-trigger"; } - return "unknown"; } /** @@ -138,6 +143,31 @@ get_tld_label(uint8_t* dname, size_t maxdnamelen) } /** + * The RR types that are to be ignored. + * DNSSEC RRs at the apex, and SOA and NS are ignored. + */ +static int +rpz_type_ignored(uint16_t rr_type) +{ + switch(rr_type) { + case LDNS_RR_TYPE_SOA: + case LDNS_RR_TYPE_NS: + case LDNS_RR_TYPE_DNAME: + /* all DNSSEC-related RRs must be ignored */ + case LDNS_RR_TYPE_DNSKEY: + case LDNS_RR_TYPE_DS: + case LDNS_RR_TYPE_RRSIG: + case LDNS_RR_TYPE_NSEC: + case LDNS_RR_TYPE_NSEC3: + case LDNS_RR_TYPE_NSEC3PARAM: + return 1; + default: + break; + } + return 0; +} + +/** * Classify RPZ action for RR type/rdata * @param rr_type: the RR type * @param rdatawl: RDATA with 2 bytes length @@ -162,6 +192,7 @@ rpz_rr_to_action(uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) case LDNS_RR_TYPE_RRSIG: case LDNS_RR_TYPE_NSEC: case LDNS_RR_TYPE_NSEC3: + case LDNS_RR_TYPE_NSEC3PARAM: return RPZ_INVALID_ACTION; case LDNS_RR_TYPE_CNAME: break; @@ -207,15 +238,15 @@ static enum localzone_type rpz_action_to_localzone_type(enum rpz_action a) { switch(a) { - case RPZ_NXDOMAIN_ACTION: return local_zone_always_nxdomain; - case RPZ_NODATA_ACTION: return local_zone_always_nodata; - case RPZ_DROP_ACTION: return local_zone_always_deny; - case RPZ_PASSTHRU_ACTION: return local_zone_always_transparent; + case RPZ_NXDOMAIN_ACTION: return local_zone_always_nxdomain; + case RPZ_NODATA_ACTION: return local_zone_always_nodata; + case RPZ_DROP_ACTION: return local_zone_always_deny; + case RPZ_PASSTHRU_ACTION: return local_zone_always_transparent; case RPZ_LOCAL_DATA_ACTION: /* fallthrough */ case RPZ_CNAME_OVERRIDE_ACTION: return local_zone_redirect; - case RPZ_INVALID_ACTION: /* fallthrough */ - case RPZ_TCP_ONLY_ACTION: /* fallthrough */ - default: return local_zone_invalid; + case RPZ_TCP_ONLY_ACTION: return local_zone_truncate; + case RPZ_INVALID_ACTION: /* fallthrough */ + default: return local_zone_invalid; } } @@ -223,15 +254,15 @@ enum respip_action rpz_action_to_respip_action(enum rpz_action a) { switch(a) { - case RPZ_NXDOMAIN_ACTION: return respip_always_nxdomain; - case RPZ_NODATA_ACTION: return respip_always_nodata; - case RPZ_DROP_ACTION: return respip_always_deny; - case RPZ_PASSTHRU_ACTION: return respip_always_transparent; - case RPZ_LOCAL_DATA_ACTION: /* fallthrough */ + case RPZ_NXDOMAIN_ACTION: return respip_always_nxdomain; + case RPZ_NODATA_ACTION: return respip_always_nodata; + case RPZ_DROP_ACTION: return respip_always_deny; + case RPZ_PASSTHRU_ACTION: return respip_always_transparent; + case RPZ_LOCAL_DATA_ACTION: /* fallthrough */ case RPZ_CNAME_OVERRIDE_ACTION: return respip_redirect; - case RPZ_INVALID_ACTION: /* fallthrough */ - case RPZ_TCP_ONLY_ACTION: /* fallthrough */ - default: return respip_invalid; + case RPZ_TCP_ONLY_ACTION: return respip_truncate; + case RPZ_INVALID_ACTION: /* fallthrough */ + default: return respip_invalid; } } @@ -239,14 +270,14 @@ static enum rpz_action localzone_type_to_rpz_action(enum localzone_type lzt) { switch(lzt) { - case local_zone_always_nxdomain: return RPZ_NXDOMAIN_ACTION; - case local_zone_always_nodata: return RPZ_NODATA_ACTION; - case local_zone_always_deny: return RPZ_DROP_ACTION; - case local_zone_always_transparent: return RPZ_PASSTHRU_ACTION; - case local_zone_redirect: return RPZ_LOCAL_DATA_ACTION; - case local_zone_invalid: - default: - return RPZ_INVALID_ACTION; + case local_zone_always_nxdomain: return RPZ_NXDOMAIN_ACTION; + case local_zone_always_nodata: return RPZ_NODATA_ACTION; + case local_zone_always_deny: return RPZ_DROP_ACTION; + case local_zone_always_transparent: return RPZ_PASSTHRU_ACTION; + case local_zone_redirect: return RPZ_LOCAL_DATA_ACTION; + case local_zone_truncate: return RPZ_TCP_ONLY_ACTION; + case local_zone_invalid: /* fallthrough */ + default: return RPZ_INVALID_ACTION; } } @@ -254,14 +285,14 @@ enum rpz_action respip_action_to_rpz_action(enum respip_action a) { switch(a) { - case respip_always_nxdomain: return RPZ_NXDOMAIN_ACTION; - case respip_always_nodata: return RPZ_NODATA_ACTION; - case respip_always_deny: return RPZ_DROP_ACTION; - case respip_always_transparent: return RPZ_PASSTHRU_ACTION; - case respip_redirect: return RPZ_LOCAL_DATA_ACTION; - case respip_invalid: - default: - return RPZ_INVALID_ACTION; + case respip_always_nxdomain: return RPZ_NXDOMAIN_ACTION; + case respip_always_nodata: return RPZ_NODATA_ACTION; + case respip_always_deny: return RPZ_DROP_ACTION; + case respip_always_transparent: return RPZ_PASSTHRU_ACTION; + case respip_redirect: return RPZ_LOCAL_DATA_ACTION; + case respip_truncate: return RPZ_TCP_ONLY_ACTION; + case respip_invalid: /* fallthrough */ + default: return RPZ_INVALID_ACTION; } } @@ -297,12 +328,55 @@ rpz_dname_to_trigger(uint8_t* dname, size_t dname_len) return RPZ_QNAME_TRIGGER; } -void rpz_delete(struct rpz* r) +static inline struct clientip_synthesized_rrset* +rpz_clientip_synthesized_set_create(void) +{ + struct clientip_synthesized_rrset* set = calloc(1, sizeof(*set)); + if(set == NULL) { + return NULL; + } + set->region = regional_create(); + if(set->region == NULL) { + free(set); + return NULL; + } + addr_tree_init(&set->entries); + lock_rw_init(&set->lock); + return set; +} + +static void +rpz_clientip_synthesized_rr_delete(rbnode_type* n, void* ATTR_UNUSED(arg)) +{ + struct clientip_synthesized_rr* r = (struct clientip_synthesized_rr*)n->key; + lock_rw_destroy(&r->lock); +#ifdef THREADS_DISABLED + (void)r; +#endif +} + +static inline void +rpz_clientip_synthesized_set_delete(struct clientip_synthesized_rrset* set) +{ + if(set == NULL) { + return; + } + lock_rw_destroy(&set->lock); + traverse_postorder(&set->entries, rpz_clientip_synthesized_rr_delete, NULL); + regional_destroy(set->region); + free(set); +} + +void +rpz_delete(struct rpz* r) { if(!r) return; local_zones_delete(r->local_zones); + local_zones_delete(r->nsdname_zones); respip_set_delete(r->respip_set); + rpz_clientip_synthesized_set_delete(r->client_set); + rpz_clientip_synthesized_set_delete(r->ns_set); regional_destroy(r->region); free(r->taglist); free(r->log_name); @@ -314,13 +388,31 @@ rpz_clear(struct rpz* r) { /* must hold write lock on auth_zone */ local_zones_delete(r->local_zones); + r->local_zones = NULL; + local_zones_delete(r->nsdname_zones); + r->nsdname_zones = NULL; respip_set_delete(r->respip_set); + r->respip_set = NULL; + rpz_clientip_synthesized_set_delete(r->client_set); + r->client_set = NULL; + rpz_clientip_synthesized_set_delete(r->ns_set); + r->ns_set = NULL; if(!(r->local_zones = local_zones_create())){ return 0; } + r->nsdname_zones = local_zones_create(); + if(r->nsdname_zones == NULL) { + return 0; + } if(!(r->respip_set = respip_set_create())) { return 0; } + if(!(r->client_set = rpz_clientip_synthesized_set_create())) { + return 0; + } + if(!(r->ns_set = rpz_clientip_synthesized_set_create())) { + return 0; + } return 1; } @@ -330,6 +422,14 @@ rpz_finish_config(struct rpz* r) lock_rw_wrlock(&r->respip_set->lock); addr_tree_init_parents(&r->respip_set->ip_tree); lock_rw_unlock(&r->respip_set->lock); + + lock_rw_wrlock(&r->client_set->lock); + addr_tree_init_parents(&r->client_set->entries); + lock_rw_unlock(&r->client_set->lock); + + lock_rw_wrlock(&r->ns_set->lock); + addr_tree_init_parents(&r->ns_set->entries); + lock_rw_unlock(&r->ns_set->lock); } /** new rrset containing CNAME override, does not yet contain a dname */ @@ -378,26 +478,30 @@ new_cname_override(struct regional* region, uint8_t* ct, size_t ctlen) return rrset; } -struct rpz* -rpz_create(struct config_auth* p) +/** delete the cname override */ +static void +delete_cname_override(struct rpz* r) { - struct rpz* r = calloc(1, sizeof(*r)); - if(!r) - goto err; - - r->region = regional_create_custom(sizeof(struct regional)); - if(!r->region) { - goto err; + if(r->cname_override) { + /* The cname override is what is allocated in the region. */ + regional_free_all(r->region); + r->cname_override = NULL; } +} - if(!(r->local_zones = local_zones_create())){ - goto err; - } - if(!(r->respip_set = respip_set_create())) { - goto err; +/** Apply rpz config elements to the rpz structure, false on failure. */ +static int +rpz_apply_cfg_elements(struct rpz* r, struct config_auth* p) +{ + if(p->rpz_taglist && p->rpz_taglistlen) { + r->taglistlen = p->rpz_taglistlen; + r->taglist = memdup(p->rpz_taglist, r->taglistlen); + if(!r->taglist) { + log_err("malloc failure on RPZ taglist alloc"); + return 0; + } } - r->taglistlen = p->rpz_taglistlen; - r->taglist = memdup(p->rpz_taglist, r->taglistlen); + if(p->rpz_action_override) { r->action_override = rpz_config_to_action(p->rpz_action_override); } @@ -409,35 +513,82 @@ rpz_create(struct config_auth* p) size_t nmlen = sizeof(nm); if(!p->rpz_cname) { - log_err("RPZ override with cname action found, but no " + log_err("rpz: override with cname action found, but no " "rpz-cname-override configured"); - goto err; + return 0; } if(sldns_str2wire_dname_buf(p->rpz_cname, nm, &nmlen) != 0) { - log_err("cannot parse RPZ cname override: %s", + log_err("rpz: cannot parse cname override: %s", p->rpz_cname); - goto err; + return 0; } r->cname_override = new_cname_override(r->region, nm, nmlen); if(!r->cname_override) { - goto err; + return 0; } } r->log = p->rpz_log; + r->signal_nxdomain_ra = p->rpz_signal_nxdomain_ra; if(p->rpz_log_name) { if(!(r->log_name = strdup(p->rpz_log_name))) { log_err("malloc failure on RPZ log_name strdup"); - goto err; + return 0; } } + return 1; +} + +struct rpz* +rpz_create(struct config_auth* p) +{ + struct rpz* r = calloc(1, sizeof(*r)); + if(!r) + goto err; + + r->region = regional_create_custom(sizeof(struct regional)); + if(!r->region) { + goto err; + } + + if(!(r->local_zones = local_zones_create())){ + goto err; + } + + r->nsdname_zones = local_zones_create(); + if(r->local_zones == NULL){ + goto err; + } + + if(!(r->respip_set = respip_set_create())) { + goto err; + } + + r->client_set = rpz_clientip_synthesized_set_create(); + if(r->client_set == NULL) { + goto err; + } + + r->ns_set = rpz_clientip_synthesized_set_create(); + if(r->ns_set == NULL) { + goto err; + } + + if(!rpz_apply_cfg_elements(r, p)) + goto err; return r; err: if(r) { if(r->local_zones) local_zones_delete(r->local_zones); + if(r->nsdname_zones) + local_zones_delete(r->nsdname_zones); if(r->respip_set) respip_set_delete(r->respip_set); + if(r->client_set != NULL) + rpz_clientip_synthesized_set_delete(r->client_set); + if(r->ns_set != NULL) + rpz_clientip_synthesized_set_delete(r->ns_set); if(r->taglist) free(r->taglist); if(r->region) @@ -447,6 +598,32 @@ err: return NULL; } +int +rpz_config(struct rpz* r, struct config_auth* p) +{ + /* If the zonefile changes, it is read later, after which + * rpz_clear and rpz_finish_config is called. */ + + /* free taglist, if any */ + if(r->taglist) { + free(r->taglist); + r->taglist = NULL; + r->taglistlen = 0; + } + + /* free logname, if any */ + if(r->log_name) { + free(r->log_name); + r->log_name = NULL; + } + + delete_cname_override(r); + + if(!rpz_apply_cfg_elements(r, p)) + return 0; + return 1; +} + /** * Remove RPZ zone name from dname * Copy dname to newdname, without the originlen number of trailing bytes @@ -466,127 +643,424 @@ strip_dname_origin(uint8_t* dname, size_t dnamelen, size_t originlen, return newdnamelen + 1; /* + 1 for root label */ } -/** Insert RR into RPZ's local-zone */ static void -rpz_insert_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, - enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, - uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) +rpz_insert_local_zones_trigger(struct local_zones* lz, uint8_t* dname, + size_t dnamelen, enum rpz_action a, uint16_t rrtype, uint16_t rrclass, + uint32_t ttl, uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) { struct local_zone* z; enum localzone_type tp = local_zone_always_transparent; int dnamelabs = dname_count_labels(dname); - char* rrstr; int newzone = 0; - if(a == RPZ_TCP_ONLY_ACTION || a == RPZ_INVALID_ACTION) { - verbose(VERB_ALGO, "RPZ: skipping unsupported action: %s", - rpz_action_to_string(a)); + if(a == RPZ_INVALID_ACTION) { + char str[255+1]; + if(rrtype == LDNS_RR_TYPE_SOA || rrtype == LDNS_RR_TYPE_NS || + rrtype == LDNS_RR_TYPE_DNAME || + rrtype == LDNS_RR_TYPE_DNSKEY || + rrtype == LDNS_RR_TYPE_RRSIG || + rrtype == LDNS_RR_TYPE_NSEC || + rrtype == LDNS_RR_TYPE_NSEC3PARAM || + rrtype == LDNS_RR_TYPE_NSEC3 || + rrtype == LDNS_RR_TYPE_DS) { + free(dname); + return; /* no need to log these types as unsupported */ + } + dname_str(dname, str); + verbose(VERB_ALGO, "rpz: qname trigger, %s skipping unsupported action: %s", + str, rpz_action_to_string(a)); free(dname); return; } - lock_rw_wrlock(&r->local_zones->lock); + lock_rw_wrlock(&lz->lock); /* exact match */ - z = local_zones_find(r->local_zones, dname, dnamelen, dnamelabs, - LDNS_RR_CLASS_IN); - if(z && a != RPZ_LOCAL_DATA_ACTION) { - rrstr = sldns_wire2str_rr(rr, rr_len); - if(!rrstr) { - log_err("malloc error while inserting RPZ qname " - "trigger"); + z = local_zones_find(lz, dname, dnamelen, dnamelabs, LDNS_RR_CLASS_IN); + if(z != NULL && a != RPZ_LOCAL_DATA_ACTION) { + char* rrstr = sldns_wire2str_rr(rr, rr_len); + if(rrstr == NULL) { + log_err("malloc error while inserting rpz nsdname trigger"); free(dname); - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&lz->lock); return; } - verbose(VERB_ALGO, "RPZ: skipping duplicate record: '%s'", - rrstr); + if(rrstr[0]) + rrstr[strlen(rrstr)-1]=0; /* remove newline */ + verbose(VERB_ALGO, "rpz: skipping duplicate record: '%s'", rrstr); free(rrstr); free(dname); - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&lz->lock); return; } - if(!z) { + if(z == NULL) { tp = rpz_action_to_localzone_type(a); - if(!(z = local_zones_add_zone(r->local_zones, dname, dnamelen, - dnamelabs, rrclass, tp))) { - log_warn("RPZ create failed"); - lock_rw_unlock(&r->local_zones->lock); + z = local_zones_add_zone(lz, dname, dnamelen, + dnamelabs, rrclass, tp); + if(z == NULL) { + log_warn("rpz: create failed"); + lock_rw_unlock(&lz->lock); /* dname will be free'd in failed local_zone_create() */ return; } newzone = 1; } if(a == RPZ_LOCAL_DATA_ACTION) { - rrstr = sldns_wire2str_rr(rr, rr_len); - if(!rrstr) { - log_err("malloc error while inserting RPZ qname " - "trigger"); + char* rrstr = sldns_wire2str_rr(rr, rr_len); + if(rrstr == NULL) { + log_err("malloc error while inserting rpz nsdname trigger"); free(dname); - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&lz->lock); return; } lock_rw_wrlock(&z->lock); - local_zone_enter_rr(z, dname, dnamelen, dnamelabs, - rrtype, rrclass, ttl, rdata, rdata_len, rrstr); + local_zone_enter_rr(z, dname, dnamelen, dnamelabs, rrtype, + rrclass, ttl, rdata, rdata_len, rrstr); lock_rw_unlock(&z->lock); free(rrstr); } - if(!newzone) + if(!newzone) { free(dname); - lock_rw_unlock(&r->local_zones->lock); - return; + } + lock_rw_unlock(&lz->lock); +} + +static void +rpz_log_dname(char const* msg, uint8_t* dname, size_t dname_len) +{ + char buf[LDNS_MAX_DOMAINLEN+1]; + (void)dname_len; + dname_str(dname, buf); + verbose(VERB_ALGO, "rpz: %s: <%s>", msg, buf); +} + +static void +rpz_insert_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, + uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) +{ + if(a == RPZ_INVALID_ACTION) { + verbose(VERB_ALGO, "rpz: skipping invalid action"); + free(dname); + return; + } + + rpz_insert_local_zones_trigger(r->local_zones, dname, dnamelen, a, rrtype, + rrclass, ttl, rdata, rdata_len, rr, rr_len); } -/** Insert RR into RPZ's respip_set */ static int -rpz_insert_response_ip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, +rpz_strip_nsdname_suffix(uint8_t* dname, size_t maxdnamelen, + uint8_t** stripdname, size_t* stripdnamelen) +{ + uint8_t* tldstart = get_tld_label(dname, maxdnamelen); + uint8_t swap; + if(tldstart == NULL) { + if(dname == NULL) { + *stripdname = NULL; + *stripdnamelen = 0; + return 0; + } + *stripdname = memdup(dname, maxdnamelen); + if(!*stripdname) { + *stripdnamelen = 0; + log_err("malloc failure for rpz strip suffix"); + return 0; + } + *stripdnamelen = maxdnamelen; + return 1; + } + /* shorten the domain name briefly, + * then we allocate a new name with the correct length */ + swap = *tldstart; + *tldstart = 0; + (void)dname_count_size_labels(dname, stripdnamelen); + *stripdname = memdup(dname, *stripdnamelen); + *tldstart = swap; + if(!*stripdname) { + *stripdnamelen = 0; + log_err("malloc failure for rpz strip suffix"); + return 0; + } + return 1; +} + +static void +rpz_insert_nsdname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) { + uint8_t* dname_stripped = NULL; + size_t dnamelen_stripped = 0; + + rpz_strip_nsdname_suffix(dname, dnamelen, &dname_stripped, + &dnamelen_stripped); + if(a == RPZ_INVALID_ACTION) { + verbose(VERB_ALGO, "rpz: skipping invalid action"); + free(dname_stripped); + return; + } + + /* dname_stripped is consumed or freed by the insert routine */ + rpz_insert_local_zones_trigger(r->nsdname_zones, dname_stripped, + dnamelen_stripped, a, rrtype, rrclass, ttl, rdata, rdata_len, + rr, rr_len); +} + +static int +rpz_insert_ipaddr_based_trigger(struct respip_set* set, struct sockaddr_storage* addr, + socklen_t addrlen, int net, enum rpz_action a, uint16_t rrtype, + uint16_t rrclass, uint32_t ttl, uint8_t* rdata, size_t rdata_len, + uint8_t* rr, size_t rr_len) +{ struct resp_addr* node; - struct sockaddr_storage addr; - socklen_t addrlen; - int net, af; char* rrstr; enum respip_action respa = rpz_action_to_respip_action(a); - if(a == RPZ_TCP_ONLY_ACTION || a == RPZ_INVALID_ACTION || - respa == respip_invalid) { - verbose(VERB_ALGO, "RPZ: skipping unsupported action: %s", - rpz_action_to_string(a)); + lock_rw_wrlock(&set->lock); + rrstr = sldns_wire2str_rr(rr, rr_len); + if(rrstr == NULL) { + log_err("malloc error while inserting rpz ipaddr based trigger"); + lock_rw_unlock(&set->lock); return 0; } - if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) + node = respip_sockaddr_find_or_create(set, addr, addrlen, net, 1, rrstr); + if(node == NULL) { + lock_rw_unlock(&set->lock); + free(rrstr); return 0; + } - lock_rw_wrlock(&r->respip_set->lock); - rrstr = sldns_wire2str_rr(rr, rr_len); - if(!rrstr) { - log_err("malloc error while inserting RPZ respip trigger"); - lock_rw_unlock(&r->respip_set->lock); + lock_rw_wrlock(&node->lock); + lock_rw_unlock(&set->lock); + + node->action = respa; + + if(a == RPZ_LOCAL_DATA_ACTION) { + respip_enter_rr(set->region, node, rrtype, + rrclass, ttl, rdata, rdata_len, rrstr, ""); + } + + lock_rw_unlock(&node->lock); + free(rrstr); + return 1; +} + +static inline struct clientip_synthesized_rr* +rpz_clientip_ensure_entry(struct clientip_synthesized_rrset* set, + struct sockaddr_storage* addr, socklen_t addrlen, int net) +{ + int insert_ok; + struct clientip_synthesized_rr* node = + (struct clientip_synthesized_rr*)addr_tree_find(&set->entries, + addr, addrlen, net); + + if(node != NULL) { return node; } + + /* node does not yet exist => allocate one */ + node = regional_alloc_zero(set->region, sizeof(*node)); + if(node == NULL) { + log_err("out of memory"); + return NULL; + } + + lock_rw_init(&node->lock); + node->action = RPZ_INVALID_ACTION; + insert_ok = addr_tree_insert(&set->entries, &node->node, + addr, addrlen, net); + if (!insert_ok) { + log_warn("rpz: unexpected: unable to insert clientip address node"); + /* we can not free the just allocated node. + * theoretically a memleak */ + return NULL; + } + + return node; +} + +static void +rpz_report_rrset_error(const char* msg, uint8_t* rr, size_t rr_len) { + char* rrstr = sldns_wire2str_rr(rr, rr_len); + if(rrstr == NULL) { + log_err("malloc error while inserting rpz clientip based record"); + return; + } + log_err("rpz: unexpected: unable to insert %s: %s", msg, rrstr); + free(rrstr); +} + +/* from localzone.c; difference is we don't have a dname */ +static struct local_rrset* +rpz_clientip_new_rrset(struct regional* region, + struct clientip_synthesized_rr* raddr, uint16_t rrtype, uint16_t rrclass) +{ + struct packed_rrset_data* pd; + struct local_rrset* rrset = (struct local_rrset*) + regional_alloc_zero(region, sizeof(*rrset)); + if(rrset == NULL) { + log_err("out of memory"); + return NULL; + } + rrset->next = raddr->data; + raddr->data = rrset; + rrset->rrset = (struct ub_packed_rrset_key*) + regional_alloc_zero(region, sizeof(*rrset->rrset)); + if(rrset->rrset == NULL) { + log_err("out of memory"); + return NULL; + } + rrset->rrset->entry.key = rrset->rrset; + pd = (struct packed_rrset_data*)regional_alloc_zero(region, sizeof(*pd)); + if(pd == NULL) { + log_err("out of memory"); + return NULL; + } + pd->trust = rrset_trust_prim_noglue; + pd->security = sec_status_insecure; + rrset->rrset->entry.data = pd; + rrset->rrset->rk.type = htons(rrtype); + rrset->rrset->rk.rrset_class = htons(rrclass); + rrset->rrset->rk.dname = regional_alloc_zero(region, 1); + if(rrset->rrset->rk.dname == NULL) { + log_err("out of memory"); + return NULL; + } + rrset->rrset->rk.dname_len = 1; + return rrset; +} + +static int +rpz_clientip_enter_rr(struct regional* region, struct clientip_synthesized_rr* raddr, + uint16_t rrtype, uint16_t rrclass, time_t ttl, uint8_t* rdata, + size_t rdata_len) +{ + struct local_rrset* rrset; + if (rrtype == LDNS_RR_TYPE_CNAME && raddr->data != NULL) { + log_err("CNAME response-ip data can not co-exist with other " + "client-ip data"); return 0; } - if(!(node=respip_sockaddr_find_or_create(r->respip_set, &addr, addrlen, - net, 1, rrstr))) { - lock_rw_unlock(&r->respip_set->lock); - free(rrstr); + + rrset = rpz_clientip_new_rrset(region, raddr, rrtype, rrclass); + if(raddr->data == NULL) { + return 0; + } + + return rrset_insert_rr(region, rrset->rrset->entry.data, rdata, rdata_len, ttl, ""); +} + +static int +rpz_clientip_insert_trigger_rr(struct clientip_synthesized_rrset* set, struct sockaddr_storage* addr, + socklen_t addrlen, int net, enum rpz_action a, uint16_t rrtype, + uint16_t rrclass, uint32_t ttl, uint8_t* rdata, size_t rdata_len, + uint8_t* rr, size_t rr_len) +{ + struct clientip_synthesized_rr* node; + + lock_rw_wrlock(&set->lock); + + node = rpz_clientip_ensure_entry(set, addr, addrlen, net); + if(node == NULL) { + lock_rw_unlock(&set->lock); + rpz_report_rrset_error("client ip address", rr, rr_len); return 0; } lock_rw_wrlock(&node->lock); - lock_rw_unlock(&r->respip_set->lock); - node->action = respa; + lock_rw_unlock(&set->lock); + node->action = a; if(a == RPZ_LOCAL_DATA_ACTION) { - respip_enter_rr(r->respip_set->region, node, rrtype, - rrclass, ttl, rdata, rdata_len, rrstr, ""); + if(!rpz_clientip_enter_rr(set->region, node, rrtype, + rrclass, ttl, rdata, rdata_len)) { + verbose(VERB_ALGO, "rpz: unable to insert clientip rr"); + lock_rw_unlock(&node->lock); + return 0; + } + } + lock_rw_unlock(&node->lock); - free(rrstr); + return 1; } +static int +rpz_insert_clientip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, + uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + int net, af; + + if(a == RPZ_INVALID_ACTION) { + return 0; + } + + if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) { + verbose(VERB_ALGO, "rpz: unable to parse client ip"); + return 0; + } + + return rpz_clientip_insert_trigger_rr(r->client_set, &addr, addrlen, net, + a, rrtype, rrclass, ttl, rdata, rdata_len, rr, rr_len); +} + +static int +rpz_insert_nsip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, + uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + int net, af; + + if(a == RPZ_INVALID_ACTION) { + return 0; + } + + if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) { + verbose(VERB_ALGO, "rpz: unable to parse ns ip"); + return 0; + } + + return rpz_clientip_insert_trigger_rr(r->ns_set, &addr, addrlen, net, + a, rrtype, rrclass, ttl, rdata, rdata_len, rr, rr_len); +} + +/** Insert RR into RPZ's respip_set */ +static int +rpz_insert_response_ip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rrtype, uint16_t rrclass, uint32_t ttl, + uint8_t* rdata, size_t rdata_len, uint8_t* rr, size_t rr_len) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + int net, af; + + if(a == RPZ_INVALID_ACTION) { + return 0; + } + + if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) { + verbose(VERB_ALGO, "rpz: unable to parse response ip"); + return 0; + } + + if(a == RPZ_INVALID_ACTION || + rpz_action_to_respip_action(a) == respip_invalid) { + char str[255+1]; + dname_str(dname, str); + verbose(VERB_ALGO, "rpz: respip trigger, %s skipping unsupported action: %s", + str, rpz_action_to_string(a)); + return 0; + } + + return rpz_insert_ipaddr_based_trigger(r->respip_set, &addr, addrlen, net, + a, rrtype, rrclass, ttl, rdata, rdata_len, rr, rr_len); +} + int rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dname, size_t dnamelen, uint16_t rr_type, uint16_t rr_class, uint32_t rr_ttl, @@ -598,15 +1072,19 @@ rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dname, enum rpz_action a; uint8_t* policydname; + if(rpz_type_ignored(rr_type)) { + /* this rpz action is not valid, eg. this is the SOA or NS RR */ + return 1; + } if(!dname_subdomain_c(dname, azname)) { char* dname_str = sldns_wire2str_dname(dname, dnamelen); char* azname_str = sldns_wire2str_dname(azname, aznamelen); if(dname_str && azname_str) { - log_err("RPZ: name of record (%s) to insert into RPZ is not a " + log_err("rpz: name of record (%s) to insert into RPZ is not a " "subdomain of the configured name of the RPZ zone (%s)", dname_str, azname_str); } else { - log_err("RPZ: name of record to insert into RPZ is not a " + log_err("rpz: name of record to insert into RPZ is not a " "subdomain of the configured name of the RPZ zone"); } free(dname_str); @@ -629,23 +1107,37 @@ rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dname, t = rpz_dname_to_trigger(policydname, policydnamelen); if(t == RPZ_INVALID_TRIGGER) { free(policydname); - verbose(VERB_ALGO, "RPZ: skipping invalid trigger"); + verbose(VERB_ALGO, "rpz: skipping invalid trigger"); return 1; } if(t == RPZ_QNAME_TRIGGER) { + /* policydname will be consumed, no free */ rpz_insert_qname_trigger(r, policydname, policydnamelen, a, rr_type, rr_class, rr_ttl, rdatawl, rdatalen, rr, rr_len); - } - else if(t == RPZ_RESPONSE_IP_TRIGGER) { + } else if(t == RPZ_RESPONSE_IP_TRIGGER) { rpz_insert_response_ip_trigger(r, policydname, policydnamelen, a, rr_type, rr_class, rr_ttl, rdatawl, rdatalen, rr, rr_len); free(policydname); - } - else { + } else if(t == RPZ_CLIENT_IP_TRIGGER) { + rpz_insert_clientip_trigger(r, policydname, policydnamelen, + a, rr_type, rr_class, rr_ttl, rdatawl, rdatalen, rr, + rr_len); + free(policydname); + } else if(t == RPZ_NSIP_TRIGGER) { + rpz_insert_nsip_trigger(r, policydname, policydnamelen, + a, rr_type, rr_class, rr_ttl, rdatawl, rdatalen, rr, + rr_len); free(policydname); - verbose(VERB_ALGO, "RPZ: skipping unsupported trigger: %s", + } else if(t == RPZ_NSDNAME_TRIGGER) { + rpz_insert_nsdname_trigger(r, policydname, policydnamelen, + a, rr_type, rr_class, rr_ttl, rdatawl, rdatalen, rr, + rr_len); + free(policydname); + } else { + free(policydname); + verbose(VERB_ALGO, "rpz: skipping unsupported trigger: %s", rpz_trigger_to_string(t)); } return 1; @@ -653,18 +1145,18 @@ rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dname, /** * Find RPZ local-zone by qname. - * @param r: rpz containing local-zone tree + * @param zones: local-zone tree * @param qname: qname * @param qname_len: length of qname * @param qclass: qclass - * @param only_exact: if 1 only excact (non wildcard) matches are returned + * @param only_exact: if 1 only exact (non wildcard) matches are returned * @param wr: get write lock for local-zone if 1, read lock if 0 * @param zones_keep_lock: if set do not release the r->local_zones lock, this * makes the caller of this function responsible for releasing the lock. * @return: NULL or local-zone holding rd or wr lock */ static struct local_zone* -rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, +rpz_find_zone(struct local_zones* zones, uint8_t* qname, size_t qname_len, uint16_t qclass, int only_exact, int wr, int zones_keep_lock) { uint8_t* ce; @@ -673,16 +1165,19 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, uint8_t wc[LDNS_MAX_DOMAINLEN+1]; int exact; struct local_zone* z = NULL; + if(wr) { - lock_rw_wrlock(&r->local_zones->lock); + lock_rw_wrlock(&zones->lock); } else { - lock_rw_rdlock(&r->local_zones->lock); + lock_rw_rdlock(&zones->lock); } - z = local_zones_find_le(r->local_zones, qname, qname_len, + z = local_zones_find_le(zones, qname, qname_len, dname_count_labels(qname), LDNS_RR_CLASS_IN, &exact); if(!z || (only_exact && !exact)) { - lock_rw_unlock(&r->local_zones->lock); + if(!zones_keep_lock) { + lock_rw_unlock(&zones->lock); + } return NULL; } if(wr) { @@ -691,7 +1186,7 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, lock_rw_rdlock(&z->lock); } if(!zones_keep_lock) { - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); } if(exact) @@ -702,10 +1197,10 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, * zone match, append '*' to that and do another lookup. */ ce = dname_get_shared_topdomain(z->name, qname); - if(!ce /* should not happen */ || !*ce /* root */) { + if(!ce /* should not happen */) { lock_rw_unlock(&z->lock); if(zones_keep_lock) { - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); } return NULL; } @@ -713,7 +1208,7 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, if(ce_len+2 > sizeof(wc)) { lock_rw_unlock(&z->lock); if(zones_keep_lock) { - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); } return NULL; } @@ -724,15 +1219,15 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, if(!zones_keep_lock) { if(wr) { - lock_rw_wrlock(&r->local_zones->lock); + lock_rw_wrlock(&zones->lock); } else { - lock_rw_rdlock(&r->local_zones->lock); + lock_rw_rdlock(&zones->lock); } } - z = local_zones_find_le(r->local_zones, wc, + z = local_zones_find_le(zones, wc, ce_len+2, ce_labs+1, qclass, &exact); if(!z || !exact) { - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); return NULL; } if(wr) { @@ -741,16 +1236,36 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, lock_rw_rdlock(&z->lock); } if(!zones_keep_lock) { - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); } return z; } +/** Find entry for RR type in the list of rrsets for the clientip. */ +static struct local_rrset* +rpz_find_synthesized_rrset(uint16_t qtype, + struct clientip_synthesized_rr* data, int alias_ok) +{ + struct local_rrset* cursor = data->data, *cname = NULL; + while( cursor != NULL) { + struct packed_rrset_key* packed_rrset = &cursor->rrset->rk; + if(htons(qtype) == packed_rrset->type) { + return cursor; + } + if(ntohs(packed_rrset->type) == LDNS_RR_TYPE_CNAME && alias_ok) + cname = cursor; + cursor = cursor->next; + } + if(alias_ok) + return cname; + return NULL; +} + /** * Remove RR from RPZ's local-data * @param z: local-zone for RPZ, holding write lock * @param policydname: dname of RR to remove - * @param policydnamelen: lenth of policydname + * @param policydnamelen: length of policydname * @param rr_type: RR type of RR to remove * @param rdata: rdata of RR to remove * @param rdatalen: length of rdata @@ -828,18 +1343,18 @@ rpz_rrset_delete_rr(struct resp_addr* raddr, uint16_t rr_type, uint8_t* rdata, } -/** Remove RR from RPZ's local-zone */ +/** Remove RR from rpz localzones structure */ static void -rpz_remove_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, - enum rpz_action a, uint16_t rr_type, uint16_t rr_class, - uint8_t* rdatawl, size_t rdatalen) +rpz_remove_local_zones_trigger(struct local_zones* zones, uint8_t* dname, + size_t dnamelen, enum rpz_action a, uint16_t rr_type, + uint16_t rr_class, uint8_t* rdatawl, size_t rdatalen) { struct local_zone* z; int delete_zone = 1; - z = rpz_find_zone(r, dname, dnamelen, rr_class, + z = rpz_find_zone(zones, dname, dnamelen, rr_class, 1 /* only exact */, 1 /* wr lock */, 1 /* keep lock*/); if(!z) { - verbose(VERB_ALGO, "RPZ: cannot remove RR from IXFR, " + verbose(VERB_ALGO, "rpz: cannot remove RR from IXFR, " "RPZ domain not found"); return; } @@ -848,15 +1363,24 @@ rpz_remove_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, dnamelen, rr_type, rdatawl, rdatalen); else if(a != localzone_type_to_rpz_action(z->type)) { lock_rw_unlock(&z->lock); - lock_rw_unlock(&r->local_zones->lock); + lock_rw_unlock(&zones->lock); return; } lock_rw_unlock(&z->lock); if(delete_zone) { - local_zones_del_zone(r->local_zones, z); + local_zones_del_zone(zones, z); } - lock_rw_unlock(&r->local_zones->lock); - return; + lock_rw_unlock(&zones->lock); +} + +/** Remove RR from RPZ's local-zone */ +static void +rpz_remove_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rr_type, uint16_t rr_class, + uint8_t* rdatawl, size_t rdatalen) +{ + rpz_remove_local_zones_trigger(r->local_zones, dname, dnamelen, + a, rr_type, rr_class, rdatawl, rdatalen); } static void @@ -875,7 +1399,7 @@ rpz_remove_response_ip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, lock_rw_wrlock(&r->respip_set->lock); if(!(node = (struct resp_addr*)addr_tree_find( &r->respip_set->ip_tree, &addr, addrlen, net))) { - verbose(VERB_ALGO, "RPZ: cannot remove RR from IXFR, " + verbose(VERB_ALGO, "rpz: cannot remove RR from IXFR, " "RPZ domain not found"); lock_rw_unlock(&r->respip_set->lock); return; @@ -893,15 +1417,159 @@ rpz_remove_response_ip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, lock_rw_unlock(&r->respip_set->lock); } +/** find and remove type from list of local_rrset entries*/ +static void +del_local_rrset_from_list(struct local_rrset** list_head, uint16_t dtype) +{ + struct local_rrset* prev=NULL, *p=*list_head; + while(p && ntohs(p->rrset->rk.type) != dtype) { + prev = p; + p = p->next; + } + if(!p) + return; /* rrset type not found */ + /* unlink it */ + if(prev) prev->next = p->next; + else *list_head = p->next; + /* no memory recycling for zone deletions ... */ +} + +/** Delete client-ip trigger RR from its RRset and perhaps also the rrset + * from the linked list. Returns if the local data is empty and the node can + * be deleted too, or not. */ +static int rpz_remove_clientip_rr(struct clientip_synthesized_rr* node, + uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) +{ + struct local_rrset* rrset; + struct packed_rrset_data* d; + size_t index; + rrset = rpz_find_synthesized_rrset(rr_type, node, 0); + if(rrset == NULL) + return 0; /* type not found, ignore */ + d = (struct packed_rrset_data*)rrset->rrset->entry.data; + if(!packed_rrset_find_rr(d, rdatawl, rdatalen, &index)) + return 0; /* RR not found, ignore */ + if(d->count == 1) { + /* regional alloc'd */ + /* delete the type entry from the list */ + del_local_rrset_from_list(&node->data, rr_type); + /* if the list is empty, the node can be removed too */ + if(node->data == NULL) + return 1; + } else if (d->count > 1) { + if(!local_rrset_remove_rr(d, index)) + return 0; + } + return 0; +} + +/** remove trigger RR from clientip_syntheized set tree. */ +static void +rpz_clientip_remove_trigger_rr(struct clientip_synthesized_rrset* set, + struct sockaddr_storage* addr, socklen_t addrlen, int net, + enum rpz_action a, uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) +{ + struct clientip_synthesized_rr* node; + int delete_node = 1; + + lock_rw_wrlock(&set->lock); + node = (struct clientip_synthesized_rr*)addr_tree_find(&set->entries, + addr, addrlen, net); + if(node == NULL) { + /* netblock not found */ + verbose(VERB_ALGO, "rpz: cannot remove RR from IXFR, " + "RPZ address, netblock not found"); + lock_rw_unlock(&set->lock); + return; + } + lock_rw_wrlock(&node->lock); + if(a == RPZ_LOCAL_DATA_ACTION) { + /* remove RR, signal whether entry can be removed */ + delete_node = rpz_remove_clientip_rr(node, rr_type, rdatawl, + rdatalen); + } else if(a != node->action) { + /* ignore the RR with different action specification */ + delete_node = 0; + } + if(delete_node) { + rbtree_delete(&set->entries, node->node.node.key); + } + lock_rw_unlock(&set->lock); + lock_rw_unlock(&node->lock); + if(delete_node) { + lock_rw_destroy(&node->lock); + } +} + +/** Remove clientip trigger RR from RPZ. */ +static void +rpz_remove_clientip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + int net, af; + if(a == RPZ_INVALID_ACTION) + return; + if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) + return; + rpz_clientip_remove_trigger_rr(r->client_set, &addr, addrlen, net, + a, rr_type, rdatawl, rdatalen); +} + +/** Remove nsip trigger RR from RPZ. */ +static void +rpz_remove_nsip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + int net, af; + if(a == RPZ_INVALID_ACTION) + return; + if(!netblockdnametoaddr(dname, dnamelen, &addr, &addrlen, &net, &af)) + return; + rpz_clientip_remove_trigger_rr(r->ns_set, &addr, addrlen, net, + a, rr_type, rdatawl, rdatalen); +} + +/** Remove nsdname trigger RR from RPZ. */ +static void +rpz_remove_nsdname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, + enum rpz_action a, uint16_t rr_type, uint16_t rr_class, + uint8_t* rdatawl, size_t rdatalen) +{ + uint8_t* dname_stripped = NULL; + size_t dnamelen_stripped = 0; + if(a == RPZ_INVALID_ACTION) + return; + if(!rpz_strip_nsdname_suffix(dname, dnamelen, &dname_stripped, + &dnamelen_stripped)) + return; + rpz_remove_local_zones_trigger(r->nsdname_zones, dname_stripped, + dnamelen_stripped, a, rr_type, rr_class, rdatawl, rdatalen); + free(dname_stripped); +} + void -rpz_remove_rr(struct rpz* r, size_t aznamelen, uint8_t* dname, size_t dnamelen, - uint16_t rr_type, uint16_t rr_class, uint8_t* rdatawl, size_t rdatalen) +rpz_remove_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dname, + size_t dnamelen, uint16_t rr_type, uint16_t rr_class, uint8_t* rdatawl, + size_t rdatalen) { size_t policydnamelen; enum rpz_trigger t; enum rpz_action a; uint8_t* policydname; + if(rpz_type_ignored(rr_type)) { + /* this rpz action is not valid, eg. this is the SOA or NS RR */ + return; + } + if(!dname_subdomain_c(dname, azname)) { + /* not subdomain of the RPZ zone. */ + return; + } + if(!(policydname = calloc(1, LDNS_MAX_DOMAINLEN + 1))) return; @@ -916,130 +1584,1150 @@ rpz_remove_rr(struct rpz* r, size_t aznamelen, uint8_t* dname, size_t dnamelen, return; } t = rpz_dname_to_trigger(policydname, policydnamelen); + if(t == RPZ_INVALID_TRIGGER) { + /* skipping invalid trigger */ + free(policydname); + return; + } if(t == RPZ_QNAME_TRIGGER) { rpz_remove_qname_trigger(r, policydname, policydnamelen, a, rr_type, rr_class, rdatawl, rdatalen); } else if(t == RPZ_RESPONSE_IP_TRIGGER) { rpz_remove_response_ip_trigger(r, policydname, policydnamelen, a, rr_type, rdatawl, rdatalen); + } else if(t == RPZ_CLIENT_IP_TRIGGER) { + rpz_remove_clientip_trigger(r, policydname, policydnamelen, a, + rr_type, rdatawl, rdatalen); + } else if(t == RPZ_NSIP_TRIGGER) { + rpz_remove_nsip_trigger(r, policydname, policydnamelen, a, + rr_type, rdatawl, rdatalen); + } else if(t == RPZ_NSDNAME_TRIGGER) { + rpz_remove_nsdname_trigger(r, policydname, policydnamelen, a, + rr_type, rr_class, rdatawl, rdatalen); } + /* else it was an unsupported trigger, also skipped. */ free(policydname); } /** print log information for an applied RPZ policy. Based on local-zone's * lz_inform_print(). + * The repinfo contains the reply address. If it is NULL, the module + * state is used to report the first IP address (if any). + * The dname is used, for the applied rpz, if NULL, addrnode is used. */ static void -log_rpz_apply(uint8_t* dname, enum rpz_action a, struct query_info* qinfo, - struct comm_reply* repinfo, char* log_name) +log_rpz_apply(char* trigger, uint8_t* dname, struct addr_tree_node* addrnode, + enum rpz_action a, struct query_info* qinfo, + struct comm_reply* repinfo, struct module_qstate* ms, char* log_name) { - char ip[128], txt[512]; + char ip[128], txt[512], portstr[32]; char dnamestr[LDNS_MAX_DOMAINLEN+1]; - uint16_t port = ntohs(((struct sockaddr_in*)&repinfo->addr)->sin_port); - dname_str(dname, dnamestr); - addr_to_str(&repinfo->addr, repinfo->addrlen, ip, sizeof(ip)); - if(log_name) - snprintf(txt, sizeof(txt), "RPZ applied [%s] %s %s %s@%u", - log_name, dnamestr, rpz_action_to_string(a), ip, - (unsigned)port); - else - snprintf(txt, sizeof(txt), "RPZ applied %s %s %s@%u", - dnamestr, rpz_action_to_string(a), ip, (unsigned)port); + uint16_t port = 0; + if(dname) { + dname_str(dname, dnamestr); + } else if(addrnode) { + char addrbuf[128]; + addr_to_str(&addrnode->addr, addrnode->addrlen, addrbuf, sizeof(addrbuf)); + snprintf(dnamestr, sizeof(dnamestr), "%s/%d", addrbuf, addrnode->net); + } else { + dnamestr[0]=0; + } + if(repinfo) { + addr_to_str(&repinfo->client_addr, repinfo->client_addrlen, ip, sizeof(ip)); + port = ntohs(((struct sockaddr_in*)&repinfo->client_addr)->sin_port); + } else if(ms && ms->mesh_info && ms->mesh_info->reply_list) { + addr_to_str(&ms->mesh_info->reply_list->query_reply.client_addr, + ms->mesh_info->reply_list->query_reply.client_addrlen, + ip, sizeof(ip)); + port = ntohs(((struct sockaddr_in*)&ms->mesh_info->reply_list->query_reply.client_addr)->sin_port); + } else { + ip[0]=0; + port = 0; + } + snprintf(portstr, sizeof(portstr), "@%u", (unsigned)port); + snprintf(txt, sizeof(txt), "rpz: applied %s%s%s%s%s%s %s %s%s", + (log_name?"[":""), (log_name?log_name:""), (log_name?"] ":""), + (strcmp(trigger,"qname")==0?"":trigger), + (strcmp(trigger,"qname")==0?"":" "), + dnamestr, rpz_action_to_string(a), + (ip[0]?ip:""), (ip[0]?portstr:"")); log_nametypeclass(0, txt, qinfo->qname, qinfo->qtype, qinfo->qclass); } -int -rpz_apply_qname_trigger(struct auth_zones* az, struct module_env* env, - struct query_info* qinfo, struct edns_data* edns, sldns_buffer* buf, - struct regional* temp, struct comm_reply* repinfo, - uint8_t* taglist, size_t taglen, struct ub_server_stats* stats) +static struct clientip_synthesized_rr* +rpz_ipbased_trigger_lookup(struct clientip_synthesized_rrset* set, + struct sockaddr_storage* addr, socklen_t addrlen, char* triggername) +{ + struct clientip_synthesized_rr* raddr = NULL; + enum rpz_action action = RPZ_INVALID_ACTION; + + lock_rw_rdlock(&set->lock); + + raddr = (struct clientip_synthesized_rr*)addr_tree_lookup(&set->entries, + addr, addrlen); + if(raddr != NULL) { + lock_rw_rdlock(&raddr->lock); + action = raddr->action; + if(verbosity >= VERB_ALGO) { + char ip[256], net[256]; + addr_to_str(addr, addrlen, ip, sizeof(ip)); + addr_to_str(&raddr->node.addr, raddr->node.addrlen, + net, sizeof(net)); + verbose(VERB_ALGO, "rpz: trigger %s %s/%d on %s action=%s", + triggername, net, raddr->node.net, ip, rpz_action_to_string(action)); + } + } + lock_rw_unlock(&set->lock); + + return raddr; +} + +static inline +struct clientip_synthesized_rr* +rpz_resolve_client_action_and_zone(struct auth_zones* az, struct query_info* qinfo, + struct comm_reply* repinfo, uint8_t* taglist, size_t taglen, + struct ub_server_stats* stats, + /* output parameters */ + struct local_zone** z_out, struct auth_zone** a_out, struct rpz** r_out) { + struct clientip_synthesized_rr* node = NULL; + struct auth_zone* a = NULL; struct rpz* r = NULL; - struct auth_zone* a; - int ret; - enum localzone_type lzt; struct local_zone* z = NULL; - struct local_data* ld = NULL; + lock_rw_rdlock(&az->rpz_lock); + for(a = az->rpz_first; a; a = a->rpz_az_next) { lock_rw_rdlock(&a->lock); r = a->rpz; - if(!r->disabled && (!r->taglist || taglist_intersect(r->taglist, - r->taglistlen, taglist, taglen))) { - z = rpz_find_zone(r, qinfo->qname, qinfo->qname_len, - qinfo->qclass, 0, 0, 0); - if(z && r->action_override == RPZ_DISABLED_ACTION) { - if(r->log) - log_rpz_apply(z->name, - r->action_override, - qinfo, repinfo, r->log_name); - /* TODO only register stats when stats_extended? - * */ - stats->rpz_action[r->action_override]++; + if(r->disabled) { + lock_rw_unlock(&a->lock); + continue; + } + if(r->taglist && !taglist_intersect(r->taglist, + r->taglistlen, taglist, taglen)) { + lock_rw_unlock(&a->lock); + continue; + } + z = rpz_find_zone(r->local_zones, qinfo->qname, qinfo->qname_len, + qinfo->qclass, 0, 0, 0); + node = rpz_ipbased_trigger_lookup(r->client_set, + &repinfo->client_addr, repinfo->client_addrlen, + "clientip"); + if((z || node) && r->action_override == RPZ_DISABLED_ACTION) { + if(r->log) + log_rpz_apply((node?"clientip":"qname"), + (z?z->name:NULL), + (node?&node->node:NULL), + r->action_override, + qinfo, repinfo, NULL, r->log_name); + stats->rpz_action[r->action_override]++; + if(z != NULL) { lock_rw_unlock(&z->lock); z = NULL; } - if(z) - break; + if(node != NULL) { + lock_rw_unlock(&node->lock); + node = NULL; + } } - lock_rw_unlock(&a->lock); /* not found in this auth_zone */ + if(z || node) { + break; + } + /* not found in this auth_zone */ + lock_rw_unlock(&a->lock); } + lock_rw_unlock(&az->rpz_lock); - if(!z) - return 0; /* not holding auth_zone.lock anymore */ - log_assert(r); - if(r->action_override == RPZ_NO_OVERRIDE_ACTION) - lzt = z->type; - else - lzt = rpz_action_to_localzone_type(r->action_override); + *r_out = r; + *a_out = a; + *z_out = z; - if(r->action_override == RPZ_CNAME_OVERRIDE_ACTION) { - qinfo->local_alias = - regional_alloc_zero(temp, sizeof(struct local_rrset)); - if(!qinfo->local_alias) { - lock_rw_unlock(&z->lock); - lock_rw_unlock(&a->lock); - return 0; /* out of memory */ + return node; +} + +static inline int +rpz_is_udp_query(struct comm_reply* repinfo) { + return repinfo != NULL + ? (repinfo->c != NULL + ? repinfo->c->type == comm_udp + : 0) + : 0; +} + +/** encode answer consisting of 1 rrset */ +static int +rpz_local_encode(struct module_env* env, struct query_info* qinfo, + struct edns_data* edns, struct comm_reply* repinfo, sldns_buffer* buf, + struct regional* temp, struct ub_packed_rrset_key* rrset, int ansec, + int rcode, struct ub_packed_rrset_key* soa_rrset) +{ + struct reply_info rep; + uint16_t udpsize; + struct ub_packed_rrset_key* rrsetlist[3]; + + memset(&rep, 0, sizeof(rep)); + rep.flags = (uint16_t)((BIT_QR | BIT_AA | BIT_RA) | rcode); + rep.qdcount = 1; + rep.rrset_count = ansec; + rep.rrsets = rrsetlist; + if(ansec > 0) { + rep.an_numrrsets = 1; + rep.rrsets[0] = rrset; + rep.ttl = ((struct packed_rrset_data*)rrset->entry.data)->rr_ttl[0]; + } + if(soa_rrset != NULL) { + rep.ar_numrrsets = 1; + rep.rrsets[rep.rrset_count] = soa_rrset; + rep.rrset_count ++; + if(rep.ttl < ((struct packed_rrset_data*)soa_rrset->entry.data)->rr_ttl[0]) { + rep.ttl = ((struct packed_rrset_data*)soa_rrset->entry.data)->rr_ttl[0]; } - qinfo->local_alias->rrset = - regional_alloc_init(temp, r->cname_override, - sizeof(*r->cname_override)); - if(!qinfo->local_alias->rrset) { - lock_rw_unlock(&z->lock); - lock_rw_unlock(&a->lock); - return 0; /* out of memory */ + } + + udpsize = edns->udp_size; + edns->edns_version = EDNS_ADVERTISED_VERSION; + edns->udp_size = EDNS_ADVERTISED_SIZE; + edns->ext_rcode = 0; + edns->bits &= EDNS_DO; + if(!inplace_cb_reply_local_call(env, qinfo, NULL, &rep, rcode, edns, + repinfo, temp, env->now_tv) || + !reply_info_answer_encode(qinfo, &rep, + *(uint16_t*)sldns_buffer_begin(buf), sldns_buffer_read_u16_at(buf, 2), + buf, 0, 0, temp, udpsize, edns, (int)(edns->bits&EDNS_DO), 0)) { + error_encode(buf, (LDNS_RCODE_SERVFAIL|BIT_AA), qinfo, + *(uint16_t*)sldns_buffer_begin(buf), + sldns_buffer_read_u16_at(buf, 2), edns); + } + + return 1; +} + +/** allocate SOA record ubrrsetkey in region */ +static struct ub_packed_rrset_key* +make_soa_ubrrset(struct auth_zone* auth_zone, struct auth_rrset* soa, + struct regional* temp) +{ + struct ub_packed_rrset_key csoa; + if(!soa) + return NULL; + memset(&csoa, 0, sizeof(csoa)); + csoa.entry.key = &csoa; + csoa.rk.rrset_class = htons(LDNS_RR_CLASS_IN); + csoa.rk.type = htons(LDNS_RR_TYPE_SOA); + csoa.rk.flags |= PACKED_RRSET_FIXEDTTL + | PACKED_RRSET_RPZ; + csoa.rk.dname = auth_zone->name; + csoa.rk.dname_len = auth_zone->namelen; + csoa.entry.hash = rrset_key_hash(&csoa.rk); + csoa.entry.data = soa->data; + return respip_copy_rrset(&csoa, temp); +} + +static void +rpz_apply_clientip_localdata_action(struct clientip_synthesized_rr* raddr, + struct module_env* env, struct query_info* qinfo, + struct edns_data* edns, struct comm_reply* repinfo, sldns_buffer* buf, + struct regional* temp, struct auth_zone* auth_zone) +{ + struct local_rrset* rrset; + enum rpz_action action = RPZ_INVALID_ACTION; + struct ub_packed_rrset_key* rp = NULL; + struct ub_packed_rrset_key* rsoa = NULL; + int rcode = LDNS_RCODE_NOERROR|BIT_AA; + int rrset_count = 1; + + /* prepare synthesized answer for client */ + action = raddr->action; + if(action == RPZ_LOCAL_DATA_ACTION && raddr->data == NULL ) { + verbose(VERB_ALGO, "rpz: bug: local-data action but no local data"); + return; + } + + /* check query type / rr type */ + rrset = rpz_find_synthesized_rrset(qinfo->qtype, raddr, 1); + if(rrset == NULL) { + verbose(VERB_ALGO, "rpz: unable to find local-data for query"); + rrset_count = 0; + goto nodata; + } + + rp = respip_copy_rrset(rrset->rrset, temp); + if(!rp) { + verbose(VERB_ALGO, "rpz: local data action: out of memory"); + return; + } + + rp->rk.flags |= PACKED_RRSET_FIXEDTTL | PACKED_RRSET_RPZ; + rp->rk.dname = qinfo->qname; + rp->rk.dname_len = qinfo->qname_len; + rp->entry.hash = rrset_key_hash(&rp->rk); +nodata: + if(auth_zone) { + struct auth_rrset* soa = NULL; + soa = auth_zone_get_soa_rrset(auth_zone); + if(soa) { + rsoa = make_soa_ubrrset(auth_zone, soa, temp); + if(!rsoa) { + verbose(VERB_ALGO, "rpz: local data action soa: out of memory"); + return; + } + } + } + + rpz_local_encode(env, qinfo, edns, repinfo, buf, temp, rp, + rrset_count, rcode, rsoa); +} + +/** Apply the cname override action, during worker request callback. + * false on failure. */ +static int +rpz_apply_cname_override_action(struct rpz* r, + struct query_info* qinfo, struct regional* temp) +{ + if(!r) + return 0; + qinfo->local_alias = regional_alloc_zero(temp, + sizeof(struct local_rrset)); + if(qinfo->local_alias == NULL) + return 0; /* out of memory */ + qinfo->local_alias->rrset = respip_copy_rrset(r->cname_override, temp); + if(qinfo->local_alias->rrset == NULL) { + qinfo->local_alias = NULL; + return 0; /* out of memory */ + } + qinfo->local_alias->rrset->rk.dname = qinfo->qname; + qinfo->local_alias->rrset->rk.dname_len = qinfo->qname_len; + return 1; +} + +/** add additional section SOA record to the reply. + * Since this gets fed into the normal iterator answer creation, it + * gets minimal-responses applied to it, that can remove the additional SOA + * again. */ +static int +rpz_add_soa(struct reply_info* rep, struct module_qstate* ms, + struct auth_zone* az) +{ + struct auth_rrset* soa = NULL; + struct ub_packed_rrset_key* rsoa = NULL; + struct ub_packed_rrset_key** prevrrsets; + if(!az) return 1; + soa = auth_zone_get_soa_rrset(az); + if(!soa) return 1; + if(!rep) return 0; + rsoa = make_soa_ubrrset(az, soa, ms->region); + if(!rsoa) return 0; + prevrrsets = rep->rrsets; + rep->rrsets = regional_alloc_zero(ms->region, + sizeof(*rep->rrsets)*(rep->rrset_count+1)); + if(!rep->rrsets) + return 0; + if(prevrrsets && rep->rrset_count > 0) + memcpy(rep->rrsets, prevrrsets, rep->rrset_count*sizeof(*rep->rrsets)); + rep->rrset_count++; + rep->ar_numrrsets++; + rep->rrsets[rep->rrset_count-1] = rsoa; + return 1; +} + +static inline struct dns_msg* +rpz_dns_msg_new(struct regional* region) +{ + struct dns_msg* msg = + (struct dns_msg*)regional_alloc(region, + sizeof(struct dns_msg)); + if(msg == NULL) { return NULL; } + memset(msg, 0, sizeof(struct dns_msg)); + + return msg; +} + +static inline struct dns_msg* +rpz_synthesize_nodata(struct rpz* ATTR_UNUSED(r), struct module_qstate* ms, + struct query_info* qinfo, struct auth_zone* az) +{ + struct dns_msg* msg = rpz_dns_msg_new(ms->region); + if(msg == NULL) { return msg; } + msg->qinfo = *qinfo; + msg->rep = construct_reply_info_base(ms->region, + LDNS_RCODE_NOERROR | BIT_QR | BIT_AA | BIT_RA, + 1, /* qd */ + 0, /* ttl */ + 0, /* prettl */ + 0, /* expttl */ + 0, /* an */ + 0, /* ns */ + 0, /* ar */ + 0, /* total */ + sec_status_insecure, + LDNS_EDE_NONE); + if(msg->rep) + msg->rep->authoritative = 1; + if(!rpz_add_soa(msg->rep, ms, az)) + return NULL; + return msg; +} + +static inline struct dns_msg* +rpz_synthesize_nxdomain(struct rpz* r, struct module_qstate* ms, + struct query_info* qinfo, struct auth_zone* az) +{ + struct dns_msg* msg = rpz_dns_msg_new(ms->region); + uint16_t flags; + if(msg == NULL) { return msg; } + msg->qinfo = *qinfo; + flags = LDNS_RCODE_NXDOMAIN | BIT_QR | BIT_AA | BIT_RA; + if(r->signal_nxdomain_ra) + flags &= ~BIT_RA; + msg->rep = construct_reply_info_base(ms->region, + flags, + 1, /* qd */ + 0, /* ttl */ + 0, /* prettl */ + 0, /* expttl */ + 0, /* an */ + 0, /* ns */ + 0, /* ar */ + 0, /* total */ + sec_status_insecure, + LDNS_EDE_NONE); + if(msg->rep) + msg->rep->authoritative = 1; + if(!rpz_add_soa(msg->rep, ms, az)) + return NULL; + return msg; +} + +static inline struct dns_msg* +rpz_synthesize_localdata_from_rrset(struct rpz* ATTR_UNUSED(r), struct module_qstate* ms, + struct query_info* qi, struct local_rrset* rrset, struct auth_zone* az) +{ + struct dns_msg* msg = NULL; + struct reply_info* new_reply_info; + struct ub_packed_rrset_key* rp; + + + msg = rpz_dns_msg_new(ms->region); + if(msg == NULL) { return NULL; } + + msg->qinfo = *qi; + new_reply_info = construct_reply_info_base(ms->region, + LDNS_RCODE_NOERROR | BIT_QR | BIT_AA | BIT_RA, + 1, /* qd */ + 0, /* ttl */ + 0, /* prettl */ + 0, /* expttl */ + 1, /* an */ + 0, /* ns */ + 0, /* ar */ + 1, /* total */ + sec_status_insecure, + LDNS_EDE_NONE); + if(new_reply_info == NULL) { + log_err("out of memory"); + return NULL; + } + new_reply_info->authoritative = 1; + rp = respip_copy_rrset(rrset->rrset, ms->region); + if(rp == NULL) { + log_err("out of memory"); + return NULL; + } + rp->rk.dname = qi->qname; + rp->rk.dname_len = qi->qname_len; + /* this rrset is from the rpz data, or synthesized. + * It is not actually from the network, so we flag it with this + * flags as a fake RRset. If later the cache is used to look up + * rrsets, then the fake ones are not returned (if you look without + * the flag). For like CNAME lookups from the iterator or A, AAAA + * lookups for nameserver targets, it would use the without flag + * actual data. So that the actual network data and fake data + * are kept track of separately. */ + rp->rk.flags |= PACKED_RRSET_RPZ; + new_reply_info->rrsets[0] = rp; + msg->rep = new_reply_info; + if(!rpz_add_soa(msg->rep, ms, az)) + return NULL; + return msg; +} + +static inline struct dns_msg* +rpz_synthesize_nsip_localdata(struct rpz* r, struct module_qstate* ms, + struct query_info* qi, struct clientip_synthesized_rr* data, + struct auth_zone* az) +{ + struct local_rrset* rrset; + + rrset = rpz_find_synthesized_rrset(qi->qtype, data, 1); + if(rrset == NULL) { + verbose(VERB_ALGO, "rpz: nsip: no matching local data found"); + return NULL; + } + + return rpz_synthesize_localdata_from_rrset(r, ms, qi, rrset, az); +} + +/* copy'n'paste from localzone.c */ +static struct local_rrset* +local_data_find_type(struct local_data* data, uint16_t type, int alias_ok) +{ + struct local_rrset* p, *cname = NULL; + type = htons(type); + for(p = data->rrsets; p; p = p->next) { + if(p->rrset->rk.type == type) + return p; + if(alias_ok && p->rrset->rk.type == htons(LDNS_RR_TYPE_CNAME)) + cname = p; + } + if(alias_ok) + return cname; + return NULL; +} + +/* based on localzone.c:local_data_answer() */ +static inline struct dns_msg* +rpz_synthesize_nsdname_localdata(struct rpz* r, struct module_qstate* ms, + struct query_info* qi, struct local_zone* z, + struct matched_delegation_point const* match, struct auth_zone* az) +{ + struct local_data key; + struct local_data* ld; + struct local_rrset* rrset; + + if(match->dname == NULL) { return NULL; } + + key.node.key = &key; + key.name = match->dname; + key.namelen = match->dname_len; + key.namelabs = dname_count_labels(match->dname); + + rpz_log_dname("nsdname local data", key.name, key.namelen); + + ld = (struct local_data*)rbtree_search(&z->data, &key.node); + if(ld == NULL) { + verbose(VERB_ALGO, "rpz: nsdname: impossible: qname not found"); + return NULL; + } + + rrset = local_data_find_type(ld, qi->qtype, 1); + if(rrset == NULL) { + verbose(VERB_ALGO, "rpz: nsdname: no matching local data found"); + return NULL; + } + + return rpz_synthesize_localdata_from_rrset(r, ms, qi, rrset, az); +} + +/* like local_data_answer for qname triggers after a cname */ +static struct dns_msg* +rpz_synthesize_qname_localdata_msg(struct rpz* r, struct module_qstate* ms, + struct query_info* qinfo, struct local_zone* z, struct auth_zone* az) +{ + struct local_data key; + struct local_data* ld; + struct local_rrset* rrset; + key.node.key = &key; + key.name = qinfo->qname; + key.namelen = qinfo->qname_len; + key.namelabs = dname_count_labels(qinfo->qname); + ld = (struct local_data*)rbtree_search(&z->data, &key.node); + if(ld == NULL) { + verbose(VERB_ALGO, "rpz: qname: name not found"); + return NULL; + } + rrset = local_data_find_type(ld, qinfo->qtype, 1); + if(rrset == NULL) { + verbose(VERB_ALGO, "rpz: qname: type not found"); + return NULL; + } + return rpz_synthesize_localdata_from_rrset(r, ms, qinfo, rrset, az); +} + +/** Synthesize a CNAME message for RPZ action override */ +static struct dns_msg* +rpz_synthesize_cname_override_msg(struct rpz* r, struct module_qstate* ms, + struct query_info* qinfo) +{ + struct dns_msg* msg = NULL; + struct reply_info* new_reply_info; + struct ub_packed_rrset_key* rp; + + msg = rpz_dns_msg_new(ms->region); + if(msg == NULL) { return NULL; } + + msg->qinfo = *qinfo; + new_reply_info = construct_reply_info_base(ms->region, + LDNS_RCODE_NOERROR | BIT_QR | BIT_AA | BIT_RA, + 1, /* qd */ + 0, /* ttl */ + 0, /* prettl */ + 0, /* expttl */ + 1, /* an */ + 0, /* ns */ + 0, /* ar */ + 1, /* total */ + sec_status_insecure, + LDNS_EDE_NONE); + if(new_reply_info == NULL) { + log_err("out of memory"); + return NULL; + } + new_reply_info->authoritative = 1; + + rp = respip_copy_rrset(r->cname_override, ms->region); + if(rp == NULL) { + log_err("out of memory"); + return NULL; + } + rp->rk.dname = qinfo->qname; + rp->rk.dname_len = qinfo->qname_len; + /* this rrset is from the rpz data, or synthesized. + * It is not actually from the network, so we flag it with this + * flags as a fake RRset. If later the cache is used to look up + * rrsets, then the fake ones are not returned (if you look without + * the flag). For like CNAME lookups from the iterator or A, AAAA + * lookups for nameserver targets, it would use the without flag + * actual data. So that the actual network data and fake data + * are kept track of separately. */ + rp->rk.flags |= PACKED_RRSET_RPZ; + new_reply_info->rrsets[0] = rp; + + msg->rep = new_reply_info; + return msg; +} + +static int +rpz_synthesize_qname_localdata(struct module_env* env, struct rpz* r, + struct local_zone* z, enum localzone_type lzt, struct query_info* qinfo, + struct edns_data* edns, sldns_buffer* buf, struct regional* temp, + struct comm_reply* repinfo, struct ub_server_stats* stats) +{ + struct local_data* ld = NULL; + int ret = 0; + if(r->action_override == RPZ_CNAME_OVERRIDE_ACTION) { + if(!rpz_apply_cname_override_action(r, qinfo, temp)) + return 0; + if(r->log) { + log_rpz_apply("qname", z->name, NULL, RPZ_CNAME_OVERRIDE_ACTION, + qinfo, repinfo, NULL, r->log_name); } - qinfo->local_alias->rrset->rk.dname = qinfo->qname; - qinfo->local_alias->rrset->rk.dname_len = qinfo->qname_len; - if(r->log) - log_rpz_apply(z->name, RPZ_CNAME_OVERRIDE_ACTION, - qinfo, repinfo, r->log_name); stats->rpz_action[RPZ_CNAME_OVERRIDE_ACTION]++; - lock_rw_unlock(&z->lock); - lock_rw_unlock(&a->lock); return 0; } if(lzt == local_zone_redirect && local_data_answer(z, env, qinfo, edns, repinfo, buf, temp, dname_count_labels(qinfo->qname), &ld, lzt, -1, NULL, 0, NULL, 0)) { - if(r->log) - log_rpz_apply(z->name, + if(r->log) { + log_rpz_apply("qname", z->name, NULL, localzone_type_to_rpz_action(lzt), qinfo, - repinfo, r->log_name); + repinfo, NULL, r->log_name); + } stats->rpz_action[localzone_type_to_rpz_action(lzt)]++; - lock_rw_unlock(&z->lock); - lock_rw_unlock(&a->lock); return !qinfo->local_alias; } ret = local_zones_zone_answer(z, env, qinfo, edns, repinfo, buf, temp, 0 /* no local data used */, lzt); - if(r->log) - log_rpz_apply(z->name, localzone_type_to_rpz_action(lzt), - qinfo, repinfo, r->log_name); + if(r->signal_nxdomain_ra && LDNS_RCODE_WIRE(sldns_buffer_begin(buf)) + == LDNS_RCODE_NXDOMAIN) + LDNS_RA_CLR(sldns_buffer_begin(buf)); + if(r->log) { + log_rpz_apply("qname", z->name, NULL, localzone_type_to_rpz_action(lzt), + qinfo, repinfo, NULL, r->log_name); + } stats->rpz_action[localzone_type_to_rpz_action(lzt)]++; + return ret; +} + +static struct clientip_synthesized_rr* +rpz_delegation_point_ipbased_trigger_lookup(struct rpz* rpz, struct iter_qstate* is) +{ + struct delegpt_addr* cursor; + struct clientip_synthesized_rr* action = NULL; + if(is->dp == NULL) { return NULL; } + for(cursor = is->dp->target_list; + cursor != NULL; + cursor = cursor->next_target) { + if(cursor->bogus) { continue; } + action = rpz_ipbased_trigger_lookup(rpz->ns_set, &cursor->addr, + cursor->addrlen, "nsip"); + if(action != NULL) { return action; } + } + return NULL; +} + +static struct dns_msg* +rpz_apply_nsip_trigger(struct module_qstate* ms, struct query_info* qchase, + struct rpz* r, struct clientip_synthesized_rr* raddr, + struct auth_zone* az) +{ + enum rpz_action action = raddr->action; + struct dns_msg* ret = NULL; + + if(r->action_override != RPZ_NO_OVERRIDE_ACTION) { + verbose(VERB_ALGO, "rpz: using override action=%s (replaces=%s)", + rpz_action_to_string(r->action_override), rpz_action_to_string(action)); + action = r->action_override; + } + + if(action == RPZ_LOCAL_DATA_ACTION && raddr->data == NULL) { + verbose(VERB_ALGO, "rpz: bug: nsip local data action but no local data"); + ret = rpz_synthesize_nodata(r, ms, qchase, az); + goto done; + } + + switch(action) { + case RPZ_NXDOMAIN_ACTION: + ret = rpz_synthesize_nxdomain(r, ms, qchase, az); + break; + case RPZ_NODATA_ACTION: + ret = rpz_synthesize_nodata(r, ms, qchase, az); + break; + case RPZ_TCP_ONLY_ACTION: + /* basically a passthru here but the tcp-only will be + * honored before the query gets sent. */ + ms->tcp_required = 1; + ret = NULL; + break; + case RPZ_DROP_ACTION: + ret = rpz_synthesize_nodata(r, ms, qchase, az); + ms->is_drop = 1; + break; + case RPZ_LOCAL_DATA_ACTION: + ret = rpz_synthesize_nsip_localdata(r, ms, qchase, raddr, az); + if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, qchase, az); } + break; + case RPZ_PASSTHRU_ACTION: + ret = NULL; + ms->rpz_passthru = 1; + break; + case RPZ_CNAME_OVERRIDE_ACTION: + ret = rpz_synthesize_cname_override_msg(r, ms, qchase); + break; + default: + verbose(VERB_ALGO, "rpz: nsip: bug: unhandled or invalid action: '%s'", + rpz_action_to_string(action)); + ret = NULL; + } + +done: + if(r->log) + log_rpz_apply("nsip", NULL, &raddr->node, + action, &ms->qinfo, NULL, ms, r->log_name); + if(ms->env->worker) + ms->env->worker->stats.rpz_action[action]++; + lock_rw_unlock(&raddr->lock); + return ret; +} + +static struct dns_msg* +rpz_apply_nsdname_trigger(struct module_qstate* ms, struct query_info* qchase, + struct rpz* r, struct local_zone* z, + struct matched_delegation_point const* match, struct auth_zone* az) +{ + struct dns_msg* ret = NULL; + enum rpz_action action = localzone_type_to_rpz_action(z->type); + + if(r->action_override != RPZ_NO_OVERRIDE_ACTION) { + verbose(VERB_ALGO, "rpz: using override action=%s (replaces=%s)", + rpz_action_to_string(r->action_override), rpz_action_to_string(action)); + action = r->action_override; + } + + switch(action) { + case RPZ_NXDOMAIN_ACTION: + ret = rpz_synthesize_nxdomain(r, ms, qchase, az); + break; + case RPZ_NODATA_ACTION: + ret = rpz_synthesize_nodata(r, ms, qchase, az); + break; + case RPZ_TCP_ONLY_ACTION: + /* basically a passthru here but the tcp-only will be + * honored before the query gets sent. */ + ms->tcp_required = 1; + ret = NULL; + break; + case RPZ_DROP_ACTION: + ret = rpz_synthesize_nodata(r, ms, qchase, az); + ms->is_drop = 1; + break; + case RPZ_LOCAL_DATA_ACTION: + ret = rpz_synthesize_nsdname_localdata(r, ms, qchase, z, match, az); + if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, qchase, az); } + break; + case RPZ_PASSTHRU_ACTION: + ret = NULL; + ms->rpz_passthru = 1; + break; + case RPZ_CNAME_OVERRIDE_ACTION: + ret = rpz_synthesize_cname_override_msg(r, ms, qchase); + break; + default: + verbose(VERB_ALGO, "rpz: nsdname: bug: unhandled or invalid action: '%s'", + rpz_action_to_string(action)); + ret = NULL; + } + + if(r->log) + log_rpz_apply("nsdname", match->dname, NULL, + action, &ms->qinfo, NULL, ms, r->log_name); + if(ms->env->worker) + ms->env->worker->stats.rpz_action[action]++; + lock_rw_unlock(&z->lock); + return ret; +} + +static struct local_zone* +rpz_delegation_point_zone_lookup(struct delegpt* dp, struct local_zones* zones, + uint16_t qclass, + /* output parameter */ + struct matched_delegation_point* match) +{ + struct delegpt_ns* nameserver; + struct local_zone* z = NULL; + + /* the rpz specs match the nameserver names (NS records), not the + * name of the delegation point itself, to the nsdname triggers */ + for(nameserver = dp->nslist; + nameserver != NULL; + nameserver = nameserver->next) { + z = rpz_find_zone(zones, nameserver->name, nameserver->namelen, + qclass, 0, 0, 0); + if(z != NULL) { + match->dname = nameserver->name; + match->dname_len = nameserver->namelen; + if(verbosity >= VERB_ALGO) { + char nm[255+1], zn[255+1]; + dname_str(match->dname, nm); + dname_str(z->name, zn); + if(strcmp(nm, zn) != 0) + verbose(VERB_ALGO, "rpz: trigger nsdname %s on %s action=%s", + zn, nm, rpz_action_to_string(localzone_type_to_rpz_action(z->type))); + else + verbose(VERB_ALGO, "rpz: trigger nsdname %s action=%s", + nm, rpz_action_to_string(localzone_type_to_rpz_action(z->type))); + } + break; + } + } + + return z; +} + +struct dns_msg* +rpz_callback_from_iterator_module(struct module_qstate* ms, struct iter_qstate* is) +{ + struct auth_zones* az; + struct auth_zone* a; + struct clientip_synthesized_rr* raddr = NULL; + struct rpz* r = NULL; + struct local_zone* z = NULL; + struct matched_delegation_point match = {0}; + + if(ms->rpz_passthru) { + verbose(VERB_ALGO, "query is rpz_passthru, no further processing"); + return NULL; + } + + if(ms->env == NULL || ms->env->auth_zones == NULL) { return 0; } + + az = ms->env->auth_zones; + + verbose(VERB_ALGO, "rpz: iterator module callback: have_rpz=%d", az->rpz_first != NULL); + + lock_rw_rdlock(&az->rpz_lock); + + /* precedence of RPZ works, loosely, like this: + * CNAMEs in order of the CNAME chain. rpzs in the order they are + * configured. In an RPZ: first client-IP addr, then QNAME, then + * response IP, then NSDNAME, then NSIP. Longest match first. Smallest + * one from a set. */ + /* we use the precedence rules for the topics and triggers that + * are pertinent at this stage of the resolve processing */ + for(a = az->rpz_first; a != NULL; a = a->rpz_az_next) { + lock_rw_rdlock(&a->lock); + r = a->rpz; + if(r->disabled) { + lock_rw_unlock(&a->lock); + continue; + } + + /* the nsdname has precedence over the nsip triggers */ + z = rpz_delegation_point_zone_lookup(is->dp, r->nsdname_zones, + is->qchase.qclass, &match); + if(z != NULL) { + lock_rw_unlock(&a->lock); + break; + } + + raddr = rpz_delegation_point_ipbased_trigger_lookup(r, is); + if(raddr != NULL) { + lock_rw_unlock(&a->lock); + break; + } + lock_rw_unlock(&a->lock); + } + + lock_rw_unlock(&az->rpz_lock); + + if(raddr == NULL && z == NULL) + return NULL; + + if(raddr != NULL) { + if(z) { + lock_rw_unlock(&z->lock); + } + return rpz_apply_nsip_trigger(ms, &is->qchase, r, raddr, a); + } + return rpz_apply_nsdname_trigger(ms, &is->qchase, r, z, &match, a); +} + +struct dns_msg* rpz_callback_from_iterator_cname(struct module_qstate* ms, + struct iter_qstate* is) +{ + struct auth_zones* az; + struct auth_zone* a = NULL; + struct rpz* r = NULL; + struct local_zone* z = NULL; + enum localzone_type lzt; + struct dns_msg* ret = NULL; + + if(ms->rpz_passthru) { + verbose(VERB_ALGO, "query is rpz_passthru, no further processing"); + return NULL; + } + + if(ms->env == NULL || ms->env->auth_zones == NULL) { return 0; } + az = ms->env->auth_zones; + + lock_rw_rdlock(&az->rpz_lock); + + for(a = az->rpz_first; a; a = a->rpz_az_next) { + lock_rw_rdlock(&a->lock); + r = a->rpz; + if(r->disabled) { + lock_rw_unlock(&a->lock); + continue; + } + z = rpz_find_zone(r->local_zones, is->qchase.qname, + is->qchase.qname_len, is->qchase.qclass, 0, 0, 0); + if(z && r->action_override == RPZ_DISABLED_ACTION) { + if(r->log) + log_rpz_apply("qname", z->name, NULL, + r->action_override, + &ms->qinfo, NULL, ms, r->log_name); + if(ms->env->worker) + ms->env->worker->stats.rpz_action[r->action_override]++; + lock_rw_unlock(&z->lock); + z = NULL; + } + if(z) { + break; + } + /* not found in this auth_zone */ + lock_rw_unlock(&a->lock); + } + lock_rw_unlock(&az->rpz_lock); + + if(z == NULL) + return NULL; + if(r->action_override == RPZ_NO_OVERRIDE_ACTION) { + lzt = z->type; + } else { + lzt = rpz_action_to_localzone_type(r->action_override); + } + + if(verbosity >= VERB_ALGO) { + char nm[255+1], zn[255+1]; + dname_str(is->qchase.qname, nm); + dname_str(z->name, zn); + if(strcmp(zn, nm) != 0) + verbose(VERB_ALGO, "rpz: qname trigger %s on %s, with action=%s", + zn, nm, rpz_action_to_string(localzone_type_to_rpz_action(lzt))); + else + verbose(VERB_ALGO, "rpz: qname trigger %s, with action=%s", + nm, rpz_action_to_string(localzone_type_to_rpz_action(lzt))); + } + switch(localzone_type_to_rpz_action(lzt)) { + case RPZ_NXDOMAIN_ACTION: + ret = rpz_synthesize_nxdomain(r, ms, &is->qchase, a); + break; + case RPZ_NODATA_ACTION: + ret = rpz_synthesize_nodata(r, ms, &is->qchase, a); + break; + case RPZ_TCP_ONLY_ACTION: + /* basically a passthru here but the tcp-only will be + * honored before the query gets sent. */ + ms->tcp_required = 1; + ret = NULL; + break; + case RPZ_DROP_ACTION: + ret = rpz_synthesize_nodata(r, ms, &is->qchase, a); + ms->is_drop = 1; + break; + case RPZ_LOCAL_DATA_ACTION: + ret = rpz_synthesize_qname_localdata_msg(r, ms, &is->qchase, z, a); + if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, &is->qchase, a); } + break; + case RPZ_PASSTHRU_ACTION: + ret = NULL; + ms->rpz_passthru = 1; + break; + default: + verbose(VERB_ALGO, "rpz: qname trigger: bug: unhandled or invalid action: '%s'", + rpz_action_to_string(localzone_type_to_rpz_action(lzt))); + ret = NULL; + } + if(r->log) + log_rpz_apply("qname", (z?z->name:NULL), NULL, + localzone_type_to_rpz_action(lzt), + &is->qchase, NULL, ms, r->log_name); + lock_rw_unlock(&z->lock); + lock_rw_unlock(&a->lock); + return ret; +} + +static int +rpz_apply_maybe_clientip_trigger(struct auth_zones* az, struct module_env* env, + struct query_info* qinfo, struct edns_data* edns, struct comm_reply* repinfo, + uint8_t* taglist, size_t taglen, struct ub_server_stats* stats, + sldns_buffer* buf, struct regional* temp, + /* output parameters */ + struct local_zone** z_out, struct auth_zone** a_out, struct rpz** r_out, + int* passthru) +{ + int ret = 0; + enum rpz_action client_action; + struct clientip_synthesized_rr* node = rpz_resolve_client_action_and_zone( + az, qinfo, repinfo, taglist, taglen, stats, z_out, a_out, r_out); + + client_action = ((node == NULL) ? RPZ_INVALID_ACTION : node->action); + if(node != NULL && *r_out && + (*r_out)->action_override != RPZ_NO_OVERRIDE_ACTION) { + client_action = (*r_out)->action_override; + } + if(client_action == RPZ_PASSTHRU_ACTION) { + if(*r_out && (*r_out)->log) + log_rpz_apply( + (node?"clientip":"qname"), + ((*z_out)?(*z_out)->name:NULL), + (node?&node->node:NULL), + client_action, qinfo, repinfo, NULL, + (*r_out)->log_name); + *passthru = 1; + ret = 0; + goto done; + } + if(*z_out == NULL || (client_action != RPZ_INVALID_ACTION && + client_action != RPZ_PASSTHRU_ACTION)) { + if(client_action == RPZ_PASSTHRU_ACTION + || client_action == RPZ_INVALID_ACTION + || (client_action == RPZ_TCP_ONLY_ACTION + && !rpz_is_udp_query(repinfo))) { + ret = 0; + goto done; + } + stats->rpz_action[client_action]++; + if(client_action == RPZ_LOCAL_DATA_ACTION) { + rpz_apply_clientip_localdata_action(node, env, qinfo, + edns, repinfo, buf, temp, *a_out); + ret = 1; + } else if(client_action == RPZ_CNAME_OVERRIDE_ACTION) { + if(!rpz_apply_cname_override_action(*r_out, qinfo, + temp)) { + ret = 0; + goto done; + } + ret = 0; + } else { + local_zones_zone_answer(*z_out /*likely NULL, no zone*/, env, qinfo, edns, + repinfo, buf, temp, 0 /* no local data used */, + rpz_action_to_localzone_type(client_action)); + if(*r_out && (*r_out)->signal_nxdomain_ra && + LDNS_RCODE_WIRE(sldns_buffer_begin(buf)) + == LDNS_RCODE_NXDOMAIN) + LDNS_RA_CLR(sldns_buffer_begin(buf)); + ret = 1; + } + if(*r_out && (*r_out)->log) + log_rpz_apply( + (node?"clientip":"qname"), + ((*z_out)?(*z_out)->name:NULL), + (node?&node->node:NULL), + client_action, qinfo, repinfo, NULL, + (*r_out)->log_name); + goto done; + } + ret = -1; +done: + if(node != NULL) { + lock_rw_unlock(&node->lock); + } + return ret; +} + +int +rpz_callback_from_worker_request(struct auth_zones* az, struct module_env* env, + struct query_info* qinfo, struct edns_data* edns, sldns_buffer* buf, + struct regional* temp, struct comm_reply* repinfo, uint8_t* taglist, + size_t taglen, struct ub_server_stats* stats, int* passthru) +{ + struct rpz* r = NULL; + struct auth_zone* a = NULL; + struct local_zone* z = NULL; + int ret; + enum localzone_type lzt; + + int clientip_trigger = rpz_apply_maybe_clientip_trigger(az, env, qinfo, + edns, repinfo, taglist, taglen, stats, buf, temp, &z, &a, &r, + passthru); + if(clientip_trigger >= 0) { + if(a) { + lock_rw_unlock(&a->lock); + } + if(z) { + lock_rw_unlock(&z->lock); + } + return clientip_trigger; + } + + if(z == NULL) { + if(a) { + lock_rw_unlock(&a->lock); + } + return 0; + } + + log_assert(r); + + if(r->action_override == RPZ_NO_OVERRIDE_ACTION) { + lzt = z->type; + } else { + lzt = rpz_action_to_localzone_type(r->action_override); + } + if(r->action_override == RPZ_PASSTHRU_ACTION || + lzt == local_zone_always_transparent /* RPZ_PASSTHRU_ACTION */) { + *passthru = 1; + } + + if(verbosity >= VERB_ALGO) { + char nm[255+1], zn[255+1]; + dname_str(qinfo->qname, nm); + dname_str(z->name, zn); + if(strcmp(zn, nm) != 0) + verbose(VERB_ALGO, "rpz: qname trigger %s on %s with action=%s", + zn, nm, rpz_action_to_string(localzone_type_to_rpz_action(lzt))); + else + verbose(VERB_ALGO, "rpz: qname trigger %s with action=%s", + nm, rpz_action_to_string(localzone_type_to_rpz_action(lzt))); + } + + ret = rpz_synthesize_qname_localdata(env, r, z, lzt, qinfo, edns, buf, temp, + repinfo, stats); + lock_rw_unlock(&z->lock); lock_rw_unlock(&a->lock); diff --git a/contrib/unbound/services/rpz.h b/contrib/unbound/services/rpz.h index d5996a6cfa26..7f409087f772 100644 --- a/contrib/unbound/services/rpz.h +++ b/contrib/unbound/services/rpz.h @@ -50,6 +50,7 @@ #include "sldns/sbuffer.h" #include "daemon/stats.h" #include "respip/respip.h" +struct iter_qstate; /** * RPZ triggers, only the QNAME trigger is currently supported in Unbound. @@ -83,6 +84,24 @@ enum rpz_action { RPZ_CNAME_OVERRIDE_ACTION, /* RPZ CNAME action override*/ }; +struct clientip_synthesized_rrset { + struct regional* region; + struct rbtree_type entries; + /** lock on the entries tree */ + lock_rw_type lock; +}; + +struct clientip_synthesized_rr { + /** node in address tree */ + struct addr_tree_node node; + /** lock on the node item */ + lock_rw_type lock; + /** action for this address span */ + enum rpz_action action; + /** "local data" for this node */ + struct local_rrset* data; +}; + /** * RPZ containing policies. Pointed to from corresponding auth-zone. Part of a * linked list to keep configuration order. Iterating or changing the linked @@ -92,12 +111,17 @@ enum rpz_action { struct rpz { struct local_zones* local_zones; struct respip_set* respip_set; + struct clientip_synthesized_rrset* client_set; + struct clientip_synthesized_rrset* ns_set; + struct local_zones* nsdname_zones; uint8_t* taglist; size_t taglistlen; enum rpz_action action_override; struct ub_packed_rrset_key* cname_override; int log; char* log_name; + /** signal NXDOMAIN blocked with unset RA flag */ + int signal_nxdomain_ra; struct regional* region; int disabled; }; @@ -125,6 +149,7 @@ int rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dna /** * Delete policy matching RR, used for IXFR. * @param r: the rpz to add the policy to. + * @param azname: dname of the auth-zone * @param aznamelen: the length of the auth-zone name * @param dname: dname of the RR * @param dnamelen: length of the dname @@ -133,9 +158,9 @@ int rpz_insert_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, uint8_t* dna * @param rdatawl: rdata of the RR, prepended with the rdata size * @param rdatalen: length if the RR, including the prepended rdata size */ -void rpz_remove_rr(struct rpz* r, size_t aznamelen, uint8_t* dname, - size_t dnamelen, uint16_t rr_type, uint16_t rr_class, uint8_t* rdatawl, - size_t rdatalen); +void rpz_remove_rr(struct rpz* r, uint8_t* azname, size_t aznamelen, + uint8_t* dname, size_t dnamelen, uint16_t rr_type, uint16_t rr_class, + uint8_t* rdatawl, size_t rdatalen); /** * Walk over the RPZ zones to find and apply a QNAME trigger policy. @@ -147,14 +172,39 @@ void rpz_remove_rr(struct rpz* r, size_t aznamelen, uint8_t* dname, * @param temp: scratchpad * @param repinfo: reply info * @param taglist: taglist to lookup. - * @param taglen: lenth of taglist. + * @param taglen: length of taglist. * @param stats: worker stats struct + * @param passthru: returns if the query can passthru further rpz processing. * @return: 1 if client answer is ready, 0 to continue resolving */ -int rpz_apply_qname_trigger(struct auth_zones* az, struct module_env* env, +int rpz_callback_from_worker_request(struct auth_zones* az, struct module_env* env, struct query_info* qinfo, struct edns_data* edns, sldns_buffer* buf, struct regional* temp, struct comm_reply* repinfo, - uint8_t* taglist, size_t taglen, struct ub_server_stats* stats); + uint8_t* taglist, size_t taglen, struct ub_server_stats* stats, + int* passthru); + +/** + * Callback to process when the iterator module is about to send queries. + * Checks for nsip and nsdname triggers. + * @param qstate: the query state. + * @param iq: iterator module query state. + * @return NULL if nothing is done. Or a new message with the contents from + * the rpz, based on the delegation point. It is allocated in the + * qstate region. + */ +struct dns_msg* rpz_callback_from_iterator_module(struct module_qstate* qstate, + struct iter_qstate* iq); + +/** + * Callback to process when the iterator module has followed a cname. + * There can be a qname trigger for the new query name. + * @param qstate: the query state. + * @param iq: iterator module query state. + * @return NULL if nothing is done. Or a new message with the contents from + * the rpz, based on the iq.qchase. It is allocated in the qstate region. + */ +struct dns_msg* rpz_callback_from_iterator_cname(struct module_qstate* qstate, + struct iter_qstate* iq); /** * Delete RPZ @@ -176,6 +226,14 @@ int rpz_clear(struct rpz* r); struct rpz* rpz_create(struct config_auth* p); /** + * Change config on rpz, after reload. + * @param r: the rpz structure. + * @param p: the config that was read. + * @return false on failure. + */ +int rpz_config(struct rpz* r, struct config_auth* p); + +/** * String for RPZ action enum * @param a: RPZ action to get string for * @return: string for RPZ action @@ -186,7 +244,7 @@ enum rpz_action respip_action_to_rpz_action(enum respip_action a); /** - * Prepare RPZ after procesing feed content. + * Prepare RPZ after processing feed content. * @param r: RPZ to use */ void rpz_finish_config(struct rpz* r); diff --git a/contrib/unbound/services/view.c b/contrib/unbound/services/view.c index db48ae9545f8..72f3643184ee 100644 --- a/contrib/unbound/services/view.c +++ b/contrib/unbound/services/view.c @@ -66,8 +66,9 @@ views_create(void) return v; } -/** This prototype is defined in in respip.h, but we want to avoid - * unnecessary dependencies */ +/* \noop (ignore this comment for doxygen) + * This prototype is defined in in respip.h, but we want to avoid + * unnecessary dependencies */ void respip_set_delete(struct respip_set *set); void diff --git a/contrib/unbound/services/view.h b/contrib/unbound/services/view.h index 17778100474b..12f7a64e7171 100644 --- a/contrib/unbound/services/view.h +++ b/contrib/unbound/services/view.h @@ -126,7 +126,8 @@ void view_delete(struct view* v); */ void views_print(struct views* v); -/* Find a view by name. +/** + * Find a view by name. * @param vs: views * @param name: name of the view we are looking for * @param write: 1 for obtaining write lock on found view, 0 for read lock |