Skip to content

Commit

Permalink
usrloc HA: Improve management for replicated "labels"
Browse files Browse the repository at this point in the history
This patch aims to fix the occasional "differring rlabels"
debugging error messages which may occur after a restart in usrloc HA
scenarios, especially if the active SIP box is restarted while
processing REGISTER requests in parallel.

Since both record and contact labels are dynamic, per-instance and lost
on restart, conflicts are to be expected.  The idea behind the fix is to
simply adapt the replicated contact_id to the local instance, thus
preferring the local labels over the remote labels whenever a conflict
is detected.
  • Loading branch information
liviuchircu committed Nov 27, 2024
1 parent 91c523c commit 90a0079
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 43 deletions.
4 changes: 2 additions & 2 deletions modules/usrloc/dlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ udomain_t* get_next_udomain(udomain_t *_d);

/* contact label may not be wider than 14 bits */
#define CLABEL_MASK ((1<<14)-1)
#define CLABEL_INC_AND_TEST(_clabel_) ((_clabel_+1)&CLABEL_MASK)
#define CLABEL_NEXT(_clabel_) ((_clabel_+1)&CLABEL_MASK)
#define CID_GET_CLABEL(_cid) (_cid&CLABEL_MASK)
#define CID_NEXT_RLABEL(_dom, _sl) (_dom->table[_sl].next_label++)

Expand All @@ -123,7 +123,7 @@ static inline void init_urecord_labels(urecord_t *r, udomain_t *d)
static inline uint64_t
pack_indexes(unsigned short aorhash, unsigned int rlabel, unsigned short clabel)
{
return (clabel & CLABEL_MASK) +
return ((uint64_t)clabel & CLABEL_MASK) +
((uint64_t)rlabel << 14) + ((uint64_t)aorhash << 46);
}

Expand Down
18 changes: 9 additions & 9 deletions modules/usrloc/ucontact.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ struct ct_match {
*/
typedef struct ucontact {
uint64_t contact_id; /*!< 64 bit Contact identifier
0-------0-------------0---------------0
|0 - 13 | 14 - 45 | 46 - 61 |
|aorhash| record label| contact label |
0-------0-------------0---------------0
0---------------0--------------0---------------0
| 0 - 13 | 14 - 45 | 46 - 63 |
| contact label | record label | aorhash |
0---------------0--------------0---------------0
*/
str* domain; /*!< Pointer to domain name (NULL terminated) */
str* aor; /*!< Pointer to the AOR string in record structure*/
Expand All @@ -127,7 +127,7 @@ typedef struct ucontact {
unsigned int methods; /*!< Supported methods */
str attr; /*!< Additional registration info */
struct proxy_l next_hop;/*!< SIP-wise determined next hop */
unsigned int label; /*!< label to find the contact in contact list>*/
unsigned short label; /*!< label to find the contact in contact list>*/
int sipping_latency; /*!< useconds; not restart-persistent >*/
str shtag; /*!< helps determine the logical owner node */
str cdb_key; /*!< the key of the contact in cache_db; makes
Expand All @@ -144,10 +144,10 @@ typedef struct ucontact {

typedef struct ucontact_info {
uint64_t contact_id; /*!< 64 bit Contact identifier
0-------0-------------0---------------0
|0 - 15 | 16 - 47 | 48 - 63 |
|aorhash| record label| contact label |
0-------0-------------0---------------0
0---------------0--------------0---------------0
| 0 - 13 | 14 - 45 | 46 - 63 |
| contact label | record label | aorhash |
0---------------0--------------0---------------0
*/
str* c;
str received;
Expand Down
4 changes: 2 additions & 2 deletions modules/usrloc/udomain.c
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ int preload_udomain(db_con_t* _c, udomain_t* _d)
_d->table[sl].next_label = rlabel + 1;

if (r->next_clabel <= clabel || r->next_clabel == 0)
r->next_clabel = CLABEL_INC_AND_TEST(clabel);
r->next_clabel = CLABEL_NEXT(clabel);

r->label = rlabel;
}
Expand Down Expand Up @@ -658,7 +658,7 @@ int preload_udomain(db_con_t* _c, udomain_t* _d)
if (cid_regen && old_expires) {
/* rebuild the contact id for this contact */
ci->contact_id = pack_indexes(r->aorhash, r->label, r->next_clabel);
r->next_clabel = CLABEL_INC_AND_TEST(r->next_clabel);
r->next_clabel = CLABEL_NEXT(r->next_clabel);

ci->expires = old_expires;

Expand Down
47 changes: 19 additions & 28 deletions modules/usrloc/ul_cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ static int receive_ucontact_insert(bin_packet_t *packet)
user_agent, path, attr, st, sock, kv_str, cflags_str;
udomain_t *domain;
urecord_t *record;
ucontact_t *contact, *ct;
ucontact_t *contact;
int rc, sl;
unsigned short _, clabel;
unsigned int rlabel;
Expand Down Expand Up @@ -604,34 +604,10 @@ static int receive_ucontact_insert(bin_packet_t *packet)

record->label = rlabel;
sl = record->aorhash & (domain->size - 1);
if (domain->table[sl].next_label <= rlabel)
if (rlabel >= domain->table[sl].next_label)
domain->table[sl].next_label = rlabel + 1;
}

if (record->label != rlabel) {
int has_good_cts = 0;

for (ct = record->contacts; ct; ct = ct->next)
if (ct->expires != UL_EXPIRED_TIME) {
has_good_cts = 1;
break;
}

if (has_good_cts) {
LM_BUG("differring rlabels (%u vs. %u, ci: '%.*s')",
record->label, rlabel, callid.len, callid.s);
} else {
/* no contacts -> it's safe to inherit the active node's rlabel */
record->label = rlabel;
sl = record->aorhash & (domain->size - 1);
if (domain->table[sl].next_label <= rlabel)
domain->table[sl].next_label = rlabel + 1;
}
}

if (record->next_clabel <= clabel)
record->next_clabel = CLABEL_INC_AND_TEST(clabel);

rc = get_ucontact(record, &contact_str, &callid, ci.cseq, &cmatch,
&contact);

Expand All @@ -641,15 +617,30 @@ static int receive_ucontact_insert(bin_packet_t *packet)
case -1:
/* received data is older than what we have */
break;

case 0:
ci.contact_id = pack_indexes((unsigned short)record->aorhash,
record->label, (unsigned short)contact->label);

/* received data is newer than what we have */
if (update_ucontact(record, contact, &ci, NULL, 1) != 0) {
LM_ERR("failed to update ucontact (ci: '%.*s')\n", callid.len, callid.s);
unlock_udomain(domain, &aor);
goto error;
}
break;

case 1:
if (clabel >= record->next_clabel) {
record->next_clabel = CLABEL_NEXT(clabel);
} else {
clabel = record->next_clabel;
record->next_clabel = CLABEL_NEXT(record->next_clabel);
}

ci.contact_id = pack_indexes((unsigned short)record->aorhash,
record->label, (unsigned short)clabel);

if (insert_ucontact(record, &contact_str, &ci, NULL, 1, &contact) != 0) {
LM_ERR("failed to insert ucontact (ci: '%.*s')\n", callid.len, callid.s);
unlock_udomain(domain, &aor);
Expand Down Expand Up @@ -787,7 +778,7 @@ static int receive_ucontact_update(bin_packet_t *packet)
}

if (record->next_clabel <= clabel)
record->next_clabel = CLABEL_INC_AND_TEST(clabel);
record->next_clabel = CLABEL_NEXT(clabel);
} else {
rc = get_ucontact(record, &contact_str, &callid, ci.cseq + 1, &cmatch,
&contact);
Expand All @@ -803,7 +794,7 @@ static int receive_ucontact_update(bin_packet_t *packet)
}

if (record->next_clabel <= clabel)
record->next_clabel = CLABEL_INC_AND_TEST(clabel);
record->next_clabel = CLABEL_NEXT(clabel);

} else if (rc == 0) {
if (update_ucontact(record, contact, &ci, NULL, 1) != 0) {
Expand Down
4 changes: 2 additions & 2 deletions modules/usrloc/urecord.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ int insert_ucontact(urecord_t* _r, str* _contact, ucontact_info_t* _ci,
pack_indexes((unsigned short)_r->aorhash,
_r->label,
((unsigned short)_r->next_clabel));
_r->next_clabel = CLABEL_INC_AND_TEST(_r->next_clabel);
_r->next_clabel = CLABEL_NEXT(_r->next_clabel);
}

if (cluster_mode == CM_FULL_SHARING_CACHEDB && !_ci->cdb_key.s) {
Expand Down Expand Up @@ -1104,7 +1104,7 @@ uint64_t next_contact_id(urecord_t* _r)
pack_indexes((unsigned short)_r->aorhash,
_r->label,
((unsigned short)_r->next_clabel));
_r->next_clabel = CLABEL_INC_AND_TEST(_r->next_clabel);
_r->next_clabel = CLABEL_NEXT(_r->next_clabel);

return contact_id;
}
Expand Down

0 comments on commit 90a0079

Please sign in to comment.