pacemaker  1.1.16-94ff4df
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 # define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
35 /* The peer cache remembers cluster nodes that have been seen.
36  * This is managed mostly automatically by libcluster, based on
37  * cluster membership events.
38  *
39  * Because cluster nodes can have conflicting names or UUIDs,
40  * the hash table key is a uniquely generated ID.
41  */
GHashTable *crm_peer_cache = NULL;

/*
 * The remote peer cache tracks pacemaker_remote nodes. While the
 * value has the same type as the peer cache's, it is tracked separately for
 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
 * so the name (which is also the UUID) is used as the hash table key; there
 * is no equivalent of membership events, so management is not automatic; and
 * most users of the peer cache need to exclude pacemaker_remote nodes.
 *
 * That said, using a single cache would be more logical and less error-prone,
 * so it would be a good idea to merge them one day.
 *
 * libcluster provides two avenues for populating the cache:
 * crm_remote_peer_get(), crm_remote_peer_cache_add() and
 * crm_remote_peer_cache_remove() directly manage it,
 * while crm_remote_peer_cache_refresh() populates it via the CIB.
 */
GHashTable *crm_remote_peer_cache = NULL;

/* Sequence number of the most recent membership event seen */
unsigned long long crm_peer_seq = 0;

/* Whether the local cluster partition currently has quorum */
gboolean crm_have_quorum = FALSE;

/* Whether lost cluster nodes are automatically purged from the peer cache
 * (toggled via crm_set_autoreap())
 */
static gboolean crm_autoreap = TRUE;
66 int
68 {
69  if (crm_remote_peer_cache == NULL) {
70  return 0;
71  }
72  return g_hash_table_size(crm_remote_peer_cache);
73 }
74 
86 crm_node_t *
87 crm_remote_peer_get(const char *node_name)
88 {
89  crm_node_t *node;
90 
91  if (node_name == NULL) {
92  errno = -EINVAL;
93  return NULL;
94  }
95 
96  /* Return existing cache entry if one exists */
97  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
98  if (node) {
99  return node;
100  }
101 
102  /* Allocate a new entry */
103  node = calloc(1, sizeof(crm_node_t));
104  if (node == NULL) {
105  return NULL;
106  }
107 
108  /* Populate the essential information */
109  node->flags = crm_remote_node;
110  node->uuid = strdup(node_name);
111  if (node->uuid == NULL) {
112  free(node);
113  errno = -ENOMEM;
114  return NULL;
115  }
116 
117  /* Add the new entry to the cache */
118  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
119  crm_trace("added %s to remote cache", node_name);
120 
121  /* Update the entry's uname, ensuring peer status callbacks are called */
122  crm_update_peer_uname(node, node_name);
123  return node;
124 }
125 
134 void
135 crm_remote_peer_cache_add(const char *node_name)
136 {
137  CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
138 }
139 
140 void
141 crm_remote_peer_cache_remove(const char *node_name)
142 {
143  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
144  crm_trace("removed %s from remote peer cache", node_name);
145  }
146 }
147 
159 static const char *
160 remote_state_from_cib(xmlNode *node_state)
161 {
162  const char *status;
163 
164  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
165  if (status && !crm_is_true(status)) {
166  status = CRM_NODE_LOST;
167  } else {
168  status = CRM_NODE_MEMBER;
169  }
170  return status;
171 }
172 
/* User data passed to remote_cache_refresh_helper() when looping through
 * the results of an xpath search of the CIB for remote/guest nodes
 */
struct refresh_data {
    const char *field; /* XML attribute that holds the node name in a result */
    gboolean has_state; /* whether to update node state based on XML */
};
178 
186 static void
187 remote_cache_refresh_helper(xmlNode *result, void *user_data)
188 {
189  struct refresh_data *data = user_data;
190  const char *remote = crm_element_value(result, data->field);
191  const char *state = NULL;
192  crm_node_t *node;
193 
194  CRM_CHECK(remote != NULL, return);
195 
196  /* Determine node's state, if the result has it */
197  if (data->has_state) {
198  state = remote_state_from_cib(result);
199  }
200 
201  /* Check whether cache already has entry for node */
202  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
203 
204  if (node == NULL) {
205  /* Node is not in cache, so add a new entry for it */
206  node = crm_remote_peer_get(remote);
207  CRM_ASSERT(node);
208  if (state) {
209  crm_update_peer_state(__FUNCTION__, node, state, 0);
210  }
211 
212  } else if (is_set(node->flags, crm_node_dirty)) {
213  /* Node is in cache and hasn't been updated already, so mark it clean */
215  if (state) {
216  crm_update_peer_state(__FUNCTION__, node, state, 0);
217  }
218  }
219 }
220 
221 static void
222 mark_dirty(gpointer key, gpointer value, gpointer user_data)
223 {
224  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
225 }
226 
227 static gboolean
228 is_dirty(gpointer key, gpointer value, gpointer user_data)
229 {
230  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
231 }
232 
/* xpath to find CIB resource configuration entries for guest nodes
 * (resources with a remote-node meta-attribute)
 */
#define XPATH_GUEST_NODE_CONFIG \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
    "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
    "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"

/* xpath to find CIB resource configuration entries for remote nodes
 * (ocf:pacemaker:remote resources)
 */
#define XPATH_REMOTE_NODE_CONFIG \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
    "[@type='remote'][@provider='pacemaker']"

/* xpath to find CIB node status entries for pacemaker_remote nodes */
#define XPATH_REMOTE_NODE_STATUS \
    "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
    "[@" XML_NODE_IS_REMOTE "='true']"
248 
254 void
256 {
257  struct refresh_data data;
258 
259  crm_peer_init();
260 
261  /* First, we mark all existing cache entries as dirty,
262  * so that later we can remove any that weren't in the CIB.
263  * We don't empty the cache, because we need to detect changes in state.
264  */
265  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
266 
267  /* Look for guest nodes and remote nodes in the status section */
268  data.field = "id";
269  data.has_state = TRUE;
271  remote_cache_refresh_helper, &data);
272 
273  /* Look for guest nodes and remote nodes in the configuration section,
274  * because they may have just been added and not have a status entry yet.
275  * In that case, the cached node state will be left NULL, so that the
276  * peer status callback isn't called until we're sure the node started
277  * successfully.
278  */
279  data.field = "value";
280  data.has_state = FALSE;
282  remote_cache_refresh_helper, &data);
283  data.field = "id";
284  data.has_state = FALSE;
286  remote_cache_refresh_helper, &data);
287 
288  /* Remove all old cache entries that weren't seen in the CIB */
289  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
290 }
291 
292 gboolean
294 {
295  if(node == NULL) {
296  return FALSE;
297  }
298 
299  if (is_set(node->flags, crm_remote_node)) {
300  /* remote nodes are never considered active members. This
301  * guarantees they will never be considered for DC membership.*/
302  return FALSE;
303  }
304 #if SUPPORT_COROSYNC
305  if (is_openais_cluster()) {
306  return crm_is_corosync_peer_active(node);
307  }
308 #endif
309 #if SUPPORT_HEARTBEAT
310  if (is_heartbeat_cluster()) {
311  return crm_is_heartbeat_peer_active(node);
312  }
313 #endif
314  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
315  return FALSE;
316 }
317 
/* GHRFunc: remove a peer cache entry if it matches the search criteria
 * (a crm_node_t passed as user_data) and is not an active member.
 *
 * Matching precedence: if search->id is nonzero, only an ID match counts;
 * only when search->id is 0 does the uname comparison apply.
 */
static gboolean
crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
{
    crm_node_t *node = value;
    crm_node_t *search = user_data;

    if (search == NULL) {
        return FALSE;

    } else if (search->id && node->id != search->id) {
        /* Searching by ID, and this entry's ID doesn't match */
        return FALSE;

    } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
        /* Searching by name, and this entry's name doesn't match */
        return FALSE;

    } else if (crm_is_peer_active(value) == FALSE) {
        /* Entry matches and is inactive: tell caller to remove it */
        crm_info("Removing node with name %s and id %u from membership cache",
                 (node->uname? node->uname : "unknown"), node->id);
        return TRUE;
    }
    /* Entry matches but is still active, so keep it */
    return FALSE;
}
340 
/*!
 * \brief Remove all peer cache entries matching a node ID and/or uname
 *
 * \param[in] id    Node ID to match, or 0 to match by name only
 * \param[in] name  Node name to match (used only when id is 0), or NULL
 *
 * \return Number of entries purged from the cache
 */
guint
reap_crm_member(uint32_t id, const char *name)
{
    int matches = 0;
    crm_node_t search;    /* stack-allocated pattern passed to the GHRFunc */

    if (crm_peer_cache == NULL) {
        crm_trace("Membership cache not initialized, ignoring purge request");
        return 0;
    }

    search.id = id;
    /* copied so it can be freed uniformly below; NULL name means id-only */
    search.uname = name ? strdup(name) : NULL;
    matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
    if(matches) {
        crm_notice("Purged %d peers with id=%u%s%s from the membership cache",
                   matches, search.id,
                   (search.uname? " and/or uname=" : ""),
                   (search.uname? search.uname : ""));

    } else {
        crm_info("No peers with id=%u%s%s to purge from the membership cache",
                 search.id, (search.uname? " and/or uname=" : ""),
                 (search.uname? search.uname : ""));
    }

    free(search.uname);
    return matches;
}
378 
379 static void
380 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
381 {
382  guint *count = user_data;
383  crm_node_t *node = value;
384 
385  if (crm_is_peer_active(node)) {
386  *count = *count + 1;
387  }
388 }
389 
390 guint
392 {
393  guint count = 0;
394 
395  if (crm_peer_cache) {
396  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
397  }
398  return count;
399 }
400 
401 static void
402 destroy_crm_node(gpointer data)
403 {
404  crm_node_t *node = data;
405 
406  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
407 
408  free(node->addr);
409  free(node->uname);
410  free(node->state);
411  free(node->uuid);
412  free(node->expected);
413  free(node);
414 }
415 
416 void
418 {
419  if (crm_peer_cache == NULL) {
420  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
421  }
422 
423  if (crm_remote_peer_cache == NULL) {
424  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
425  }
426 }
427 
428 void
430 {
431  if (crm_peer_cache != NULL) {
432  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
433  g_hash_table_destroy(crm_peer_cache);
434  crm_peer_cache = NULL;
435  }
436 
437  if (crm_remote_peer_cache != NULL) {
438  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
439  g_hash_table_destroy(crm_remote_peer_cache);
440  crm_remote_peer_cache = NULL;
441  }
442 }
443 
/* Client-registered function invoked after peer status changes
 * (registered via crm_set_status_callback())
 */
void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
/*!
 * \brief Set a client function that will be called after peer status changes
 *
 * \param[in] dispatch  Function to register (NULL disables callbacks)
 */
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    crm_status_callback = dispatch;
}
461 
/*!
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * \param[in] autoreap  TRUE to purge lost nodes from the peer cache
 *                      automatically, FALSE to leave that to the caller
 */
void
crm_set_autoreap(gboolean autoreap)
{
    crm_autoreap = autoreap;
}
478 
479 static void crm_dump_peer_hash(int level, const char *caller)
480 {
481  GHashTableIter iter;
482  const char *id = NULL;
483  crm_node_t *node = NULL;
484 
485  g_hash_table_iter_init(&iter, crm_peer_cache);
486  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
487  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
488  }
489 }
490 
491 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
492 {
493  if(value == user_data) {
494  return TRUE;
495  }
496  return FALSE;
497 }
498 
499 crm_node_t *
500 crm_find_peer_full(unsigned int id, const char *uname, int flags)
501 {
502  crm_node_t *node = NULL;
503 
504  CRM_ASSERT(id > 0 || uname != NULL);
505 
506  crm_peer_init();
507 
508  if (flags & CRM_GET_PEER_REMOTE) {
509  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
510  }
511 
512  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
513  node = crm_find_peer(id, uname);
514  }
515  return node;
516 }
517 
518 crm_node_t *
519 crm_get_peer_full(unsigned int id, const char *uname, int flags)
520 {
521  crm_node_t *node = NULL;
522 
523  CRM_ASSERT(id > 0 || uname != NULL);
524 
525  crm_peer_init();
526 
527  if (flags & CRM_GET_PEER_REMOTE) {
528  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
529  }
530 
531  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
532  node = crm_get_peer(id, uname);
533  }
534  return node;
535 }
536 
/*!
 * \brief Search the cluster peer cache by ID and/or name, reconciling conflicts
 *
 * \param[in] id     Cluster node ID to search for, or 0
 * \param[in] uname  Node name to search for, or NULL
 *
 * \return Matching cache entry, or NULL if none (or if a conflict requires
 *         the caller to create a fresh entry)
 *
 * Looks the node up independently by name and by ID, then reconciles the two
 * results: mismatches can indicate a renamed node, a changed node ID, or two
 * nodes colliding on a name or ID. In the merge case, duplicate entries are
 * removed from the cache.
 */
crm_node_t *
crm_find_peer(unsigned int id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    /* Pass 1: linear scan for a case-insensitive name match */
    if (uname != NULL) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    /* Pass 2: linear scan for an ID match */
    if (id > 0) {
        g_hash_table_iter_init(&iter, crm_peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(node->id == id) {
                crm_trace("ID match: %u = %p", node->id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if(by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %u/%s", by_id, id, uname);

    } else if(by_id == NULL && by_name) {
        crm_trace("Only one: %p for %u/%s", by_name, id, uname);

        if(id && by_name->id) {
            /* Name entry already has a different nonzero ID: two nodes
             * share the name, so don't reuse either entry
             */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node %u and %u share the same name '%s'",
                     id, by_name->id, uname);
            node = NULL; /* Create a new one */

        } else {
            node = by_name;
        }

    } else if(by_name == NULL && by_id) {
        crm_trace("Only one: %p for %u/%s", by_id, id, uname);

        if(uname && by_id->uname) {
            /* ID entry carries a different name; trust the requested name */
            crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
            crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
                     uname, by_id->uname, id, uname);
        }

    /* Below here, both by_id and by_name are non-NULL and differ */
    } else if(uname && by_id->uname) {
        if(safe_str_eq(uname, by_id->uname)) {
            /* Same name, different entries: the node's ID changed; drop
             * the stale by_name entry from the cache
             */
            crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
            g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);

        } else {
            /* Two distinct names mapped to one nodeid: unrecoverable */
            crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
            crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
            crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
        }

    } else if(id && by_name->id) {
        crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);

    } else {
        /* Simple merge */

        /* Only corosync based clusters use nodeid's
         *
         * The functions that call crm_update_peer_state() only know nodeid
         * so 'by_id' is authorative when merging
         *
         * Same for crm_update_peer_proc()
         */
        crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);

        crm_info("Merging %p into %p", by_name, by_id);
        g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
    }

    return node;
}
630 
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Purge inactive cache entries whose uname conflicts with a node
 *
 * \param[in] node  Node whose name/ID should win any conflict
 *
 * \return Number of conflicting entries removed
 */
static guint
crm_remove_conflicting_peer(crm_node_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    crm_node_t *existing_node = NULL;

    /* Nothing to reconcile without both an ID and a name */
    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

# if !SUPPORT_PLUGIN
    if (corosync_cmap_has_config("nodelist") != 0) {
        return 0;
    }
# endif

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        gboolean conflicting = (existing_node->id > 0)
                               && (existing_node->id != node->id)
                               && (existing_node->uname != NULL)
                               && (strcasecmp(existing_node->uname, node->uname) == 0);

        /* Never evict an entry that is still an active member */
        if (!conflicting || crm_is_peer_active(existing_node)) {
            continue;
        }

        crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                 existing_node->id, existing_node->uname, node->id);

        g_hash_table_iter_remove(&iter);
        matches++;
    }

    return matches;
}
#endif
671 
/* coverity[-alloc] Memory is referenced in one or both hashtables */
/*!
 * \brief Get a cluster peer cache entry, creating one if none matches
 *
 * \param[in] id     Cluster node ID to search for, or 0
 * \param[in] uname  Node name to search for, or NULL
 *
 * \return Matching (possibly newly created) cluster node cache entry
 */
crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
    crm_node_t *node = NULL;
    char *uname_lookup = NULL;

    CRM_ASSERT(id > 0 || uname != NULL);

    crm_peer_init();

    node = crm_find_peer(id, uname);

    /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
     * we need to do a lookup of the node name using the id in the cluster membership. */
    if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
        uname_lookup = get_node_name(id);
    }

    if (uname_lookup) {
        uname = uname_lookup;
        crm_trace("Inferred a name of '%s' for node %u", uname, id);

        /* try to turn up the node one more time now that we know the uname. */
        if (node == NULL) {
            node = crm_find_peer(id, uname);
        }
    }


    if (node == NULL) {
        /* No match anywhere: create an entry under a fresh unique key
         * (names and IDs can conflict, so neither can be the key)
         */
        char *uniqueid = crm_generate_uuid();

        node = calloc(1, sizeof(crm_node_t));
        CRM_ASSERT(node);

        crm_info("Created entry %s/%p for node %s/%u (%d total)",
                 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
        g_hash_table_replace(crm_peer_cache, uniqueid, node);
    }

    if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
        crm_info("Node %u is now known as %s", id, uname);
    }

    /* Fill in any details the entry is missing; never overwrite existing ones */
    if(id > 0 && node->id == 0) {
        node->id = id;
    }

    if (uname && (node->uname == NULL)) {
        crm_update_peer_uname(node, uname);
    }

    if(node->uuid == NULL) {
        const char *uuid = crm_peer_uuid(node);

        if (uuid) {
            crm_info("Node %u has uuid %s", id, uuid);

        } else {
            crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
        }
    }

    free(uname_lookup);

    return node;
}
740 
/*!
 * \brief Update all of a node's information (name, ID, state, etc.)
 *
 * \param[in] source    Caller's function name (for logging)
 * \param[in] id        Cluster node ID (may be overridden by uuid on corosync)
 * \param[in] born      Time when node was born (heartbeat/plugin only)
 * \param[in] seen      Time when node was last seen
 * \param[in] votes     Node's number of votes (plugin only)
 * \param[in] children  Bitmask of processes active on the node
 * \param[in] uuid      Node UUID, or NULL
 * \param[in] uname     Node name, or NULL
 * \param[in] addr      Node address (plugin only), or NULL
 * \param[in] state     Node state (CRM_NODE_MEMBER etc.), or NULL
 *
 * \return Updated cache entry, or NULL if the node was reaped during the
 *         proc/state updates
 */
crm_node_t *
crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
                uint32_t children, const char *uuid, const char *uname, const char *addr,
                const char *state)
{
#if SUPPORT_PLUGIN
    gboolean addr_changed = FALSE;
    gboolean votes_changed = FALSE;
#endif
    crm_node_t *node = NULL;

    id = get_corosync_id(id, uuid);
    node = crm_get_peer(id, uname);

    CRM_ASSERT(node != NULL);

    if (node->uuid == NULL) {
        if (is_openais_cluster()) {
            /* Yes, overrule whatever was passed in */
            crm_peer_uuid(node);

        } else if (uuid != NULL) {
            node->uuid = strdup(uuid);
        }
    }

    /* NB: these helpers may reap the node, in which case they return NULL */
    if (children > 0) {
        if (crm_update_peer_proc(source, node, children, state) == NULL) {
            return NULL;
        }
    }

    if (state != NULL) {
        if (crm_update_peer_state(source, node, state, seen) == NULL) {
            return NULL;
        }
    }
#if SUPPORT_HEARTBEAT
    if (born != 0) {
        node->born = born;
    }
#endif

#if SUPPORT_PLUGIN
    /* These were only used by the plugin */
    if (born != 0) {
        node->born = born;
    }

    if (votes > 0 && node->votes != votes) {
        votes_changed = TRUE;
        node->votes = votes;
    }

    if (addr != NULL) {
        if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
            addr_changed = TRUE;
            free(node->addr);
            node->addr = strdup(addr);
        }
    }
    if (addr_changed || votes_changed) {
        crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
                 " proc=%.32x", source, node->uname, node->id, node->state,
                 node->addr, addr_changed ? " (new)" : "", node->votes,
                 votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
    }
#endif

    return node;
}
823 
835 void
837 {
838  int i, len = strlen(uname);
839 
840  for (i = 0; i < len; i++) {
841  if (uname[i] >= 'A' && uname[i] <= 'Z') {
842  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
843  uname);
844  break;
845  }
846  }
847 
848  free(node->uname);
849  node->uname = strdup(uname);
850  if (crm_status_callback) {
852  }
853 
854 #if SUPPORT_COROSYNC
855  if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
856  crm_remove_conflicting_peer(node);
857  }
858 #endif
859 }
860 
877 crm_node_t *
878 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
879 {
880  uint32_t last = 0;
881  gboolean changed = FALSE;
882 
883  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
884  source, peer2text(flag), status); return NULL);
885 
886  /* Pacemaker doesn't spawn processes on remote nodes */
887  if (is_set(node->flags, crm_remote_node)) {
888  return node;
889  }
890 
891  last = node->processes;
892  if (status == NULL) {
893  node->processes = flag;
894  if (node->processes != last) {
895  changed = TRUE;
896  }
897 
898  } else if (safe_str_eq(status, ONLINESTATUS)) {
899  if ((node->processes & flag) != flag) {
900  set_bit(node->processes, flag);
901  changed = TRUE;
902  }
903 #if SUPPORT_PLUGIN
904  } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
905  if (flag > 0 && node->processes != flag) {
906  node->processes = flag;
907  changed = TRUE;
908  }
909 #endif
910 
911  } else if (node->processes & flag) {
912  clear_bit(node->processes, flag);
913  changed = TRUE;
914  }
915 
916  if (changed) {
917  if (status == NULL && flag <= crm_proc_none) {
918  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
919  node->id);
920  } else {
921  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
922  peer2text(flag), status);
923  }
924 
925  /* Call the client callback first, then update the peer state,
926  * in case the node will be reaped
927  */
928  if (crm_status_callback) {
930  }
931 
932  /* The client callback shouldn't touch the peer caches,
933  * but as a safety net, bail if the peer cache was destroyed.
934  */
935  if (crm_peer_cache == NULL) {
936  return NULL;
937  }
938 
939  if (crm_autoreap) {
940  node = crm_update_peer_state(__FUNCTION__, node,
941  is_set(node->processes, crm_get_cluster_proc())?
943  }
944  } else {
945  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
946  peer2text(flag), status);
947  }
948  return node;
949 }
950 
951 void
952 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
953 {
954  char *last = NULL;
955  gboolean changed = FALSE;
956 
957  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
958  return);
959 
960  /* Remote nodes don't participate in joins */
961  if (is_set(node->flags, crm_remote_node)) {
962  return;
963  }
964 
965  last = node->expected;
966  if (expected != NULL && safe_str_neq(node->expected, expected)) {
967  node->expected = strdup(expected);
968  changed = TRUE;
969  }
970 
971  if (changed) {
972  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
973  expected, last);
974  free(last);
975  } else {
976  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
977  node->id, expected);
978  }
979 }
980 
997 static crm_node_t *
998 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter)
999 {
1000  gboolean is_member;
1001 
1002  CRM_CHECK(node != NULL,
1003  crm_err("Could not set state for unknown host to %s"
1004  CRM_XS " source=%s", state, source);
1005  return NULL);
1006 
1007  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1008  if (membership && is_member) {
1009  node->last_seen = membership;
1010  }
1011 
1012  if (state && safe_str_neq(node->state, state)) {
1013  char *last = node->state;
1014  enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1016 
1017  node->state = strdup(state);
1018  crm_notice("Node %s state is now %s " CRM_XS
1019  " nodeid=%u previous=%s source=%s", node->uname, state,
1020  node->id, (last? last : "unknown"), source);
1021  if (crm_status_callback) {
1022  crm_status_callback(status_type, node, last);
1023  }
1024  free(last);
1025 
1026  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1027  /* We only autoreap from the peer cache, not the remote peer cache,
1028  * because the latter should be managed only by
1029  * crm_remote_peer_cache_refresh().
1030  */
1031  if(iter) {
1032  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1033  g_hash_table_iter_remove(iter);
1034 
1035  } else {
1036  reap_crm_member(node->id, node->uname);
1037  }
1038  node = NULL;
1039  }
1040 
1041  } else {
1042  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1043  " nodeid=%u source=%s", node->uname, state, node->id, source);
1044  }
1045  return node;
1046 }
1047 
/*!
 * \brief Update a node's state and membership information
 *
 * \param[in] source      Caller's function name (for logging)
 * \param[in] node        Node object to update
 * \param[in] state       Node's new state
 * \param[in] membership  Node's new membership ID (applied only for members)
 *
 * \return NULL if any node was reaped, otherwise node
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again.
 */
crm_node_t *
crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership)
{
    return crm_update_peer_state_iter(source, node, state, membership, NULL);
}
1068 
/*!
 * \brief Mark any node not seen in the given membership event as lost
 *
 * \param[in] membership  ID of the latest membership event; entries whose
 *                        last_seen differs are marked CRM_NODE_LOST (and,
 *                        with autoreap, removed from the cache)
 */
void
crm_reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
        if (node->last_seen != membership) {
            if (node->state) {
                /*
                 * Calling crm_update_peer_state_iter() allows us to
                 * remove the node from crm_peer_cache without
                 * invalidating our iterator
                 */
                crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);

            } else {
                /* Never marked as a member; leave its state undetermined */
                crm_info("State of node %s[%u] is still unknown",
                         node->uname, node->id);
            }
        }
    }
}
1100 
/*!
 * \brief Fence a cluster member via STONITH (synchronously)
 *
 * \param[in] nodeid  ID of node to fence
 * \param[in] uname   Name of node to fence
 * \param[in] unused  Ignored (kept for API compatibility)
 *
 * \return Result of stonith_api_kick() (120-second timeout, "off" action)
 */
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
    /* Always use the synchronous, non-mainloop version */
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
1107 
/*!
 * \brief Fence a cluster member without using the mainloop
 *
 * \param[in] nodeid      ID of node to fence
 * \param[in] uname       Name of node to fence
 * \param[in] connection  Ignored (kept for API compatibility)
 *
 * \return Result of stonith_api_kick() (120-second timeout, "off" action)
 */
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
uint32_t votes
Definition: internal.h:50
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
void crm_remote_peer_cache_add(const char *node_name)
Add a node to the remote peer cache.
Definition: membership.c:135
void crm_reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1076
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRM_NODE_LOST
Definition: cluster.h:43
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:245
GHashTable * crm_peer_cache
Definition: membership.c:42
gboolean is_openais_cluster(void)
Definition: cluster.c:630
#define crm_crit(fmt, args...)
Definition: logging.h:247
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:150
char * crm_generate_uuid(void)
Definition: utils.c:2078
uint64_t flags
Definition: cluster.h:76
void crm_peer_destroy(void)
Definition: membership.c:429
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
uint64_t born
Definition: cluster.h:74
char * uuid
Definition: cluster.h:83
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2566
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:538
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
gboolean crm_have_quorum
Definition: membership.c:63
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:500
GHashTable * crm_remote_peer_cache
Definition: membership.c:60
char * addr
Definition: cluster.h:87
#define clear_bit(word, bit)
Definition: crm_internal.h:192
unsigned long long crm_peer_seq
Definition: membership.c:62
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:474
void crm_peer_init(void)
Definition: membership.c:417
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:141
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:468
char uname[MAX_NAME]
Definition: internal.h:53
int crm_remote_peer_cache_size(void)
Definition: membership.c:67
#define crm_warn(fmt, args...)
Definition: logging.h:249
#define set_bit(word, bit)
Definition: crm_internal.h:191
uint32_t processes
Definition: cluster.h:79
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:519
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:753
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:350
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:293
uint32_t id
Definition: internal.h:48
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:280
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:234
crm_status_type
Definition: cluster.h:198
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:952
#define crm_trace(fmt, args...)
Definition: logging.h:254
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:4987
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:597
#define CRM_NODE_MEMBER
Definition: cluster.h:44
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:836
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:213
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:457
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1102
char * expected
Definition: cluster.h:85
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:444
#define CRM_XS
Definition: logging.h:42
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:255
guint crm_active_peers(void)
Definition: membership.c:391
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:87
#define crm_err(fmt, args...)
Definition: logging.h:248
Fencing aka. STONITH.
#define uint32_t
Definition: stdint.in.h:158
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1109
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
char * state
Definition: cluster.h:84
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
#define U64T
Definition: config.h:631
Wrappers for and extensions to libqb IPC.
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:878
int32_t votes
Definition: cluster.h:78
char * uname
Definition: cluster.h:82
uint64_t last_seen
Definition: cluster.h:75
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:251
gboolean crm_is_true(const char *s)
Definition: strings.c:165
#define safe_str_eq(a, b)
Definition: util.h:63
#define ONLINESTATUS
Definition: util.h:48
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:956
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:674
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:240
#define crm_info(fmt, args...)
Definition: logging.h:251
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
uint64_t flags
Definition: remote.c:121
#define int32_t
Definition: stdint.in.h:157
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node&#39;s state and membership information.
Definition: membership.c:1064