diff --git a/Makefile.am b/Makefile.am index fc7a4f19..cbc47d0b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -39,6 +39,7 @@ EXTRA_DIST = autogen.sh conf/corosync.conf.example $(SPEC).in \ build-aux/release.mk \ conf/lenses/tests/test_corosync.aug \ conf/lenses/corosync.aug \ + conf/corosync.conf.example.udpu \ .version AUTOMAKE_OPTIONS = foreign diff --git a/conf/corosync.conf.example.udpu b/conf/corosync.conf.example.udpu new file mode 100644 index 00000000..8bfbc6e8 --- /dev/null +++ b/conf/corosync.conf.example.udpu @@ -0,0 +1,75 @@ +# Please read the corosync.conf.5 manual page +compatibility: whitetank + +totem { + version: 2 + secauth: off + interface { + member { + memberaddr: 10.16.35.101 + } + member { + memberaddr: 10.16.35.102 + } + member { + memberaddr: 10.16.35.103 + } + member { + memberaddr: 10.16.35.104 + } + member { + memberaddr: 10.16.35.105 + } + member { + memberaddr: 10.16.35.106 + } + member { + memberaddr: 10.16.35.107 + } + member { + memberaddr: 10.16.35.108 + } + member { + memberaddr: 10.16.35.109 + } + member { + memberaddr: 10.16.35.110 + } + member { + memberaddr: 10.16.35.111 + } + member { + memberaddr: 10.16.35.112 + } + member { + memberaddr: 10.16.35.113 + } + member { + memberaddr: 10.16.35.114 + } + member { + memberaddr: 10.16.35.115 + } + member { + memberaddr: 10.16.35.116 + } + ringnumber: 0 + bindnetaddr: 10.16.35.0 + mcastport: 5405 + } + transport: udpu +} + +logging { + fileline: off + to_logfile: yes + to_syslog: yes + debug: on + logfile: /var/log/cluster/corosync.log + debug: off + timestamp: on + logger_subsys { + subsys: AMF + debug: off + } +} diff --git a/corosync.spec.in b/corosync.spec.in index ea46f7b3..1040579a 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -111,6 +111,7 @@ fi %dir %{_sysconfdir}/corosync/service.d %dir %{_sysconfdir}/corosync/uidgid.d %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example +%config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example.udpu 
%{_initrddir}/corosync %dir %{_libexecdir}/lcrso %{_libexecdir}/lcrso/coroparse.lcrso diff --git a/exec/Makefile.am b/exec/Makefile.am index 938237cf..39a72139 100644 --- a/exec/Makefile.am +++ b/exec/Makefile.am @@ -36,8 +36,8 @@ AM_CFLAGS = -fPIC INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include $(nss_CFLAGS) $(rdmacm_CFLAGS) $(ibverbs_CFLAGS) TOTEM_SRC = coropoll.c totemip.c totemnet.c totemudp.c \ - totemrrp.c totemsrp.c totemmrp.c totempg.c \ - crypto.c wthread.c tsafe.c + totemudpu.c totemrrp.c totemsrp.c totemmrp.c \ + totempg.c crypto.c wthread.c tsafe.c if BUILD_RDMA TOTEM_SRC += totemiba.c endif @@ -72,10 +72,10 @@ SHARED_LIBS_SO = $(SHARED_LIBS:%.so.$(SONAME)=%.so) SHARED_LIBS_SO_TWO = $(SHARED_LIBS:%.so.$(SONAME)=%.so.$(SOMAJOR)) noinst_HEADERS = apidef.h crypto.h mainconfig.h main.h tsafe.h \ - quorum.h service.h sync.h timer.h tlist.h totemconfig.h \ - totemmrp.h totemnet.h totemudp.h totemiba.h totemrrp.h \ - totemsrp.h util.h vsf.h wthread.h schedwrk.h \ - evil.h syncv2.h fsm.h + quorum.h service.h sync.h timer.h tlist.h \ + totemconfig.h totemmrp.h totemnet.h totemudp.h \ + totemudpu.h totemiba.h totemrrp.h totemsrp.h util.h \ + vsf.h wthread.h schedwrk.h evil.h syncv2.h fsm.h EXTRA_DIST = $(LCRSO_SRC) diff --git a/exec/totemconfig.c b/exec/totemconfig.c index ad1b1166..7a30afdb 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -270,10 +270,13 @@ extern int totem_config_read ( int res = 0; hdb_handle_t object_totem_handle; hdb_handle_t object_interface_handle; + hdb_handle_t object_member_handle; const char *str; unsigned int ringnumber = 0; hdb_handle_t object_find_interface_handle; + hdb_handle_t object_find_member_handle; const char *transport_type; + int member_count = 0; res = totem_handle_find (objdb, &object_totem_handle); if (res == -1) { @@ -349,6 +352,8 @@ printf ("couldn't find totem handle\n"); object_find_interface_handle, &object_interface_handle) == 0) { + member_count = 0; + objdb_get_int (objdb, 
object_interface_handle, "ringnumber", &ringnumber); /* @@ -384,6 +389,26 @@ printf ("couldn't find totem handle\n"); res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->interfaces[ringnumber].mcast_addr.family); } + objdb->object_find_create ( + object_interface_handle, + "member", + strlen ("member"), + &object_find_member_handle); + + while (objdb->object_find_next ( + object_find_member_handle, + &object_member_handle) == 0) { + + if (!objdb_get_string (objdb, object_member_handle, "memberaddr", &str)) { + /* Stop at the capacity of member_list (assumed to be an array) so extra member sections cannot write past it */ + if (member_count >= (int)(sizeof (totem_config->interfaces[ringnumber].member_list) / sizeof (totem_config->interfaces[ringnumber].member_list[0]))) { + break; + } + res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0); + } + + } + totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } @@ -391,11 +416,16 @@ printf ("couldn't find totem handle\n"); add_totem_config_notification(objdb, totem_config, object_totem_handle); - totem_config->transport_number = 0; + totem_config->transport_number = TOTEM_TRANSPORT_UDP; objdb_get_string (objdb, object_totem_handle, "transport", &transport_type); + if (transport_type) { + if (strcmp (transport_type, "udpu") == 0) { + totem_config->transport_number = TOTEM_TRANSPORT_UDPU; + } + } if (transport_type) { if (strcmp (transport_type, "iba") == 0) { - totem_config->transport_number = 1; + totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } } @@ -425,7 +455,8 @@ int totem_config_validate ( struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); - if (memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, + if ((totem_config->transport_number == TOTEM_TRANSPORT_UDP) && + memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; @@ -443,7 +474,7 @@ int totem_config_validate ( goto parse_error; } - if (totem_config->broadcast_use == 0) { + if (totem_config->broadcast_use == 0 && totem_config->transport_number == TOTEM_TRANSPORT_UDP) { if 
(totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; diff --git a/exec/totemmrp.c b/exec/totemmrp.c index dceb44ab..75ddf986 100644 --- a/exec/totemmrp.c +++ b/exec/totemmrp.c @@ -245,3 +245,25 @@ extern void totemmrp_service_ready_register ( totemsrp_context, totem_service_ready); } + +int totemmrp_member_add ( + const struct totem_ip_address *member, + int ring_no) +{ + int res; + + res = totemsrp_member_add (totemsrp_context, member, ring_no); + + return (res); +} + +int totemmrp_member_remove ( + const struct totem_ip_address *member, + int ring_no) +{ + int res; + + res = totemsrp_member_remove (totemsrp_context, member, ring_no); + + return (res); +} diff --git a/exec/totemmrp.h b/exec/totemmrp.h index f9b19111..d3510723 100644 --- a/exec/totemmrp.h +++ b/exec/totemmrp.h @@ -116,4 +116,12 @@ extern int totemmrp_ring_reenable (void); extern void totemmrp_service_ready_register ( void (*totem_service_ready) (void)); +extern int totemmrp_member_add ( + const struct totem_ip_address *member, + int ring_no); + +extern int totemmrp_member_remove ( + const struct totem_ip_address *member, + int ring_no); + #endif /* TOTEMMRP_H_DEFINED */ diff --git a/exec/totemnet.c b/exec/totemnet.c index 5853e665..c7670f93 100644 --- a/exec/totemnet.c +++ b/exec/totemnet.c @@ -39,6 +39,7 @@ #include #endif #include +#include #include #define LOGSYS_UTILS_ONLY 1 @@ -112,11 +113,19 @@ struct transport { int (*recv_mcast_empty) ( void *transport_context); + + int (*member_add) ( + void *transport_context, + const struct totem_ip_address *member); + + int (*member_remove) ( + void *transport_context, + const struct totem_ip_address *member); }; struct transport transport_entries[] = { { - .name = "UDP/IP", + .name = "UDP/IP Multicast", .initialize = totemudp_initialize, .processor_count_set = totemudp_processor_count_set, .token_send = 
totemudp_token_send, @@ -133,6 +142,26 @@ struct transport transport_entries[] = { .crypto_set = totemudp_crypto_set, .recv_mcast_empty = totemudp_recv_mcast_empty }, + { + .name = "UDP/IP Unicast", + .initialize = totemudpu_initialize, + .processor_count_set = totemudpu_processor_count_set, + .token_send = totemudpu_token_send, + .mcast_flush_send = totemudpu_mcast_flush_send, + .mcast_noflush_send = totemudpu_mcast_noflush_send, + .recv_flush = totemudpu_recv_flush, + .send_flush = totemudpu_send_flush, + .iface_check = totemudpu_iface_check, + .finalize = totemudpu_finalize, + .net_mtu_adjust = totemudpu_net_mtu_adjust, + .iface_print = totemudpu_iface_print, + .iface_get = totemudpu_iface_get, + .token_target_set = totemudpu_token_target_set, + .crypto_set = totemudpu_crypto_set, + .recv_mcast_empty = totemudpu_recv_mcast_empty, + .member_add = totemudpu_member_add, + .member_remove = totemudpu_member_remove + }, #ifdef HAVE_RDMA { .name = "Infiniband/IP", @@ -192,13 +221,11 @@ static void totemnet_instance_initialize ( instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id; - transport = 0; - -#ifdef HAVE_RDMA - if (config->transport_number == 1) { - transport = 1; - } -#endif + transport = config->transport_number; + /* Fall back to entry 0 when the configured transport has no entry in this build (e.g. RDMA without HAVE_RDMA) */ + if ((size_t)transport >= sizeof (transport_entries) / sizeof (transport_entries[0])) { + transport = 0; + } log_printf (LOGSYS_LEVEL_NOTICE, "Initializing transport (%s).\n", transport_entries[transport].name); @@ -403,3 +430,35 @@ extern int totemnet_recv_mcast_empty ( return (res); } + +extern int totemnet_member_add ( + void *net_context, + const struct totem_ip_address *member) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_add) { + res = instance->transport->member_add ( + instance->transport_context, + member); + } + + return (res); +} + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned 
int res = 0; + + if (instance->transport->member_remove) { + res = instance->transport->member_remove ( + instance->transport_context, + member); + } + + return (res); +} diff --git a/exec/totemnet.h b/exec/totemnet.h index 96f063d2..7e6374c3 100644 --- a/exec/totemnet.h +++ b/exec/totemnet.h @@ -115,4 +115,12 @@ extern int totemnet_crypto_set ( extern int totemnet_recv_mcast_empty ( void *net_context); +extern int totemnet_member_add ( + void *net_context, + const struct totem_ip_address *member); + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member); + #endif /* TOTEMNET_H_DEFINED */ diff --git a/exec/totempg.c b/exec/totempg.c index 10effc8b..fb216953 100644 --- a/exec/totempg.c +++ b/exec/totempg.c @@ -1377,3 +1377,18 @@ extern void totempg_service_ready_register ( totemmrp_service_ready_register (totem_service_ready); } +int totempg_member_add ( + const struct totem_ip_address *member, + int ring_no) +{ + /* Forward to the multi-ring layer, returning its result unchanged */ + return (totemmrp_member_add (member, ring_no)); +} + +int totempg_member_remove ( + const struct totem_ip_address *member, + int ring_no) +{ + /* Forward to the multi-ring layer, returning its result unchanged */ + return (totemmrp_member_remove (member, ring_no)); +} diff --git a/exec/totemrrp.c b/exec/totemrrp.c index 0b803512..a8ebd087 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -166,6 +166,17 @@ struct rrp_algo { int (*mcast_recv_empty) ( struct totemrrp_instance *instance); + + int (*member_add) ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + + int (*member_remove) ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + }; struct totemrrp_instance { @@ -289,6 +300,15 @@ static void none_ring_reenable ( static int none_mcast_recv_empty ( struct totemrrp_instance *instance); +static int none_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int none_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); /* * 
Passive Replication Forward Declerations */ @@ -350,6 +370,15 @@ static void passive_ring_reenable ( static int passive_mcast_recv_empty ( struct totemrrp_instance *instance); +static int passive_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int passive_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); /* * Active Replication Forward Definitions */ @@ -411,6 +440,16 @@ static void active_ring_reenable ( static int active_mcast_recv_empty ( struct totemrrp_instance *instance); +static int active_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int active_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + static void active_timer_expired_token_start ( struct active_instance *active_instance); @@ -437,7 +476,9 @@ struct rrp_algo none_algo = { .processor_count_set = none_processor_count_set, .token_target_set = none_token_target_set, .ring_reenable = none_ring_reenable, - .mcast_recv_empty = none_mcast_recv_empty + .mcast_recv_empty = none_mcast_recv_empty, + .member_add = none_member_add, + .member_remove = none_member_remove }; struct rrp_algo passive_algo = { @@ -454,7 +495,9 @@ struct rrp_algo passive_algo = { .processor_count_set = passive_processor_count_set, .token_target_set = passive_token_target_set, .ring_reenable = passive_ring_reenable, - .mcast_recv_empty = passive_mcast_recv_empty + .mcast_recv_empty = passive_mcast_recv_empty, + .member_add = passive_member_add, + .member_remove = passive_member_remove }; struct rrp_algo active_algo = { @@ -471,7 +514,9 @@ struct rrp_algo active_algo = { .processor_count_set = active_processor_count_set, .token_target_set = active_token_target_set, .ring_reenable = active_ring_reenable, - .mcast_recv_empty = 
active_mcast_recv_empty + .mcast_recv_empty = active_mcast_recv_empty, + .member_add = active_member_add, + .member_remove = active_member_remove }; struct rrp_algo *rrp_algos[] = { @@ -598,6 +643,27 @@ static int none_mcast_recv_empty ( return (res); } +static int none_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[0], member); + return (res); +} + +static int none_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[0], member); + return (res); +} + + /* * Passive Replication Implementation */ @@ -947,6 +1013,27 @@ static int passive_mcast_recv_empty ( return (msgs_emptied); } +static int passive_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[iface_no], member); + return (res); +} + +static int passive_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[iface_no], member); + return (res); +} + + static void passive_ring_reenable ( struct totemrrp_instance *instance) { @@ -1264,6 +1351,26 @@ static void active_send_flush (struct totemrrp_instance *instance) } } +static int active_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[iface_no], member); + return (res); +} + +static int active_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[iface_no], member); + return 
(res); +} + static void active_iface_check (struct totemrrp_instance *instance) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; @@ -1550,7 +1657,6 @@ int totemrrp_initialize ( totemnet_net_mtu_adjust (instance->net_handles[i], totem_config); } - *rrp_context = instance; return (0); @@ -1707,3 +1813,28 @@ extern int totemrrp_mcast_recv_empty ( return (res); } +int totemrrp_member_add ( + void *rrp_context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; + int res; + + res = instance->rrp_algo->member_add (instance, member, iface_no); + + return (res); +} + +int totemrrp_member_remove ( + void *rrp_context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; + int res; + + res = instance->rrp_algo->member_remove (instance, member, iface_no); + + return (res); +} diff --git a/exec/totemrrp.h b/exec/totemrrp.h index 5fa0a0ff..da79ed2a 100644 --- a/exec/totemrrp.h +++ b/exec/totemrrp.h @@ -128,4 +128,14 @@ extern int totemrrp_ring_reenable ( extern int totemrrp_mcast_recv_empty ( void *rrp_context); +extern int totemrrp_member_add ( + void *net_context, + const struct totem_ip_address *member, + int iface_no); + +extern int totemrrp_member_remove ( + void *net_context, + const struct totem_ip_address *member, + int iface_no); + #endif /* TOTEMRRP_H_DEFINED */ diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 9d15ade4..f7a66383 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -4352,6 +4352,7 @@ void main_iface_change_fn ( unsigned int iface_no) { struct totemsrp_instance *instance = context; + int i; totemip_copy (&instance->my_id.addr[iface_no], iface_addr); assert (instance->my_id.addr[iface_no].nodeid); @@ -4365,6 +4366,12 @@ void main_iface_change_fn ( "Created or loaded sequence id %lld.%s for this ring.\n", 
instance->my_ring_id.seq, totemip_print (&instance->my_ring_id.rep)); + for (i = 0; i < instance->totem_config->interfaces[iface_no].member_count; i++) { + totemsrp_member_add (instance, + &instance->totem_config->interfaces[iface_no].member_list[i], + iface_no); + + } if (instance->totemsrp_service_ready_fn) { instance->totemsrp_service_ready_fn (); } @@ -4387,3 +4394,29 @@ void totemsrp_service_ready_register ( instance->totemsrp_service_ready_fn = totem_service_ready; } + +int totemsrp_member_add ( + void *context, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemrrp_member_add (instance->totemrrp_context, member, ring_no); + + return (res); +} + +int totemsrp_member_remove ( + void *context, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemrrp_member_remove (instance->totemrrp_context, member, ring_no); + + return (res); +} diff --git a/exec/totemsrp.h b/exec/totemsrp.h index 37ef5887..55f7ce28 100644 --- a/exec/totemsrp.h +++ b/exec/totemsrp.h @@ -120,4 +120,14 @@ void totemsrp_service_ready_register ( void *srp_context, void (*totem_service_ready) (void)); +extern int totemsrp_member_add ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemsrp_member_remove ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + #endif /* TOTEMSRP_H_DEFINED */ diff --git a/exec/totemudpu.c b/exec/totemudpu.c new file mode 100644 index 00000000..dc30a125 --- /dev/null +++ b/exec/totemudpu.c @@ -0,0 +1,1711 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2009 Red Hat, Inc. + * + * All rights reserved. 
+ * + * Author: Steven Dake (sdake@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#define LOGSYS_UTILS_ONLY 1 +#include +#include "totemudpu.h" + +#include "crypto.h" + +#ifdef HAVE_LIBNSS +#include +#include +#include +#include +#endif + +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) +#define NETIF_STATE_REPORT_UP 1 +#define NETIF_STATE_REPORT_DOWN 2 + +#define BIND_STATE_UNBOUND 0 +#define BIND_STATE_REGULAR 1 +#define BIND_STATE_LOOPBACK 2 + +#define HMAC_HASH_SIZE 20 +struct security_header { + unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ + unsigned char salt[16]; /* random number */ + char msg[0]; +} __attribute__((packed)); + +struct totemudpu_member { + struct list_head list; + struct totem_ip_address member; + int fd; +}; + +struct totemudpu_instance { + hmac_state totemudpu_hmac_state; + + prng_state totemudpu_prng_state; + +#ifdef HAVE_LIBNSS + PK11SymKey *nss_sym_key; + PK11SymKey *nss_sym_key_sign; +#endif + + unsigned char totemudpu_private_key[1024]; + + unsigned int totemudpu_private_key_len; + + hdb_handle_t totemudpu_poll_handle; + + struct totem_interface *totem_interface; + + int netif_state_report; + + int netif_bind_state; + + void *context; + + void (*totemudpu_deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len); + + void (*totemudpu_iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address); + + void (*totemudpu_target_set_completed) (void *context); + + /* + * Function and data used to log messages + */ + int totemudpu_log_level_security; + + int totemudpu_log_level_error; + + int totemudpu_log_level_warning; + + int totemudpu_log_level_notice; + + int 
totemudpu_log_level_debug; + + int totemudpu_subsys_id; + + void (*totemudpu_log_printf) ( + unsigned int rec_ident, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 5, 6))); + + void *udpu_context; + + char iov_buffer[FRAME_SIZE_MAX]; + + char iov_buffer_flush[FRAME_SIZE_MAX]; + + struct iovec totemudpu_iov_recv; + + struct iovec totemudpu_iov_recv_flush; + + struct list_head member_list; + + int stats_sent; + + int stats_recv; + + int stats_delv; + + int stats_remcasts; + + int stats_orf_token; + + struct timeval stats_tv_start; + + struct totem_ip_address my_id; + + int firstrun; + + poll_timer_handle timer_netif_check_timeout; + + unsigned int my_memb_entries; + + int flushing; + + struct totem_config *totem_config; + + struct totem_ip_address token_target; + + int token_socket; +}; + +struct work_item { + const void *msg; + unsigned int msg_len; + struct totemudpu_instance *instance; +}; + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to); + +static struct totem_ip_address localhost; + +static void totemudpu_instance_initialize (struct totemudpu_instance *instance) +{ + memset (instance, 0, sizeof (struct totemudpu_instance)); + + instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; + + instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; + + instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + instance->totemudpu_iov_recv_flush.iov_base = instance->iov_buffer_flush; + + instance->totemudpu_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + + /* + * There is always atleast 1 processor + */ + instance->my_memb_entries = 1; + + list_init (&instance->member_list); +} + +#define log_printf(level, format, args...) 
\ +do { \ + instance->totemudpu_log_printf ( \ + LOGSYS_ENCODE_RECID(level, \ + instance->totemudpu_subsys_id, \ + LOGSYS_RECID_LOG), \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); + + +static int authenticate_and_decrypt_sober ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + unsigned char keys[48]; + struct security_header *header = (struct security_header *)iov[0].iov_base; + prng_state keygen_prng_state; + prng_state stream_prng_state; + unsigned char *hmac_key = &keys[32]; + unsigned char *cipher_key = &keys[16]; + unsigned char *initial_vector = &keys[0]; + unsigned char digest_comparison[HMAC_HASH_SIZE]; + unsigned long len; + + /* + * Generate MAC, CIPHER, IV keys from private key + */ + memset (keys, 0, sizeof (keys)); + sober128_start (&keygen_prng_state); + sober128_add_entropy (instance->totemudpu_private_key, + instance->totemudpu_private_key_len, &keygen_prng_state); + sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); + + sober128_read (keys, sizeof (keys), &keygen_prng_state); + + /* + * Setup stream cipher + */ + sober128_start (&stream_prng_state); + sober128_add_entropy (cipher_key, 16, &stream_prng_state); + sober128_add_entropy (initial_vector, 16, &stream_prng_state); + + /* + * Authenticate contents of message + */ + hmac_init (&instance->totemudpu_hmac_state, DIGEST_SHA1, hmac_key, 16); + + hmac_process (&instance->totemudpu_hmac_state, + (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, + iov->iov_len - HMAC_HASH_SIZE); + + len = hash_descriptor[DIGEST_SHA1]->hashsize; + assert (HMAC_HASH_SIZE >= len); + hmac_done (&instance->totemudpu_hmac_state, digest_comparison, &len); + + if (memcmp (digest_comparison, header->hash_digest, len) != 0) { + return (-1); + } + + /* + * Decrypt the contents of the message with the cipher key + */ + sober128_read ((unsigned char*)iov->iov_base + + sizeof (struct security_header), + iov->iov_len - 
sizeof (struct security_header), + &stream_prng_state); + + return (0); +} + +static void init_sober_crypto( + struct totemudpu_instance *instance) +{ + log_printf(instance->totemudpu_log_level_notice, + "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n"); + rng_make_prng (128, PRNG_SOBER, &instance->totemudpu_prng_state, NULL); +} + +#ifdef HAVE_LIBNSS + +static unsigned char *copy_from_iovec( + const struct iovec *iov, + unsigned int iov_len, + size_t *buf_size) +{ + int i; + size_t bufptr; + size_t buflen = 0; + unsigned char *newbuf; + + for (i=0; i buf_size) { + copylen = buf_size - bufptr; + } + memcpy(iov[i].iov_base, buf+bufptr, copylen); + bufptr += copylen; + if (iov[i].iov_len != copylen) { + iov[i].iov_len = copylen; + return; + } + } +} + +static void init_nss_crypto( + struct totemudpu_instance *instance) +{ + PK11SlotInfo* aes_slot = NULL; + PK11SlotInfo* sha1_slot = NULL; + SECItem key_item; + SECStatus rv; + + log_printf(instance->totemudpu_log_level_notice, + "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n"); + rv = NSS_NoDB_Init("."); + if (rv != SECSuccess) + { + log_printf(instance->totemudpu_log_level_security, "NSS initialization failed (err %d)\n", + PR_GetError()); + goto out; + } + + aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); + if (aes_slot == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", + PR_GetError()); + goto out; + } + + sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); + if (sha1_slot == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", + PR_GetError()); + goto out; + } + /* + * Make the private key into a SymKey that we can use + */ + key_item.type = siBuffer; + key_item.data = instance->totem_config->private_key; + key_item.len = 32; /* Use 128 bits */ + + instance->nss_sym_key = PK11_ImportSymKey(aes_slot, + 
instance->totem_config->crypto_crypt_type, + PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, + &key_item, NULL); + if (instance->nss_sym_key == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", + PR_GetError()); + goto out; + } + + instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, + CKM_SHA_1_HMAC, + PK11_OriginUnwrap, CKA_SIGN, + &key_item, NULL); + if (instance->nss_sym_key_sign == NULL) { + log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", + PR_GetError()); + goto out; + } +out: + return; +} + +static int encrypt_and_sign_nss ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + PK11Context* enc_context = NULL; + SECStatus rv1, rv2; + int tmp1_outlen; + unsigned int tmp2_outlen; + unsigned char *inbuf; + unsigned char *data; + unsigned char *outdata; + size_t datalen; + SECItem no_params; + SECItem iv_item; + struct security_header *header; + SECItem *nss_sec_param; + unsigned char nss_iv_data[16]; + SECStatus rv; + + no_params.type = siBuffer; + no_params.data = 0; + no_params.len = 0; + + tmp1_outlen = tmp2_outlen = 0; + inbuf = copy_from_iovec(iovec, iov_len, &datalen); + if (!inbuf) { + log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); + return -1; + } + + data = inbuf + sizeof (struct security_header); + datalen -= sizeof (struct security_header); + + outdata = buf + sizeof (struct security_header); + header = (struct security_header *)buf; + + rv = PK11_GenerateRandom ( + nss_iv_data, + sizeof (nss_iv_data)); + if (rv != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, + "Failure to generate a random number %d\n", + PR_GetError()); + } + + memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data)); + iv_item.type = siBuffer; + iv_item.data = nss_iv_data; + iv_item.len = sizeof (nss_iv_data); + + 
nss_sec_param = PK11_ParamFromIV ( + instance->totem_config->crypto_crypt_type, + &iv_item); + if (nss_sec_param == NULL) { + log_printf(instance->totemudpu_log_level_security, + "Failure to set up PKCS11 param (err %d)\n", + PR_GetError()); + free (inbuf); + return (-1); + } + + /* + * Create cipher context for encryption + */ + enc_context = PK11_CreateContextBySymKey ( + instance->totem_config->crypto_crypt_type, + CKA_ENCRYPT, + instance->nss_sym_key, + nss_sec_param); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, + "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n", + instance->totem_config->crypto_crypt_type, + PR_GetError(), err); + free(inbuf); + return -1; + } + rv1 = PK11_CipherOp(enc_context, outdata, + &tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header), + data, datalen); + rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, + FRAME_SIZE_MAX - tmp1_outlen); + PK11_DestroyContext(enc_context, PR_TRUE); + + *buf_len = tmp1_outlen + tmp2_outlen; + free(inbuf); +// memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data)); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + goto out; + + /* Now do the digest */ + enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC, + CKA_SIGN, instance->nss_sym_key_sign, &no_params); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s\n", + PR_GetError(), err); + return -1; + } + + + PK11_DigestBegin(enc_context); + + rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16); + rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest)); + + PK11_DestroyContext(enc_context, PR_TRUE); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + goto out; + + + *buf_len = *buf_len + 
sizeof(struct security_header); + SECITEM_FreeItem(nss_sec_param, PR_TRUE); + return 0; + +out: + return -1; +} + + +static int authenticate_and_decrypt_nss ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + PK11Context* enc_context = NULL; + SECStatus rv1, rv2; + int tmp1_outlen; + unsigned int tmp2_outlen; + unsigned char outbuf[FRAME_SIZE_MAX]; + unsigned char digest[HMAC_HASH_SIZE]; + unsigned char *outdata; + int result_len; + unsigned char *data; + unsigned char *inbuf; + size_t datalen; + struct security_header *header = (struct security_header *)iov[0].iov_base; + SECItem no_params; + SECItem ivdata; + + no_params.type = siBuffer; + no_params.data = 0; + no_params.len = 0; + + tmp1_outlen = tmp2_outlen = 0; + if (iov_len > 1) { + inbuf = copy_from_iovec(iov, iov_len, &datalen); + if (!inbuf) { + log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); + return -1; + } + } + else { + inbuf = (unsigned char *)iov[0].iov_base; + datalen = iov[0].iov_len; + } + data = inbuf + sizeof (struct security_header) - 16; + datalen = datalen - sizeof (struct security_header) + 16; + + outdata = outbuf + sizeof (struct security_header); + + /* Check the digest */ + enc_context = PK11_CreateContextBySymKey ( + CKM_SHA_1_HMAC, CKA_SIGN, + instance->nss_sym_key_sign, + &no_params); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s\n", + PR_GetError(), err); + free (inbuf); + return -1; + } + + PK11_DigestBegin(enc_context); + + rv1 = PK11_DigestOp(enc_context, data, datalen); + rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest)); + + PK11_DestroyContext(enc_context, PR_TRUE); + + if (rv1 != SECSuccess || rv2 != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, "Digest check failed\n"); + return -1; + } + 
+ if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) { + log_printf(instance->totemudpu_log_level_error, "Digest does not match\n"); + return -1; + } + + /* + * Get rid of salt + */ + data += 16; + datalen -= 16; + + /* Create cipher context for decryption */ + ivdata.type = siBuffer; + ivdata.data = header->salt; + ivdata.len = sizeof(header->salt); + + enc_context = PK11_CreateContextBySymKey( + instance->totem_config->crypto_crypt_type, + CKA_DECRYPT, + instance->nss_sym_key, &ivdata); + if (!enc_context) { + log_printf(instance->totemudpu_log_level_security, + "PK11_CreateContext (decrypt) failed (err %d)\n", + PR_GetError()); + return -1; + } + + rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, + sizeof(outbuf) - sizeof (struct security_header), + data, datalen); + if (rv1 != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, + "PK11_CipherOp (decrypt) failed (err %d)\n", + PR_GetError()); + } + rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, + sizeof(outbuf) - tmp1_outlen); + PK11_DestroyContext(enc_context, PR_TRUE); + result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header); + + /* Copy it back to the buffer */ + copy_to_iovec(iov, iov_len, outbuf, result_len); + if (iov_len > 1) + free(inbuf); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + return -1; + + return 0; +} +#endif + +static int encrypt_and_sign_sober ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + int i; + unsigned char *addr; + unsigned char keys[48]; + struct security_header *header; + unsigned char *hmac_key = &keys[32]; + unsigned char *cipher_key = &keys[16]; + unsigned char *initial_vector = &keys[0]; + unsigned long len; + size_t outlen = 0; + hmac_state hmac_st; + prng_state keygen_prng_state; + prng_state stream_prng_state; + prng_state *prng_state_in = &instance->totemudpu_prng_state; + + header = (struct security_header 
*)buf; + addr = buf + sizeof (struct security_header); + + memset (keys, 0, sizeof (keys)); + memset (header->salt, 0, sizeof (header->salt)); + + /* + * Generate MAC, CIPHER, IV keys from private key + */ + sober128_read (header->salt, sizeof (header->salt), prng_state_in); + sober128_start (&keygen_prng_state); + sober128_add_entropy (instance->totemudpu_private_key, + instance->totemudpu_private_key_len, + &keygen_prng_state); + sober128_add_entropy (header->salt, sizeof (header->salt), + &keygen_prng_state); + + sober128_read (keys, sizeof (keys), &keygen_prng_state); + + /* + * Setup stream cipher + */ + sober128_start (&stream_prng_state); + sober128_add_entropy (cipher_key, 16, &stream_prng_state); + sober128_add_entropy (initial_vector, 16, &stream_prng_state); + + outlen = sizeof (struct security_header); + /* + * Copy remainder of message, then encrypt it + */ + for (i = 1; i < iov_len; i++) { + memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); + addr += iovec[i].iov_len; + outlen += iovec[i].iov_len; + } + + /* + * Encrypt message by XORing stream cipher data + */ + sober128_read (buf + sizeof (struct security_header), + outlen - sizeof (struct security_header), + &stream_prng_state); + + memset (&hmac_st, 0, sizeof (hmac_st)); + + /* + * Sign the contents of the message with the hmac key and store signature in message + */ + hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); + + hmac_process (&hmac_st, + buf + HMAC_HASH_SIZE, + outlen - HMAC_HASH_SIZE); + + len = hash_descriptor[DIGEST_SHA1]->hashsize; + + hmac_done (&hmac_st, header->hash_digest, &len); + + *buf_len = outlen; + + return 0; +} + +static int encrypt_and_sign_worker ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER || + instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) + return encrypt_and_sign_sober(instance, buf, buf_len, 
iovec, iov_len); +#ifdef HAVE_LIBNSS + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) + return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); +#endif + return -1; +} + +static int authenticate_and_decrypt ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + unsigned char type; + unsigned char *endbuf = (unsigned char *)iov[iov_len-1].iov_base; + int res = -1; + + /* + * Get the encryption type and remove it from the buffer + */ + type = endbuf[iov[iov_len-1].iov_len-1]; + iov[iov_len-1].iov_len -= 1; + + if (type == TOTEM_CRYPTO_SOBER) + res = authenticate_and_decrypt_sober(instance, iov, iov_len); + + /* + * Only try higher crypto options if NEW has been requested + */ + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { +#ifdef HAVE_LIBNSS + if (type == TOTEM_CRYPTO_NSS) + res = authenticate_and_decrypt_nss(instance, iov, iov_len); +#endif + } + + /* + * If it failed, then try decrypting the whole packet as it might be + * from aisexec + */ + if (res == -1) { + iov[iov_len-1].iov_len += 1; + res = authenticate_and_decrypt_sober(instance, iov, iov_len); + } + + return res; +} + +static void init_crypto( + struct totemudpu_instance *instance) +{ + /* + * If we are expecting NEW crypto type then initialise all available + * crypto options. For OLD then we only need SOBER128. 
+ */ + + init_sober_crypto(instance); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) + return; + +#ifdef HAVE_LIBNSS + init_nss_crypto(instance); +#endif +} + +int totemudpu_crypto_set ( + void *udpu_context, + unsigned int type) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + /* + * Can't set crypto type if OLD is selected + */ + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) { + res = -1; + } else { + /* + * Validate crypto algorithm + */ + switch (type) { + case TOTEM_CRYPTO_SOBER: + log_printf(instance->totemudpu_log_level_security, + "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); + break; + case TOTEM_CRYPTO_NSS: + log_printf(instance->totemudpu_log_level_security, + "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); + break; + default: + res = -1; + break; + } + } + + return (res); +} + + +static inline void ucast_sendmsg ( + struct totemudpu_instance *instance, + struct totem_ip_address *system_to, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_ucast; + int res = 0; + size_t buf_len; + unsigned char sheader[sizeof (struct security_header)]; + unsigned char encrypt_data[FRAME_SIZE_MAX]; + struct iovec iovec_encrypt[2]; + const struct iovec *iovec_sendmsg; + struct sockaddr_storage sockaddr; + struct iovec iovec; + unsigned int iov_len; + int addrlen; + + if (instance->totem_config->secauth == 1) { + iovec_encrypt[0].iov_base = (void *)sheader; + iovec_encrypt[0].iov_len = sizeof (struct security_header); + iovec_encrypt[1].iov_base = (void *)msg; + iovec_encrypt[1].iov_len = msg_len; + + /* + * Encrypt and digest the message + */ + encrypt_and_sign_worker ( + instance, + encrypt_data, + &buf_len, + iovec_encrypt, + 2); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { + encrypt_data[buf_len++] = instance->totem_config->crypto_type; + } + else { + encrypt_data[buf_len++] = 0; + 
} + + iovec_encrypt[0].iov_base = (void *)encrypt_data; + iovec_encrypt[0].iov_len = buf_len; + iovec_sendmsg = &iovec_encrypt[0]; + iov_len = 1; + } else { + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + iovec_sendmsg = &iovec; + iov_len = 1; + } + + /* + * Build unicast message + */ + totemip_totemip_to_sockaddr_convert(system_to, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_ucast.msg_name = &sockaddr; + msg_ucast.msg_namelen = addrlen; + msg_ucast.msg_iov = (void *) iovec_sendmsg; + msg_ucast.msg_iovlen = iov_len; +#if !defined(COROSYNC_SOLARIS) + msg_ucast.msg_control = 0; + msg_ucast.msg_controllen = 0; + msg_ucast.msg_flags = 0; +#else + msg_ucast.msg_accrights = NULL; + msg_ucast.msg_accrightslen = 0; +#endif + + + /* + * Transmit unicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); + if (res < 0) { + char error_str[100]; + strerror_r (errno, error_str, sizeof(error_str)); + log_printf (instance->totemudpu_log_level_debug, + "sendmsg(ucast) failed (non-critical): %s\n", error_str); + } +} + +static inline void mcast_sendmsg ( + struct totemudpu_instance *instance, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_mcast; + int res = 0; + size_t buf_len; + unsigned char sheader[sizeof (struct security_header)]; + unsigned char encrypt_data[FRAME_SIZE_MAX]; + struct iovec iovec_encrypt[2]; + struct iovec iovec; + const struct iovec *iovec_sendmsg; + struct sockaddr_storage sockaddr; + unsigned int iov_len; + int addrlen; + struct list_head *list; + struct totemudpu_member *member; + + if (instance->totem_config->secauth == 1) { + iovec_encrypt[0].iov_base = (void *)sheader; + iovec_encrypt[0].iov_len = sizeof (struct security_header); + iovec_encrypt[1].iov_base = (void *)msg; + iovec_encrypt[1].iov_len = msg_len; + + /* + * Encrypt and digest the message + */ + encrypt_and_sign_worker ( + instance, + encrypt_data, + &buf_len, + 
iovec_encrypt, + 2); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { + encrypt_data[buf_len++] = instance->totem_config->crypto_type; + } + else { + encrypt_data[buf_len++] = 0; + } + + iovec_encrypt[0].iov_base = (void *)encrypt_data; + iovec_encrypt[0].iov_len = buf_len; + iovec_sendmsg = &iovec_encrypt[0]; + iov_len = 1; + } else { + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + iovec_sendmsg = &iovec; + iov_len = 1; + } + + /* + * Build multicast message + */ + for (list = instance->member_list.next; + list != &instance->member_list; + list = list->next) { + + member = list_entry (list, + struct totemudpu_member, + list); + + totemip_totemip_to_sockaddr_convert(&member->member, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_mcast.msg_name = &sockaddr; + msg_mcast.msg_namelen = addrlen; + msg_mcast.msg_iov = (void *) iovec_sendmsg; + msg_mcast.msg_iovlen = iov_len; + #if !defined(COROSYNC_SOLARIS) + msg_mcast.msg_control = 0; + msg_mcast.msg_controllen = 0; + msg_mcast.msg_flags = 0; + #else + msg_mcast.msg_accrights = NULL; + msg_mcast.msg_accrightslen = 0; + #endif + + /* + * Transmit multicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); + if (res < 0) { + char error_str[100]; + strerror_r (errno, error_str, sizeof(error_str)); + log_printf (instance->totemudpu_log_level_debug, + "sendmsg(mcast) failed (non-critical): %s\n", error_str); + } + } +} + +int totemudpu_finalize ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + if (instance->token_socket > 0) { + close (instance->token_socket); + poll_dispatch_delete (instance->totemudpu_poll_handle, + instance->token_socket); + } + + return (res); +} + +static int net_deliver_fn ( + hdb_handle_t handle, + int fd, + int revents, + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance 
*)data; + struct msghdr msg_recv; + struct iovec *iovec; + struct security_header *security_header; + struct sockaddr_storage system_from; + int bytes_received; + int res = 0; + unsigned char *msg_offset; + unsigned int size_delv; + + if (instance->flushing == 1) { + iovec = &instance->totemudpu_iov_recv_flush; + } else { + iovec = &instance->totemudpu_iov_recv; + } + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = iovec; + msg_recv.msg_iovlen = 1; +#if !defined(COROSYNC_SOLARIS) + msg_recv.msg_control = 0; + msg_recv.msg_controllen = 0; + msg_recv.msg_flags = 0; +#else + msg_recv.msg_accrights = NULL; + msg_recv.msg_accrightslen = 0; +#endif + + bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (bytes_received == -1) { + return (0); + } else { + instance->stats_recv += bytes_received; + } + + if ((instance->totem_config->secauth == 1) && + (bytes_received < sizeof (struct security_header))) { + + log_printf (instance->totemudpu_log_level_security, "Received message is too short... ignoring %d.\n", bytes_received); + return (0); + } + + security_header = (struct security_header *)iovec->iov_base; + + iovec->iov_len = bytes_received; + if (instance->totem_config->secauth == 1) { + /* + * Authenticate and if authenticated, decrypt datagram + */ + + res = authenticate_and_decrypt (instance, iovec, 1); + if (res == -1) { + log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... 
ignoring.\n"); + log_printf (instance->totemudpu_log_level_security, + "Invalid packet data\n"); + iovec->iov_len = FRAME_SIZE_MAX; + return 0; + } + msg_offset = (unsigned char *)iovec->iov_base + + sizeof (struct security_header); + size_delv = bytes_received - sizeof (struct security_header); + } else { + msg_offset = (void *)iovec->iov_base; + size_delv = bytes_received; + } + + /* + * Handle incoming message + */ + instance->totemudpu_deliver_fn ( + instance->context, + msg_offset, + size_delv); + + iovec->iov_len = FRAME_SIZE_MAX; + return (0); +} + +static int netif_determine ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet, + struct totem_ip_address *bound_to, + int *interface_up, + int *interface_num) +{ + int res; + + res = totemip_iface_check (bindnet, bound_to, + interface_up, interface_num, + instance->totem_config->clear_node_high_bit); + + + return (res); +} + + +/* + * If the interface is up, the sockets for totem are built. If the interface is down + * this function is requeued in the timer list to retry building the sockets later. 
+ */ +static void timer_function_netif_check_timeout ( + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + int res; + int interface_up; + int interface_num; + struct totem_ip_address *bind_address; + + /* + * Build sockets for every interface + */ + netif_determine (instance, + &instance->totem_interface->bindnet, + &instance->totem_interface->boundto, + &interface_up, &interface_num); + /* + * If the network interface isn't back up and we are already + * in loopback mode, add timer to check again and return + */ + if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && + interface_up == 0) || + + (instance->my_memb_entries == 1 && + instance->netif_bind_state == BIND_STATE_REGULAR && + interface_up == 1)) { + + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + /* + * Add a timer to check for a downed regular interface + */ + return; + } + + if (instance->token_socket > 0) { + close (instance->token_socket); + poll_dispatch_delete (instance->totemudpu_poll_handle, + instance->token_socket); + } + + if (interface_up == 0) { + /* + * Interface is not up + */ + instance->netif_bind_state = BIND_STATE_LOOPBACK; + bind_address = &localhost; + + /* + * Add a timer to retry building interfaces and request memb_gather_enter + */ + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } else { + /* + * Interface is up + */ + instance->netif_bind_state = BIND_STATE_REGULAR; + bind_address = &instance->totem_interface->bindnet; + } + /* + * Create and bind the multicast and unicast sockets + */ + res = totemudpu_build_sockets (instance, + bind_address, + &instance->totem_interface->boundto); + + poll_dispatch_add ( + instance->totemudpu_poll_handle, 
+ instance->token_socket, + POLLIN, instance, net_deliver_fn); + + totemip_copy (&instance->my_id, &instance->totem_interface->boundto); + + /* + * This reports changes in the interface to the user and totemsrp + */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { + log_printf (instance->totemudpu_log_level_notice, + "The network interface [%s] is now up.\n", + totemip_print (&instance->totem_interface->boundto)); + instance->netif_state_report = NETIF_STATE_REPORT_DOWN; + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); + } + /* + * Add a timer to check for interface going down in single membership + */ + if (instance->my_memb_entries == 1) { + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + } else { + if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { + log_printf (instance->totemudpu_log_level_notice, + "The network interface is down.\n"); + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); + } + instance->netif_state_report = NETIF_STATE_REPORT_UP; + + } +} + +/* Set the socket priority to INTERACTIVE to ensure + that our messages don't get queued behind anything else */ +static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) +{ +#ifdef SO_PRIORITY + int prio = 6; /* TC_PRIO_INTERACTIVE */ + char error_str[100]; + + if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set traffic priority. 
(%s)\n", error_str); + } +#endif +} + +static int totemudpu_build_sockets_ip ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to, + int interface_num) +{ + struct sockaddr_storage sockaddr; + int addrlen; + int res; + + /* + * Setup unicast socket + */ + instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (instance->token_socket == -1) { + perror ("socket2"); + return (-1); + } + + totemip_nosigpipe (instance->token_socket); + res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); + if (res == -1) { + char error_str[100]; + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket: %s\n", error_str); + return (-1); + } + + /* + * Bind to unicast socket used for token send/receives + * This has the side effect of binding to the correct interface + */ + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); + res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); + if (res == -1) { + perror ("bind token socket failed"); + return (-1); + } + + return 0; +} + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to) +{ + int interface_num; + int interface_up; + int res; + + /* + * Determine the ip address bound to and the interface name + */ + res = netif_determine (instance, + bindnet_address, + bound_to, + &interface_up, + &interface_num); + + if (res == -1) { + return (-1); + } + + totemip_copy(&instance->my_id, bound_to); + + res = totemudpu_build_sockets_ip (instance, + bindnet_address, bound_to, interface_num); + + /* We only send out of the token socket */ + totemudpu_traffic_control_set(instance, instance->token_socket); + return res; +} + +/* + * Totem Network interface - also does encryption/decryption + * 
depends on poll abstraction, POSIX, IPV4 + */ + +/* + * Create an instance + */ +int totemudpu_initialize ( + hdb_handle_t poll_handle, + void **udpu_context, + struct totem_config *totem_config, + int interface_no, + void *context, + + void (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len), + + void (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address), + + void (*target_set_completed) ( + void *context)) +{ + struct totemudpu_instance *instance; + + instance = malloc (sizeof (struct totemudpu_instance)); + if (instance == NULL) { + return (-1); + } + + totemudpu_instance_initialize (instance); + + instance->totem_config = totem_config; + /* + * Configure logging + */ + instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; + instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; + + /* + * Initialize random number generator for later use to generate salt + */ + memcpy (instance->totemudpu_private_key, totem_config->private_key, + totem_config->private_key_len); + + instance->totemudpu_private_key_len = totem_config->private_key_len; + + init_crypto(instance); + + /* + * Initialize local variables for totemudpu + */ + instance->totem_interface = &totem_config->interfaces[interface_no]; + memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); + + instance->totemudpu_poll_handle = poll_handle; + + instance->totem_interface->bindnet.nodeid = 
instance->totem_config->node_id; + + instance->context = context; + instance->totemudpu_deliver_fn = deliver_fn; + + instance->totemudpu_iface_change_fn = iface_change_fn; + + instance->totemudpu_target_set_completed = target_set_completed; + + totemip_localhost (AF_INET, &localhost); + localhost.nodeid = instance->totem_config->node_id; + + /* + * RRP layer isn't ready to receive message because it hasn't + * initialized yet. Add short timer to check the interfaces. + */ + poll_timer_add (instance->totemudpu_poll_handle, + 100, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + *udpu_context = instance; + return (0); +} + +int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + instance->my_memb_entries = processor_count; + poll_timer_delete (instance->totemudpu_poll_handle, + instance->timer_netif_check_timeout); + if (processor_count == 1) { + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + return (res); +} + +int totemudpu_recv_flush (void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + struct pollfd ufd; + int nfds; + int res = 0; + + instance->flushing = 1; + + do { + ufd.fd = instance->token_socket; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + net_deliver_fn (0, instance->token_socket, + ufd.revents, instance); + } + } while (nfds == 1); + + instance->flushing = 0; + + return (res); +} + +int totemudpu_send_flush (void *udpu_context) +{ + int res = 0; + + return (res); +} + +int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct 
totemudpu_instance *)udpu_context; + int res = 0; + + ucast_sendmsg (instance, &instance->token_target, msg, msg_len); + + return (res); +} +int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +extern int totemudpu_iface_check (void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + timer_function_netif_check_timeout (instance); + + return (res); +} + +extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) +{ +#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ + if (totem_config->secauth == 1) { + totem_config->net_mtu -= sizeof (struct security_header) + + UDPIP_HEADER_SIZE; + } else { + totem_config->net_mtu -= UDPIP_HEADER_SIZE; + } +} + +const char *totemudpu_iface_print (void *udpu_context) { + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + const char *ret_char; + + ret_char = totemip_print (&instance->my_id); + + return (ret_char); +} + +int totemudpu_iface_get ( + void *udpu_context, + struct totem_ip_address *addr) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); + + return (res); +} + +int totemudpu_token_target_set ( + void *udpu_context, + const struct totem_ip_address *token_target) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + memcpy 
(&instance->token_target, token_target, + sizeof (struct totem_ip_address)); + + instance->totemudpu_target_set_completed (instance->context); + + return (res); +} + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + unsigned int res; + struct sockaddr_storage system_from; + struct msghdr msg_recv; + struct pollfd ufd; + int nfds; + int msg_processed = 0; + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = &instance->totemudpu_iov_recv_flush; + msg_recv.msg_iovlen = 1; +#if !defined(COROSYNC_SOLARIS) + msg_recv.msg_control = 0; + msg_recv.msg_controllen = 0; + msg_recv.msg_flags = 0; +#else + msg_recv.msg_accrights = NULL; + msg_recv.msg_accrightslen = 0; +#endif + + do { + ufd.fd = instance->token_socket; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (res != -1) { + msg_processed = 1; + } else { + msg_processed = -1; + } + } + } while (nfds == 1); + + return (msg_processed); +} + +int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *member) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + struct totemudpu_member *new_member; + int res; + char error_str[100]; + + new_member = malloc (sizeof (struct totemudpu_member)); + if (new_member == NULL) { + return (-1); + } + list_init (&new_member->list); + list_add_tail (&new_member->list, &instance->member_list); + memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); + new_member->fd = socket (member->family, SOCK_DGRAM, 0); + if (new_member->fd == -1) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not create socket for new member: %s\n", error_str); + 
return (-1); + } + totemip_nosigpipe (new_member->fd); + res = fcntl (new_member->fd, F_SETFL, O_NONBLOCK); + if (res == -1) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket: %s\n", error_str); + return (-1); + } + return (0); +} + +int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *token_target) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + instance = NULL; + return (0); +} diff --git a/exec/totemudpu.h b/exec/totemudpu.h new file mode 100644 index 00000000..2dcad248 --- /dev/null +++ b/exec/totemudpu.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef TOTEMUDPU_H_DEFINED +#define TOTEMUDPU_H_DEFINED + +#include +#include +#include + +#include + +/* + * Create an instance. NOTE(review): udpu_context is presumably the out-parameter that receives the opaque handle passed as the first argument to the other totemudpu_* functions, and context is presumably handed back to the three callbacks (deliver_fn, iface_change_fn, target_set_completed) - confirm in totemudpu.c. + */ +extern int totemudpu_initialize ( + hdb_handle_t poll_handle, + void **udpu_context, + struct totem_config *totem_config, + int interface_no, + void *context, + + void (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len), + + void (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address), + + void (*target_set_completed) ( + void *context)); + +extern int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count); + +extern int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_recv_flush (void *udpu_context); + +extern int totemudpu_send_flush (void *udpu_context); + +extern int totemudpu_iface_check (void *udpu_context); + +extern int totemudpu_finalize (void *udpu_context); + +extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config); + +extern const char *totemudpu_iface_print (void *udpu_context); + +extern int totemudpu_iface_get ( + void *udpu_context, + struct totem_ip_address *addr); + +extern int 
totemudpu_token_target_set ( + void *udpu_context, + const struct totem_ip_address *token_target); + +extern int totemudpu_crypto_set ( + void *udpu_context, + unsigned int type); + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context); +/* Add or remove one member address on this transport instance (purpose inferred from the names - TODO confirm). NOTE(review): in totemudpu.c as added by this patch, totemudpu_member_remove names its second parameter token_target rather than member, does not use either argument, and always returns 0. */ +extern int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *member); + +extern int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *member); + +#endif /* TOTEMUDPU_H_DEFINED */ diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index 80bbf5c1..b84d9ba5 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -57,6 +57,9 @@ struct totem_interface { struct totem_ip_address boundto; struct totem_ip_address mcast_addr; uint16_t ip_port; + int member_count; + struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]; + }; struct totem_logging_configuration { @@ -79,6 +82,12 @@ struct totem_logging_configuration { enum { TOTEM_PRIVATE_KEY_LEN = 128 }; enum { TOTEM_RRP_MODE_BYTES = 64 }; +typedef enum { + TOTEM_TRANSPORT_UDP = 0, + TOTEM_TRANSPORT_UDPU = 1, + TOTEM_TRANSPORT_RDMA = 2 /* NOTE(review): the three values presumably correspond to the corosync.conf transport settings udp, udpu and iba - confirm in totemconfig.c */ +} totem_transport_t; + struct totem_config { int version; @@ -164,7 +173,7 @@ struct totem_config { int crypto_crypt_type; int crypto_sign_type; - int transport_number; + totem_transport_t transport_number; }; #define TOTEM_CONFIGURATION_TYPE diff --git a/include/corosync/totem/totempg.h b/include/corosync/totem/totempg.h index fbf71fb3..26447aa6 100644 --- a/include/corosync/totem/totempg.h +++ b/include/corosync/totem/totempg.h @@ -160,6 +160,14 @@ extern int totempg_ring_reenable (void); extern void totempg_service_ready_register ( void (*totem_service_ready) (void)); +extern int totempg_member_add ( + const struct totem_ip_address *member, + int ring_no); + +extern int totempg_member_remove ( + const struct totem_ip_address *member, + int ring_no); + #ifdef __cplusplus } #endif diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 
index 5d3f7b5e..8fc3dc6f 100644 --- a/man/corosync.conf.5 +++ b/man/corosync.conf.5 @@ -1,6 +1,6 @@ .\"/* .\" * Copyright (c) 2005 MontaVista Software, Inc. -.\" * Copyright (c) 2006-2009 Red Hat, Inc. +.\" * Copyright (c) 2006-2010 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * @@ -77,7 +77,8 @@ option which is required: .PP Within the .B interface -sub-directive of totem there are four parameters which are required: +sub-directive of totem there are four parameters which are required. There is +one parameter which is optional. .TP ringnumber @@ -126,6 +127,13 @@ mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr please configure the mcastports with a gap. +.TP +member +This specifies a member on the interface and is used with the udpu transport only. +Every node that should be part of the membership should be specified as +a separate member directive. Within the member directive there is a parameter +memberaddr which specifies the IP address of one of the nodes. + .PP .PP Within the @@ -258,11 +266,13 @@ The default is ykd. The vsftype can also be set to none. .TP transport This directive controls the transport mechanism used. If the interface to -which corosync is binding is Infiniband, you can specify the "iba" option. Any -other option is ignored. Note Infiniband interfaces will use RDMA transport -techniques and perform at higher bandwidths and lower latency than gige networks. +which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the +"iba" parameter may be specified. To avoid the use of multicast entirely, a +unicast transport parameter "udpu" can be specified. This requires specifying +the list of members that could potentially make up the membership before +deployment. -The default is udp. The transport type can also be set to iba. +The default is udp. The transport type can also be set to udpu or iba. Within the .B totem