From bb05aed93f6bd378c0c7378f6070bb5080ded0a8 Mon Sep 17 00:00:00 2001 From: Steven Dake Date: Thu, 18 Nov 2010 09:31:49 -0700 Subject: [PATCH 1/2] Add the UDPU transport The UDPU transport is useful for those deployments which can't use multicast. UDPU works by using UDP unicast, which is fully supported by every switch manufacturer by default and doesn't rely on a functional IGMP implementation. An example of the UDPU transport is contained in the corosync.conf.example.udpu file which shows a 16 node cluster. This file should be copied to each node in the cluster and IP addresses changed as appropriate. Amended to remove dead udpu REUSEADDR socket option. Signed-off-by: Steven Dake --- Makefile.am | 5 +- conf/corosync.conf.example.udpu | 75 ++ corosync.spec.in | 1 + exec/Makefile.am | 12 +- exec/totemconfig.c | 35 +- exec/totemmrp.c | 22 + exec/totemmrp.h | 8 + exec/totemnet.c | 71 +- exec/totemnet.h | 8 + exec/totempg.c | 7 + exec/totemrrp.c | 139 ++- exec/totemrrp.h | 10 + exec/totemsrp.c | 33 + exec/totemsrp.h | 10 + exec/totemudpu.c | 1720 ++++++++++++++++++++++++++++++ exec/totemudpu.h | 120 +++ include/corosync/totem/totem.h | 11 +- include/corosync/totem/totempg.h | 8 + man/corosync.conf.5 | 22 +- 19 files changed, 2286 insertions(+), 31 deletions(-) create mode 100644 conf/corosync.conf.example.udpu create mode 100644 exec/totemudpu.c create mode 100644 exec/totemudpu.h diff --git a/Makefile.am b/Makefile.am index c944d8eb..d7072353 100644 --- a/Makefile.am +++ b/Makefile.am @@ -33,8 +33,9 @@ SPEC = $(PACKAGE_NAME).spec TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz -EXTRA_DIST = autogen.sh conf/corosync.conf.example $(SPEC).in \ - conf/lenses/tests/test_corosync.aug conf/lenses/corosync.aug +EXTRA_DIST = autogen.sh conf/corosync.conf.example conf/corosync.conf.example.udpu \ + $(SPEC).in conf/lenses/tests/test_corosync.aug \ + conf/lenses/corosync.aug AUTOMAKE_OPTIONS = foreign diff --git a/conf/corosync.conf.example.udpu b/conf/corosync.conf.example.udpu 
new file mode 100644 index 00000000..8bfbc6e8 --- /dev/null +++ b/conf/corosync.conf.example.udpu @@ -0,0 +1,75 @@ +# Please read the corosync.conf.5 manual page +compatibility: whitetank + +totem { + version: 2 + secauth: off + interface { + member { + memberaddr: 10.16.35.101 + } + member { + memberaddr: 10.16.35.102 + } + member { + memberaddr: 10.16.35.103 + } + member { + memberaddr: 10.16.35.104 + } + member { + memberaddr: 10.16.35.105 + } + member { + memberaddr: 10.16.35.106 + } + member { + memberaddr: 10.16.35.107 + } + member { + memberaddr: 10.16.35.108 + } + member { + memberaddr: 10.16.35.109 + } + member { + memberaddr: 10.16.35.110 + } + member { + memberaddr: 10.16.35.111 + } + member { + memberaddr: 10.16.35.112 + } + member { + memberaddr: 10.16.35.113 + } + member { + memberaddr: 10.16.35.114 + } + member { + memberaddr: 10.16.35.115 + } + member { + memberaddr: 10.16.35.116 + } + ringnumber: 0 + bindnetaddr: 10.16.35.0 + mcastport: 5405 + } + transport: udpu +} + +logging { + fileline: off + to_logfile: yes + to_syslog: yes + debug: on + logfile: /var/log/cluster/corosync.log + debug: off + timestamp: on + logger_subsys { + subsys: AMF + debug: off + } +} diff --git a/corosync.spec.in b/corosync.spec.in index ed531c3f..e2174d9a 100644 --- a/corosync.spec.in +++ b/corosync.spec.in @@ -110,6 +110,7 @@ fi %dir %{_sysconfdir}/corosync/service.d %dir %{_sysconfdir}/corosync/uidgid.d %config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example +%config(noreplace) %{_sysconfdir}/corosync/corosync.conf.example.udpu %{_initrddir}/corosync %dir %{_libexecdir}/lcrso %{_libexecdir}/lcrso/coroparse.lcrso diff --git a/exec/Makefile.am b/exec/Makefile.am index 938237cf..39a72139 100644 --- a/exec/Makefile.am +++ b/exec/Makefile.am @@ -36,8 +36,8 @@ AM_CFLAGS = -fPIC INCLUDES = -I$(top_builddir)/include -I$(top_srcdir)/include $(nss_CFLAGS) $(rdmacm_CFLAGS) $(ibverbs_CFLAGS) TOTEM_SRC = coropoll.c totemip.c totemnet.c totemudp.c \ - totemrrp.c totemsrp.c 
totemmrp.c totempg.c \ - crypto.c wthread.c tsafe.c + totemudpu.c totemrrp.c totemsrp.c totemmrp.c \ + totempg.c crypto.c wthread.c tsafe.c if BUILD_RDMA TOTEM_SRC += totemiba.c endif @@ -72,10 +72,10 @@ SHARED_LIBS_SO = $(SHARED_LIBS:%.so.$(SONAME)=%.so) SHARED_LIBS_SO_TWO = $(SHARED_LIBS:%.so.$(SONAME)=%.so.$(SOMAJOR)) noinst_HEADERS = apidef.h crypto.h mainconfig.h main.h tsafe.h \ - quorum.h service.h sync.h timer.h tlist.h totemconfig.h \ - totemmrp.h totemnet.h totemudp.h totemiba.h totemrrp.h \ - totemsrp.h util.h vsf.h wthread.h schedwrk.h \ - evil.h syncv2.h fsm.h + quorum.h service.h sync.h timer.h tlist.h \ + totemconfig.h totemmrp.h totemnet.h totemudp.h \ + totemudpu.h totemiba.h totemrrp.h totemsrp.h util.h \ + vsf.h wthread.h schedwrk.h evil.h syncv2.h fsm.h EXTRA_DIST = $(LCRSO_SRC) diff --git a/exec/totemconfig.c b/exec/totemconfig.c index ad1b1166..7a30afdb 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -270,10 +270,13 @@ extern int totem_config_read ( int res = 0; hdb_handle_t object_totem_handle; hdb_handle_t object_interface_handle; + hdb_handle_t object_member_handle; const char *str; unsigned int ringnumber = 0; hdb_handle_t object_find_interface_handle; + hdb_handle_t object_find_member_handle; const char *transport_type; + int member_count = 0; res = totem_handle_find (objdb, &object_totem_handle); if (res == -1) { @@ -349,6 +352,8 @@ printf ("couldn't find totem handle\n"); object_find_interface_handle, &object_interface_handle) == 0) { + member_count = 0; + objdb_get_int (objdb, object_interface_handle, "ringnumber", &ringnumber); /* @@ -384,6 +389,22 @@ printf ("couldn't find totem handle\n"); res = totemip_parse (&totem_config->interfaces[ringnumber].bindnet, str, totem_config->interfaces[ringnumber].mcast_addr.family); } + objdb->object_find_create ( + object_interface_handle, + "member", + strlen ("member"), + &object_find_member_handle); + + while (objdb->object_find_next ( + object_find_member_handle, + 
&object_member_handle) == 0) { + + if (!objdb_get_string (objdb, object_member_handle, "memberaddr", &str)) { + res = totemip_parse (&totem_config->interfaces[ringnumber].member_list[member_count++], str, 0); + } + + } + totem_config->interfaces[ringnumber].member_count = member_count; totem_config->interface_count++; } @@ -391,11 +412,16 @@ printf ("couldn't find totem handle\n"); add_totem_config_notification(objdb, totem_config, object_totem_handle); - totem_config->transport_number = 0; + totem_config->transport_number = TOTEM_TRANSPORT_UDP; objdb_get_string (objdb, object_totem_handle, "transport", &transport_type); + if (transport_type) { + if (strcmp (transport_type, "udpu") == 0) { + totem_config->transport_number = TOTEM_TRANSPORT_UDPU; + } + } if (transport_type) { if (strcmp (transport_type, "iba") == 0) { - totem_config->transport_number = 1; + totem_config->transport_number = TOTEM_TRANSPORT_RDMA; } } @@ -425,7 +451,8 @@ int totem_config_validate ( struct totem_ip_address null_addr; memset (&null_addr, 0, sizeof (struct totem_ip_address)); - if (memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, + if ((totem_config->transport_number == 0) && + memcmp (&totem_config->interfaces[i].mcast_addr, &null_addr, sizeof (struct totem_ip_address)) == 0) { error_reason = "No multicast address specified"; goto parse_error; @@ -443,7 +470,7 @@ int totem_config_validate ( goto parse_error; } - if (totem_config->broadcast_use == 0) { + if (totem_config->broadcast_use == 0 && totem_config->transport_number == 0) { if (totem_config->interfaces[i].mcast_addr.family != totem_config->interfaces[i].bindnet.family) { error_reason = "Multicast address family does not match bind address family"; goto parse_error; diff --git a/exec/totemmrp.c b/exec/totemmrp.c index dceb44ab..75ddf986 100644 --- a/exec/totemmrp.c +++ b/exec/totemmrp.c @@ -245,3 +245,25 @@ extern void totemmrp_service_ready_register ( totemsrp_context, totem_service_ready); } + +int totemmrp_member_add 
( + const struct totem_ip_address *member, + int ring_no) +{ + int res; + + res = totemsrp_member_add (totemsrp_context, member, ring_no); + + return (res); +} + +int totemmrp_member_remove ( + const struct totem_ip_address *member, + int ring_no) +{ + int res; + + res = totemsrp_member_remove (totemsrp_context, member, ring_no); + + return (res); +} diff --git a/exec/totemmrp.h b/exec/totemmrp.h index f9b19111..d3510723 100644 --- a/exec/totemmrp.h +++ b/exec/totemmrp.h @@ -116,4 +116,12 @@ extern int totemmrp_ring_reenable (void); extern void totemmrp_service_ready_register ( void (*totem_service_ready) (void)); +extern int totemmrp_member_add ( + const struct totem_ip_address *member, + int ring_no); + +extern int totemmrp_member_remove ( + const struct totem_ip_address *member, + int ring_no); + #endif /* TOTEMMRP_H_DEFINED */ diff --git a/exec/totemnet.c b/exec/totemnet.c index 5853e665..c7670f93 100644 --- a/exec/totemnet.c +++ b/exec/totemnet.c @@ -39,6 +39,7 @@ #include #endif #include +#include #include #define LOGSYS_UTILS_ONLY 1 @@ -112,11 +113,19 @@ struct transport { int (*recv_mcast_empty) ( void *transport_context); + + int (*member_add) ( + void *transport_context, + const struct totem_ip_address *member); + + int (*member_remove) ( + void *transport_context, + const struct totem_ip_address *member); }; struct transport transport_entries[] = { { - .name = "UDP/IP", + .name = "UDP/IP Multicast", .initialize = totemudp_initialize, .processor_count_set = totemudp_processor_count_set, .token_send = totemudp_token_send, @@ -133,6 +142,26 @@ struct transport transport_entries[] = { .crypto_set = totemudp_crypto_set, .recv_mcast_empty = totemudp_recv_mcast_empty }, + { + .name = "UDP/IP Unicast", + .initialize = totemudpu_initialize, + .processor_count_set = totemudpu_processor_count_set, + .token_send = totemudpu_token_send, + .mcast_flush_send = totemudpu_mcast_flush_send, + .mcast_noflush_send = totemudpu_mcast_noflush_send, + .recv_flush = 
totemudpu_recv_flush, + .send_flush = totemudpu_send_flush, + .iface_check = totemudpu_iface_check, + .finalize = totemudpu_finalize, + .net_mtu_adjust = totemudpu_net_mtu_adjust, + .iface_print = totemudpu_iface_print, + .iface_get = totemudpu_iface_get, + .token_target_set = totemudpu_token_target_set, + .crypto_set = totemudpu_crypto_set, + .recv_mcast_empty = totemudpu_recv_mcast_empty, + .member_add = totemudpu_member_add, + .member_remove = totemudpu_member_remove + }, #ifdef HAVE_RDMA { .name = "Infiniband/IP", @@ -192,13 +221,7 @@ static void totemnet_instance_initialize ( instance->totemnet_subsys_id = config->totem_logging_configuration.log_subsys_id; - transport = 0; - -#ifdef HAVE_RDMA - if (config->transport_number == 1) { - transport = 1; - } -#endif + transport = config->transport_number; log_printf (LOGSYS_LEVEL_NOTICE, "Initializing transport (%s).\n", transport_entries[transport].name); @@ -403,3 +426,35 @@ extern int totemnet_recv_mcast_empty ( return (res); } + +extern int totemnet_member_add ( + void *net_context, + const struct totem_ip_address *member) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_add) { + res = instance->transport->member_add ( + instance->transport_context, + member); + } + + return (res); +} + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member) +{ + struct totemnet_instance *instance = (struct totemnet_instance *)net_context; + unsigned int res = 0; + + if (instance->transport->member_remove) { + res = instance->transport->member_remove ( + instance->transport_context, + member); + } + + return (res); +} diff --git a/exec/totemnet.h b/exec/totemnet.h index 96f063d2..7e6374c3 100644 --- a/exec/totemnet.h +++ b/exec/totemnet.h @@ -115,4 +115,12 @@ extern int totemnet_crypto_set ( extern int totemnet_recv_mcast_empty ( void *net_context); +extern int totemnet_member_add ( + void 
*net_context, + const struct totem_ip_address *member); + +extern int totemnet_member_remove ( + void *net_context, + const struct totem_ip_address *member); + #endif /* TOTEMNET_H_DEFINED */ diff --git a/exec/totempg.c b/exec/totempg.c index 10effc8b..fb216953 100644 --- a/exec/totempg.c +++ b/exec/totempg.c @@ -1377,3 +1377,10 @@ extern void totempg_service_ready_register ( totemmrp_service_ready_register (totem_service_ready); } +extern int totempg_member_add ( + const struct totem_ip_address *member, + int ring_no); + +extern int totempg_member_remove ( + const struct totem_ip_address *member, + int ring_no); diff --git a/exec/totemrrp.c b/exec/totemrrp.c index 0b803512..a8ebd087 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -166,6 +166,17 @@ struct rrp_algo { int (*mcast_recv_empty) ( struct totemrrp_instance *instance); + + int (*member_add) ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + + int (*member_remove) ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + }; struct totemrrp_instance { @@ -289,6 +300,15 @@ static void none_ring_reenable ( static int none_mcast_recv_empty ( struct totemrrp_instance *instance); +static int none_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int none_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); /* * Passive Replication Forward Declerations */ @@ -350,6 +370,15 @@ static void passive_ring_reenable ( static int passive_mcast_recv_empty ( struct totemrrp_instance *instance); +static int passive_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int passive_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int 
iface_no); /* * Active Replication Forward Definitions */ @@ -411,6 +440,16 @@ static void active_ring_reenable ( static int active_mcast_recv_empty ( struct totemrrp_instance *instance); +static int active_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + +static int active_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no); + static void active_timer_expired_token_start ( struct active_instance *active_instance); @@ -437,7 +476,9 @@ struct rrp_algo none_algo = { .processor_count_set = none_processor_count_set, .token_target_set = none_token_target_set, .ring_reenable = none_ring_reenable, - .mcast_recv_empty = none_mcast_recv_empty + .mcast_recv_empty = none_mcast_recv_empty, + .member_add = none_member_add, + .member_remove = none_member_remove }; struct rrp_algo passive_algo = { @@ -454,7 +495,9 @@ struct rrp_algo passive_algo = { .processor_count_set = passive_processor_count_set, .token_target_set = passive_token_target_set, .ring_reenable = passive_ring_reenable, - .mcast_recv_empty = passive_mcast_recv_empty + .mcast_recv_empty = passive_mcast_recv_empty, + .member_add = passive_member_add, + .member_remove = passive_member_remove }; struct rrp_algo active_algo = { @@ -471,7 +514,9 @@ struct rrp_algo active_algo = { .processor_count_set = active_processor_count_set, .token_target_set = active_token_target_set, .ring_reenable = active_ring_reenable, - .mcast_recv_empty = active_mcast_recv_empty + .mcast_recv_empty = active_mcast_recv_empty, + .member_add = active_member_add, + .member_remove = active_member_remove }; struct rrp_algo *rrp_algos[] = { @@ -598,6 +643,27 @@ static int none_mcast_recv_empty ( return (res); } +static int none_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[0], 
member); + return (res); +} + +static int none_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[0], member); + return (res); +} + + /* * Passive Replication Implementation */ @@ -947,6 +1013,27 @@ static int passive_mcast_recv_empty ( return (msgs_emptied); } +static int passive_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[iface_no], member); + return (res); +} + +static int passive_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[iface_no], member); + return (res); +} + + static void passive_ring_reenable ( struct totemrrp_instance *instance) { @@ -1264,6 +1351,26 @@ static void active_send_flush (struct totemrrp_instance *instance) } } +static int active_member_add ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_add (instance->net_handles[iface_no], member); + return (res); +} + +static int active_member_remove ( + struct totemrrp_instance *instance, + const struct totem_ip_address *member, + unsigned int iface_no) +{ + int res; + res = totemnet_member_remove (instance->net_handles[iface_no], member); + return (res); +} + static void active_iface_check (struct totemrrp_instance *instance) { struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance; @@ -1550,7 +1657,6 @@ int totemrrp_initialize ( totemnet_net_mtu_adjust (instance->net_handles[i], totem_config); } - *rrp_context = instance; return (0); @@ -1707,3 +1813,28 @@ extern int totemrrp_mcast_recv_empty ( return (res); } +int totemrrp_member_add ( + void 
*rrp_context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; + int res; + + res = instance->rrp_algo->member_add (instance, member, iface_no); + + return (res); +} + +int totemrrp_member_remove ( + void *rrp_context, + const struct totem_ip_address *member, + int iface_no) +{ + struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context; + int res; + + res = instance->rrp_algo->member_remove (instance, member, iface_no); + + return (res); +} diff --git a/exec/totemrrp.h b/exec/totemrrp.h index 5fa0a0ff..da79ed2a 100644 --- a/exec/totemrrp.h +++ b/exec/totemrrp.h @@ -128,4 +128,14 @@ extern int totemrrp_ring_reenable ( extern int totemrrp_mcast_recv_empty ( void *rrp_context); +extern int totemrrp_member_add ( + void *net_context, + const struct totem_ip_address *member, + int iface_no); + +extern int totemrrp_member_remove ( + void *net_context, + const struct totem_ip_address *member, + int iface_no); + #endif /* TOTEMRRP_H_DEFINED */ diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 9d15ade4..f7a66383 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -4352,6 +4352,7 @@ void main_iface_change_fn ( unsigned int iface_no) { struct totemsrp_instance *instance = context; + int i; totemip_copy (&instance->my_id.addr[iface_no], iface_addr); assert (instance->my_id.addr[iface_no].nodeid); @@ -4365,6 +4366,12 @@ void main_iface_change_fn ( "Created or loaded sequence id %lld.%s for this ring.\n", instance->my_ring_id.seq, totemip_print (&instance->my_ring_id.rep)); + for (i = 0; i < instance->totem_config->interfaces[iface_no].member_count; i++) { + totemsrp_member_add (instance, + &instance->totem_config->interfaces[iface_no].member_list[i], + iface_no); + + } if (instance->totemsrp_service_ready_fn) { instance->totemsrp_service_ready_fn (); } @@ -4387,3 +4394,29 @@ void totemsrp_service_ready_register ( instance->totemsrp_service_ready_fn = 
totem_service_ready; } + +int totemsrp_member_add ( + void *context, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemrrp_member_add (instance->totemrrp_context, member, ring_no); + + return (res); +} + +int totemsrp_member_remove ( + void *context, + const struct totem_ip_address *member, + int ring_no) +{ + struct totemsrp_instance *instance = (struct totemsrp_instance *)context; + int res; + + res = totemrrp_member_remove (instance->totemrrp_context, member, ring_no); + + return (res); +} diff --git a/exec/totemsrp.h b/exec/totemsrp.h index 37ef5887..55f7ce28 100644 --- a/exec/totemsrp.h +++ b/exec/totemsrp.h @@ -120,4 +120,14 @@ void totemsrp_service_ready_register ( void *srp_context, void (*totem_service_ready) (void)); +extern int totemsrp_member_add ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + +extern int totemsrp_member_remove ( + void *srp_context, + const struct totem_ip_address *member, + int ring_no); + #endif /* TOTEMSRP_H_DEFINED */ diff --git a/exec/totemudpu.c b/exec/totemudpu.c new file mode 100644 index 00000000..e8eed92d --- /dev/null +++ b/exec/totemudpu.c @@ -0,0 +1,1720 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2009 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#define LOGSYS_UTILS_ONLY 1 +#include +#include "totemudpu.h" + +#include "crypto.h" + +#ifdef HAVE_LIBNSS +#include +#include +#include +#include +#endif + +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif + +#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) +#define NETIF_STATE_REPORT_UP 1 +#define NETIF_STATE_REPORT_DOWN 2 + +#define BIND_STATE_UNBOUND 0 +#define BIND_STATE_REGULAR 1 +#define BIND_STATE_LOOPBACK 2 + +#define HMAC_HASH_SIZE 20 +struct security_header { + unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ + unsigned char salt[16]; /* random number */ + char msg[0]; +} __attribute__((packed)); + +struct totemudpu_member { + struct list_head list; + struct totem_ip_address member; + int fd; +}; + +struct totemudpu_instance { + hmac_state totemudpu_hmac_state; + + prng_state totemudpu_prng_state; + +#ifdef HAVE_LIBNSS + PK11SymKey *nss_sym_key; + PK11SymKey *nss_sym_key_sign; +#endif + + unsigned char totemudpu_private_key[1024]; + + unsigned int totemudpu_private_key_len; + + hdb_handle_t totemudpu_poll_handle; + + struct totem_interface *totem_interface; + + int netif_state_report; + + int netif_bind_state; + + void *context; + + void (*totemudpu_deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len); + + void (*totemudpu_iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address); + + void (*totemudpu_target_set_completed) (void *context); + + /* + * Function and data used to log messages + */ + int totemudpu_log_level_security; + + int totemudpu_log_level_error; + + int totemudpu_log_level_warning; + + int totemudpu_log_level_notice; + + int 
totemudpu_log_level_debug; + + int totemudpu_subsys_id; + + void (*totemudpu_log_printf) ( + unsigned int rec_ident, + const char *function, + const char *file, + int line, + const char *format, + ...)__attribute__((format(printf, 5, 6))); + + void *udpu_context; + + char iov_buffer[FRAME_SIZE_MAX]; + + char iov_buffer_flush[FRAME_SIZE_MAX]; + + struct iovec totemudpu_iov_recv; + + struct iovec totemudpu_iov_recv_flush; + + struct list_head member_list; + + int stats_sent; + + int stats_recv; + + int stats_delv; + + int stats_remcasts; + + int stats_orf_token; + + struct timeval stats_tv_start; + + struct totem_ip_address my_id; + + int firstrun; + + poll_timer_handle timer_netif_check_timeout; + + unsigned int my_memb_entries; + + int flushing; + + struct totem_config *totem_config; + + struct totem_ip_address token_target; + + int token_socket; +}; + +struct work_item { + const void *msg; + unsigned int msg_len; + struct totemudpu_instance *instance; +}; + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to); + +static struct totem_ip_address localhost; + +static void totemudpu_instance_initialize (struct totemudpu_instance *instance) +{ + memset (instance, 0, sizeof (struct totemudpu_instance)); + + instance->netif_state_report = NETIF_STATE_REPORT_UP | NETIF_STATE_REPORT_DOWN; + + instance->totemudpu_iov_recv.iov_base = instance->iov_buffer; + + instance->totemudpu_iov_recv.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer); + instance->totemudpu_iov_recv_flush.iov_base = instance->iov_buffer_flush; + + instance->totemudpu_iov_recv_flush.iov_len = FRAME_SIZE_MAX; //sizeof (instance->iov_buffer_flush); + + /* + * There is always at least 1 processor + */ + instance->my_memb_entries = 1; + + list_init (&instance->member_list); +} + +#define log_printf(level, format, args...) 
\ +do { \ + instance->totemudpu_log_printf ( \ + LOGSYS_ENCODE_RECID(level, \ + instance->totemudpu_subsys_id, \ + LOGSYS_RECID_LOG), \ + __FUNCTION__, __FILE__, __LINE__, \ + (const char *)format, ##args); \ +} while (0); + + +static int authenticate_and_decrypt_sober ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + unsigned char keys[48]; + struct security_header *header = (struct security_header *)iov[0].iov_base; + prng_state keygen_prng_state; + prng_state stream_prng_state; + unsigned char *hmac_key = &keys[32]; + unsigned char *cipher_key = &keys[16]; + unsigned char *initial_vector = &keys[0]; + unsigned char digest_comparison[HMAC_HASH_SIZE]; + unsigned long len; + + /* + * Generate MAC, CIPHER, IV keys from private key + */ + memset (keys, 0, sizeof (keys)); + sober128_start (&keygen_prng_state); + sober128_add_entropy (instance->totemudpu_private_key, + instance->totemudpu_private_key_len, &keygen_prng_state); + sober128_add_entropy (header->salt, sizeof (header->salt), &keygen_prng_state); + + sober128_read (keys, sizeof (keys), &keygen_prng_state); + + /* + * Setup stream cipher + */ + sober128_start (&stream_prng_state); + sober128_add_entropy (cipher_key, 16, &stream_prng_state); + sober128_add_entropy (initial_vector, 16, &stream_prng_state); + + /* + * Authenticate contents of message + */ + hmac_init (&instance->totemudpu_hmac_state, DIGEST_SHA1, hmac_key, 16); + + hmac_process (&instance->totemudpu_hmac_state, + (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, + iov->iov_len - HMAC_HASH_SIZE); + + len = hash_descriptor[DIGEST_SHA1]->hashsize; + assert (HMAC_HASH_SIZE >= len); + hmac_done (&instance->totemudpu_hmac_state, digest_comparison, &len); + + if (memcmp (digest_comparison, header->hash_digest, len) != 0) { + return (-1); + } + + /* + * Decrypt the contents of the message with the cipher key + */ + sober128_read ((unsigned char*)iov->iov_base + + sizeof (struct security_header), + iov->iov_len - 
sizeof (struct security_header), + &stream_prng_state); + + return (0); +} + +static void init_sober_crypto( + struct totemudpu_instance *instance) +{ + log_printf(instance->totemudpu_log_level_notice, + "Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).\n"); + rng_make_prng (128, PRNG_SOBER, &instance->totemudpu_prng_state, NULL); +} + +#ifdef HAVE_LIBNSS + +static unsigned char *copy_from_iovec( + const struct iovec *iov, + unsigned int iov_len, + size_t *buf_size) +{ + int i; + size_t bufptr; + size_t buflen = 0; + unsigned char *newbuf; + + for (i=0; i buf_size) { + copylen = buf_size - bufptr; + } + memcpy(iov[i].iov_base, buf+bufptr, copylen); + bufptr += copylen; + if (iov[i].iov_len != copylen) { + iov[i].iov_len = copylen; + return; + } + } +} + +static void init_nss_crypto( + struct totemudpu_instance *instance) +{ + PK11SlotInfo* aes_slot = NULL; + PK11SlotInfo* sha1_slot = NULL; + SECItem key_item; + SECStatus rv; + + log_printf(instance->totemudpu_log_level_notice, + "Initializing transmit/receive security: NSS AES128CBC/SHA1HMAC (mode 1).\n"); + rv = NSS_NoDB_Init("."); + if (rv != SECSuccess) + { + log_printf(instance->totemudpu_log_level_security, "NSS initialization failed (err %d)\n", + PR_GetError()); + goto out; + } + + aes_slot = PK11_GetBestSlot(instance->totem_config->crypto_crypt_type, NULL); + if (aes_slot == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", + PR_GetError()); + goto out; + } + + sha1_slot = PK11_GetBestSlot(CKM_SHA_1_HMAC, NULL); + if (sha1_slot == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Unable to find security slot (err %d)\n", + PR_GetError()); + goto out; + } + /* + * Make the private key into a SymKey that we can use + */ + key_item.type = siBuffer; + key_item.data = instance->totem_config->private_key; + key_item.len = 32; /* 32 bytes = 256 bits of key material; NOTE(review): comment used to say 128 bits - confirm intended key size */ + + instance->nss_sym_key = PK11_ImportSymKey(aes_slot, + 
instance->totem_config->crypto_crypt_type, + PK11_OriginUnwrap, CKA_ENCRYPT|CKA_DECRYPT, + &key_item, NULL); + if (instance->nss_sym_key == NULL) + { + log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", + PR_GetError()); + goto out; + } + + instance->nss_sym_key_sign = PK11_ImportSymKey(sha1_slot, + CKM_SHA_1_HMAC, + PK11_OriginUnwrap, CKA_SIGN, + &key_item, NULL); + if (instance->nss_sym_key_sign == NULL) { + log_printf(instance->totemudpu_log_level_security, "Failure to import key into NSS (err %d)\n", + PR_GetError()); + goto out; + } +out: + return; +} + +static int encrypt_and_sign_nss ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + PK11Context* enc_context = NULL; + SECStatus rv1, rv2; + int tmp1_outlen; + unsigned int tmp2_outlen; + unsigned char *inbuf; + unsigned char *data; + unsigned char *outdata; + size_t datalen; + SECItem no_params; + SECItem iv_item; + struct security_header *header; + SECItem *nss_sec_param; + unsigned char nss_iv_data[16]; + SECStatus rv; + + no_params.type = siBuffer; + no_params.data = 0; + no_params.len = 0; + + tmp1_outlen = tmp2_outlen = 0; + inbuf = copy_from_iovec(iovec, iov_len, &datalen); + if (!inbuf) { + log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); + return -1; + } + + data = inbuf + sizeof (struct security_header); + datalen -= sizeof (struct security_header); + + outdata = buf + sizeof (struct security_header); + header = (struct security_header *)buf; + + rv = PK11_GenerateRandom ( + nss_iv_data, + sizeof (nss_iv_data)); + if (rv != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, + "Failure to generate a random number %d\n", + PR_GetError()); + } + + memcpy(header->salt, nss_iv_data, sizeof(nss_iv_data)); + iv_item.type = siBuffer; + iv_item.data = nss_iv_data; + iv_item.len = sizeof (nss_iv_data); + + 
nss_sec_param = PK11_ParamFromIV ( + instance->totem_config->crypto_crypt_type, + &iv_item); + if (nss_sec_param == NULL) { + log_printf(instance->totemudpu_log_level_security, + "Failure to set up PKCS11 param (err %d)\n", + PR_GetError()); + free (inbuf); + return (-1); + } + + /* + * Create cipher context for encryption + */ + enc_context = PK11_CreateContextBySymKey ( + instance->totem_config->crypto_crypt_type, + CKA_ENCRYPT, + instance->nss_sym_key, + nss_sec_param); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, + "PK11_CreateContext failed (encrypt) crypt_type=%d (err %d): %s\n", + instance->totem_config->crypto_crypt_type, + PR_GetError(), err); + free(inbuf); + return -1; + } + rv1 = PK11_CipherOp(enc_context, outdata, + &tmp1_outlen, FRAME_SIZE_MAX - sizeof(struct security_header), + data, datalen); + rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, + FRAME_SIZE_MAX - tmp1_outlen); + PK11_DestroyContext(enc_context, PR_TRUE); + + *buf_len = tmp1_outlen + tmp2_outlen; + free(inbuf); +// memcpy(&outdata[*buf_len], nss_iv_data, sizeof(nss_iv_data)); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + goto out; + + /* Now do the digest */ + enc_context = PK11_CreateContextBySymKey(CKM_SHA_1_HMAC, + CKA_SIGN, instance->nss_sym_key_sign, &no_params); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, "encrypt: PK11_CreateContext failed (digest) err %d: %s\n", + PR_GetError(), err); + return -1; + } + + + PK11_DigestBegin(enc_context); + + rv1 = PK11_DigestOp(enc_context, outdata - 16, *buf_len + 16); + rv2 = PK11_DigestFinal(enc_context, header->hash_digest, &tmp2_outlen, sizeof(header->hash_digest)); + + PK11_DestroyContext(enc_context, PR_TRUE); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + goto out; + + + *buf_len = *buf_len + 
sizeof(struct security_header); + SECITEM_FreeItem(nss_sec_param, PR_TRUE); + return 0; + +out: + return -1; +} + + +static int authenticate_and_decrypt_nss ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + PK11Context* enc_context = NULL; + SECStatus rv1, rv2; + int tmp1_outlen; + unsigned int tmp2_outlen; + unsigned char outbuf[FRAME_SIZE_MAX]; + unsigned char digest[HMAC_HASH_SIZE]; + unsigned char *outdata; + int result_len; + unsigned char *data; + unsigned char *inbuf; + size_t datalen; + struct security_header *header = (struct security_header *)iov[0].iov_base; + SECItem no_params; + SECItem ivdata; + + no_params.type = siBuffer; + no_params.data = 0; + no_params.len = 0; + + tmp1_outlen = tmp2_outlen = 0; + if (iov_len > 1) { + inbuf = copy_from_iovec(iov, iov_len, &datalen); + if (!inbuf) { + log_printf(instance->totemudpu_log_level_security, "malloc error copying buffer from iovec\n"); + return -1; + } + } + else { + inbuf = (unsigned char *)iov[0].iov_base; + datalen = iov[0].iov_len; + } + data = inbuf + sizeof (struct security_header) - 16; + datalen = datalen - sizeof (struct security_header) + 16; + + outdata = outbuf + sizeof (struct security_header); + + /* Check the digest */ + enc_context = PK11_CreateContextBySymKey ( + CKM_SHA_1_HMAC, CKA_SIGN, + instance->nss_sym_key_sign, + &no_params); + if (!enc_context) { + char err[1024]; + PR_GetErrorText(err); + err[PR_GetErrorTextLength()] = 0; + log_printf(instance->totemudpu_log_level_security, "PK11_CreateContext failed (check digest) err %d: %s\n", + PR_GetError(), err); + free (inbuf); + return -1; + } + + PK11_DigestBegin(enc_context); + + rv1 = PK11_DigestOp(enc_context, data, datalen); + rv2 = PK11_DigestFinal(enc_context, digest, &tmp2_outlen, sizeof(digest)); + + PK11_DestroyContext(enc_context, PR_TRUE); + + if (rv1 != SECSuccess || rv2 != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, "Digest check failed\n"); + return -1; + } + 
+ if (memcmp(digest, header->hash_digest, tmp2_outlen) != 0) { + log_printf(instance->totemudpu_log_level_error, "Digest does not match\n"); + return -1; + } + + /* + * Get rid of salt + */ + data += 16; + datalen -= 16; + + /* Create cipher context for decryption */ + ivdata.type = siBuffer; + ivdata.data = header->salt; + ivdata.len = sizeof(header->salt); + + enc_context = PK11_CreateContextBySymKey( + instance->totem_config->crypto_crypt_type, + CKA_DECRYPT, + instance->nss_sym_key, &ivdata); + if (!enc_context) { + log_printf(instance->totemudpu_log_level_security, + "PK11_CreateContext (decrypt) failed (err %d)\n", + PR_GetError()); + return -1; + } + + rv1 = PK11_CipherOp(enc_context, outdata, &tmp1_outlen, + sizeof(outbuf) - sizeof (struct security_header), + data, datalen); + if (rv1 != SECSuccess) { + log_printf(instance->totemudpu_log_level_security, + "PK11_CipherOp (decrypt) failed (err %d)\n", + PR_GetError()); + } + rv2 = PK11_DigestFinal(enc_context, outdata + tmp1_outlen, &tmp2_outlen, + sizeof(outbuf) - tmp1_outlen); + PK11_DestroyContext(enc_context, PR_TRUE); + result_len = tmp1_outlen + tmp2_outlen + sizeof (struct security_header); + + /* Copy it back to the buffer */ + copy_to_iovec(iov, iov_len, outbuf, result_len); + if (iov_len > 1) + free(inbuf); + + if (rv1 != SECSuccess || rv2 != SECSuccess) + return -1; + + return 0; +} +#endif + +static int encrypt_and_sign_sober ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + int i; + unsigned char *addr; + unsigned char keys[48]; + struct security_header *header; + unsigned char *hmac_key = &keys[32]; + unsigned char *cipher_key = &keys[16]; + unsigned char *initial_vector = &keys[0]; + unsigned long len; + size_t outlen = 0; + hmac_state hmac_st; + prng_state keygen_prng_state; + prng_state stream_prng_state; + prng_state *prng_state_in = &instance->totemudpu_prng_state; + + header = (struct security_header 
*)buf; + addr = buf + sizeof (struct security_header); + + memset (keys, 0, sizeof (keys)); + memset (header->salt, 0, sizeof (header->salt)); + + /* + * Generate MAC, CIPHER, IV keys from private key + */ + sober128_read (header->salt, sizeof (header->salt), prng_state_in); + sober128_start (&keygen_prng_state); + sober128_add_entropy (instance->totemudpu_private_key, + instance->totemudpu_private_key_len, + &keygen_prng_state); + sober128_add_entropy (header->salt, sizeof (header->salt), + &keygen_prng_state); + + sober128_read (keys, sizeof (keys), &keygen_prng_state); + + /* + * Setup stream cipher + */ + sober128_start (&stream_prng_state); + sober128_add_entropy (cipher_key, 16, &stream_prng_state); + sober128_add_entropy (initial_vector, 16, &stream_prng_state); + + outlen = sizeof (struct security_header); + /* + * Copy remainder of message, then encrypt it + */ + for (i = 1; i < iov_len; i++) { + memcpy (addr, iovec[i].iov_base, iovec[i].iov_len); + addr += iovec[i].iov_len; + outlen += iovec[i].iov_len; + } + + /* + * Encrypt message by XORing stream cipher data + */ + sober128_read (buf + sizeof (struct security_header), + outlen - sizeof (struct security_header), + &stream_prng_state); + + memset (&hmac_st, 0, sizeof (hmac_st)); + + /* + * Sign the contents of the message with the hmac key and store signature in message + */ + hmac_init (&hmac_st, DIGEST_SHA1, hmac_key, 16); + + hmac_process (&hmac_st, + buf + HMAC_HASH_SIZE, + outlen - HMAC_HASH_SIZE); + + len = hash_descriptor[DIGEST_SHA1]->hashsize; + + hmac_done (&hmac_st, header->hash_digest, &len); + + *buf_len = outlen; + + return 0; +} + +static int encrypt_and_sign_worker ( + struct totemudpu_instance *instance, + unsigned char *buf, + size_t *buf_len, + const struct iovec *iovec, + unsigned int iov_len) +{ + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_SOBER || + instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) + return encrypt_and_sign_sober(instance, buf, buf_len, 
iovec, iov_len); +#ifdef HAVE_LIBNSS + if (instance->totem_config->crypto_type == TOTEM_CRYPTO_NSS) + return encrypt_and_sign_nss(instance, buf, buf_len, iovec, iov_len); +#endif + return -1; +} + +static int authenticate_and_decrypt ( + struct totemudpu_instance *instance, + struct iovec *iov, + unsigned int iov_len) +{ + unsigned char type; + unsigned char *endbuf = (unsigned char *)iov[iov_len-1].iov_base; + int res = -1; + + /* + * Get the encryption type and remove it from the buffer + */ + type = endbuf[iov[iov_len-1].iov_len-1]; + iov[iov_len-1].iov_len -= 1; + + if (type == TOTEM_CRYPTO_SOBER) + res = authenticate_and_decrypt_sober(instance, iov, iov_len); + + /* + * Only try higher crypto options if NEW has been requested + */ + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { +#ifdef HAVE_LIBNSS + if (type == TOTEM_CRYPTO_NSS) + res = authenticate_and_decrypt_nss(instance, iov, iov_len); +#endif + } + + /* + * If it failed, then try decrypting the whole packet as it might be + * from aisexec + */ + if (res == -1) { + iov[iov_len-1].iov_len += 1; + res = authenticate_and_decrypt_sober(instance, iov, iov_len); + } + + return res; +} + +static void init_crypto( + struct totemudpu_instance *instance) +{ + /* + * If we are expecting NEW crypto type then initialise all available + * crypto options. For OLD then we only need SOBER128. 
+ */ + + init_sober_crypto(instance); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) + return; + +#ifdef HAVE_LIBNSS + init_nss_crypto(instance); +#endif +} + +int totemudpu_crypto_set ( + void *udpu_context, + unsigned int type) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + /* + * Can't set crypto type if OLD is selected + */ + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_OLD) { + res = -1; + } else { + /* + * Validate crypto algorithm + */ + switch (type) { + case TOTEM_CRYPTO_SOBER: + log_printf(instance->totemudpu_log_level_security, + "Transmit security set to: libtomcrypt SOBER128/SHA1HMAC (mode 0)"); + break; + case TOTEM_CRYPTO_NSS: + log_printf(instance->totemudpu_log_level_security, + "Transmit security set to: NSS AES128CBC/SHA1HMAC (mode 1)"); + break; + default: + res = -1; + break; + } + } + + return (res); +} + + +static inline void ucast_sendmsg ( + struct totemudpu_instance *instance, + struct totem_ip_address *system_to, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_ucast; + int res = 0; + size_t buf_len; + unsigned char sheader[sizeof (struct security_header)]; + unsigned char encrypt_data[FRAME_SIZE_MAX]; + struct iovec iovec_encrypt[2]; + const struct iovec *iovec_sendmsg; + struct sockaddr_storage sockaddr; + struct iovec iovec; + unsigned int iov_len; + int addrlen; + + if (instance->totem_config->secauth == 1) { + iovec_encrypt[0].iov_base = (void *)sheader; + iovec_encrypt[0].iov_len = sizeof (struct security_header); + iovec_encrypt[1].iov_base = (void *)msg; + iovec_encrypt[1].iov_len = msg_len; + + /* + * Encrypt and digest the message + */ + encrypt_and_sign_worker ( + instance, + encrypt_data, + &buf_len, + iovec_encrypt, + 2); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { + encrypt_data[buf_len++] = instance->totem_config->crypto_type; + } + else { + encrypt_data[buf_len++] = 0; + 
} + + iovec_encrypt[0].iov_base = (void *)encrypt_data; + iovec_encrypt[0].iov_len = buf_len; + iovec_sendmsg = &iovec_encrypt[0]; + iov_len = 1; + } else { + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + iovec_sendmsg = &iovec; + iov_len = 1; + } + + /* + * Build unicast message + */ + totemip_totemip_to_sockaddr_convert(system_to, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_ucast.msg_name = &sockaddr; + msg_ucast.msg_namelen = addrlen; + msg_ucast.msg_iov = (void *) iovec_sendmsg; + msg_ucast.msg_iovlen = iov_len; +#if !defined(COROSYNC_SOLARIS) + msg_ucast.msg_control = 0; + msg_ucast.msg_controllen = 0; + msg_ucast.msg_flags = 0; +#else + msg_ucast.msg_accrights = NULL; + msg_ucast.msg_accrightslen = 0; +#endif + + + /* + * Transmit unicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (instance->token_socket, &msg_ucast, MSG_NOSIGNAL); + if (res < 0) { + char error_str[100]; + strerror_r (errno, error_str, sizeof(error_str)); + log_printf (instance->totemudpu_log_level_debug, + "sendmsg(ucast) failed (non-critical): %s\n", error_str); + } +} + +static inline void mcast_sendmsg ( + struct totemudpu_instance *instance, + const void *msg, + unsigned int msg_len) +{ + struct msghdr msg_mcast; + int res = 0; + size_t buf_len; + unsigned char sheader[sizeof (struct security_header)]; + unsigned char encrypt_data[FRAME_SIZE_MAX]; + struct iovec iovec_encrypt[2]; + struct iovec iovec; + const struct iovec *iovec_sendmsg; + struct sockaddr_storage sockaddr; + unsigned int iov_len; + int addrlen; + struct list_head *list; + struct totemudpu_member *member; + + if (instance->totem_config->secauth == 1) { + iovec_encrypt[0].iov_base = (void *)sheader; + iovec_encrypt[0].iov_len = sizeof (struct security_header); + iovec_encrypt[1].iov_base = (void *)msg; + iovec_encrypt[1].iov_len = msg_len; + + /* + * Encrypt and digest the message + */ + encrypt_and_sign_worker ( + instance, + encrypt_data, + &buf_len, + 
iovec_encrypt, + 2); + + if (instance->totem_config->crypto_accept == TOTEM_CRYPTO_ACCEPT_NEW) { + encrypt_data[buf_len++] = instance->totem_config->crypto_type; + } + else { + encrypt_data[buf_len++] = 0; + } + + iovec_encrypt[0].iov_base = (void *)encrypt_data; + iovec_encrypt[0].iov_len = buf_len; + iovec_sendmsg = &iovec_encrypt[0]; + iov_len = 1; + } else { + iovec.iov_base = (void *)msg; + iovec.iov_len = msg_len; + + iovec_sendmsg = &iovec; + iov_len = 1; + } + + /* + * Build multicast message + */ + for (list = instance->member_list.next; + list != &instance->member_list; + list = list->next) { + + member = list_entry (list, + struct totemudpu_member, + list); + + totemip_totemip_to_sockaddr_convert(&member->member, + instance->totem_interface->ip_port, &sockaddr, &addrlen); + msg_mcast.msg_name = &sockaddr; + msg_mcast.msg_namelen = addrlen; + msg_mcast.msg_iov = (void *) iovec_sendmsg; + msg_mcast.msg_iovlen = iov_len; + #if !defined(COROSYNC_SOLARIS) + msg_mcast.msg_control = 0; + msg_mcast.msg_controllen = 0; + msg_mcast.msg_flags = 0; + #else + msg_mcast.msg_accrights = NULL; + msg_mcast.msg_accrightslen = 0; + #endif + + /* + * Transmit multicast message + * An error here is recovered by totemsrp + */ + res = sendmsg (member->fd, &msg_mcast, MSG_NOSIGNAL); + if (res < 0) { + char error_str[100]; + strerror_r (errno, error_str, sizeof(error_str)); + log_printf (instance->totemudpu_log_level_debug, + "sendmsg(mcast) failed (non-critical): %s\n", error_str); + } + } +} + +int totemudpu_finalize ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + if (instance->token_socket > 0) { + close (instance->token_socket); + poll_dispatch_delete (instance->totemudpu_poll_handle, + instance->token_socket); + } + + return (res); +} + +static int net_deliver_fn ( + hdb_handle_t handle, + int fd, + int revents, + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance 
*)data; + struct msghdr msg_recv; + struct iovec *iovec; + struct security_header *security_header; + struct sockaddr_storage system_from; + int bytes_received; + int res = 0; + unsigned char *msg_offset; + unsigned int size_delv; + + if (instance->flushing == 1) { + iovec = &instance->totemudpu_iov_recv_flush; + } else { + iovec = &instance->totemudpu_iov_recv; + } + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = iovec; + msg_recv.msg_iovlen = 1; +#if !defined(COROSYNC_SOLARIS) + msg_recv.msg_control = 0; + msg_recv.msg_controllen = 0; + msg_recv.msg_flags = 0; +#else + msg_recv.msg_accrights = NULL; + msg_recv.msg_accrightslen = 0; +#endif + + bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (bytes_received == -1) { + return (0); + } else { + instance->stats_recv += bytes_received; + } + + if ((instance->totem_config->secauth == 1) && + (bytes_received < sizeof (struct security_header))) { + + log_printf (instance->totemudpu_log_level_security, "Received message is too short... ignoring %d.\n", bytes_received); + return (0); + } + + security_header = (struct security_header *)iovec->iov_base; + + iovec->iov_len = bytes_received; + if (instance->totem_config->secauth == 1) { + /* + * Authenticate and if authenticated, decrypt datagram + */ + + res = authenticate_and_decrypt (instance, iovec, 1); + if (res == -1) { + log_printf (instance->totemudpu_log_level_security, "Received message has invalid digest... 
ignoring.\n"); + log_printf (instance->totemudpu_log_level_security, + "Invalid packet data\n"); + iovec->iov_len = FRAME_SIZE_MAX; + return 0; + } + msg_offset = (unsigned char *)iovec->iov_base + + sizeof (struct security_header); + size_delv = bytes_received - sizeof (struct security_header); + } else { + msg_offset = (void *)iovec->iov_base; + size_delv = bytes_received; + } + + /* + * Handle incoming message + */ + instance->totemudpu_deliver_fn ( + instance->context, + msg_offset, + size_delv); + + iovec->iov_len = FRAME_SIZE_MAX; + return (0); +} + +static int netif_determine ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet, + struct totem_ip_address *bound_to, + int *interface_up, + int *interface_num) +{ + int res; + + res = totemip_iface_check (bindnet, bound_to, + interface_up, interface_num, + instance->totem_config->clear_node_high_bit); + + + return (res); +} + + +/* + * If the interface is up, the sockets for totem are built. If the interface is down + * this function is requeued in the timer list to retry building the sockets later. 
+ */ +static void timer_function_netif_check_timeout ( + void *data) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)data; + int res; + int interface_up; + int interface_num; + struct totem_ip_address *bind_address; + + /* + * Build sockets for every interface + */ + netif_determine (instance, + &instance->totem_interface->bindnet, + &instance->totem_interface->boundto, + &interface_up, &interface_num); + /* + * If the network interface isn't back up and we are already + * in loopback mode, add timer to check again and return + */ + if ((instance->netif_bind_state == BIND_STATE_LOOPBACK && + interface_up == 0) || + + (instance->my_memb_entries == 1 && + instance->netif_bind_state == BIND_STATE_REGULAR && + interface_up == 1)) { + + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + /* + * Add a timer to check for a downed regular interface + */ + return; + } + + if (instance->token_socket > 0) { + close (instance->token_socket); + poll_dispatch_delete (instance->totemudpu_poll_handle, + instance->token_socket); + } + + if (interface_up == 0) { + /* + * Interface is not up + */ + instance->netif_bind_state = BIND_STATE_LOOPBACK; + bind_address = &localhost; + + /* + * Add a timer to retry building interfaces and request memb_gather_enter + */ + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } else { + /* + * Interface is up + */ + instance->netif_bind_state = BIND_STATE_REGULAR; + bind_address = &instance->totem_interface->bindnet; + } + /* + * Create and bind the multicast and unicast sockets + */ + res = totemudpu_build_sockets (instance, + bind_address, + &instance->totem_interface->boundto); + + poll_dispatch_add ( + instance->totemudpu_poll_handle, 
+ instance->token_socket, + POLLIN, instance, net_deliver_fn); + + totemip_copy (&instance->my_id, &instance->totem_interface->boundto); + + /* + * This reports changes in the interface to the user and totemsrp + */ + if (instance->netif_bind_state == BIND_STATE_REGULAR) { + if (instance->netif_state_report & NETIF_STATE_REPORT_UP) { + log_printf (instance->totemudpu_log_level_notice, + "The network interface [%s] is now up.\n", + totemip_print (&instance->totem_interface->boundto)); + instance->netif_state_report = NETIF_STATE_REPORT_DOWN; + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); + } + /* + * Add a timer to check for interface going down in single membership + */ + if (instance->my_memb_entries == 1) { + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + } else { + if (instance->netif_state_report & NETIF_STATE_REPORT_DOWN) { + log_printf (instance->totemudpu_log_level_notice, + "The network interface is down.\n"); + instance->totemudpu_iface_change_fn (instance->context, &instance->my_id); + } + instance->netif_state_report = NETIF_STATE_REPORT_UP; + + } +} + +/* Set the socket priority to INTERACTIVE to ensure + that our messages don't get queued behind anything else */ +static void totemudpu_traffic_control_set(struct totemudpu_instance *instance, int sock) +{ +#ifdef SO_PRIORITY + int prio = 6; /* TC_PRIO_INTERACTIVE */ + char error_str[100]; + + if (setsockopt(sock, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(int))) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set traffic priority. 
(%s)\n", error_str); + } +#endif +} + +static int totemudpu_build_sockets_ip ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to, + int interface_num) +{ + struct sockaddr_storage sockaddr; + int addrlen; + int res; + + /* + * Setup unicast socket + */ + instance->token_socket = socket (bindnet_address->family, SOCK_DGRAM, 0); + if (instance->token_socket == -1) { + perror ("socket2"); + return (-1); + } + + totemip_nosigpipe (instance->token_socket); + res = fcntl (instance->token_socket, F_SETFL, O_NONBLOCK); + if (res == -1) { + char error_str[100]; + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket: %s\n", error_str); + return (-1); + } + + /* + * Force reuse + */ +// flag = 1; +// if ( setsockopt(instance->token_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { +// perror("setsockopt reuseaddr"); +// return (-1); +// } + + /* + * Bind to unicast socket used for token send/receives + * This has the side effect of binding to the correct interface + */ + totemip_totemip_to_sockaddr_convert(bound_to, instance->totem_interface->ip_port, &sockaddr, &addrlen); + res = bind (instance->token_socket, (struct sockaddr *)&sockaddr, addrlen); + if (res == -1) { + perror ("bind token socket failed"); + return (-1); + } + + return 0; +} + +static int totemudpu_build_sockets ( + struct totemudpu_instance *instance, + struct totem_ip_address *bindnet_address, + struct totem_ip_address *bound_to) +{ + int interface_num; + int interface_up; + int res; + + /* + * Determine the ip address bound to and the interface name + */ + res = netif_determine (instance, + bindnet_address, + bound_to, + &interface_up, + &interface_num); + + if (res == -1) { + return (-1); + } + + totemip_copy(&instance->my_id, bound_to); + + res = totemudpu_build_sockets_ip (instance, + bindnet_address, bound_to, 
interface_num); + + /* We only send out of the token socket */ + totemudpu_traffic_control_set(instance, instance->token_socket); + return res; +} + +/* + * Totem Network interface - also does encryption/decryption + * depends on poll abstraction, POSIX, IPV4 + */ + +/* + * Create an instance + */ +int totemudpu_initialize ( + hdb_handle_t poll_handle, + void **udpu_context, + struct totem_config *totem_config, + int interface_no, + void *context, + + void (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len), + + void (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address), + + void (*target_set_completed) ( + void *context)) +{ + struct totemudpu_instance *instance; + + instance = malloc (sizeof (struct totemudpu_instance)); + if (instance == NULL) { + return (-1); + } + + totemudpu_instance_initialize (instance); + + instance->totem_config = totem_config; + /* + * Configure logging + */ + instance->totemudpu_log_level_security = 1; //totem_config->totem_logging_configuration.log_level_security; + instance->totemudpu_log_level_error = totem_config->totem_logging_configuration.log_level_error; + instance->totemudpu_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; + instance->totemudpu_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; + instance->totemudpu_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; + instance->totemudpu_subsys_id = totem_config->totem_logging_configuration.log_subsys_id; + instance->totemudpu_log_printf = totem_config->totem_logging_configuration.log_printf; + + /* + * Initialize random number generator for later use to generate salt + */ + memcpy (instance->totemudpu_private_key, totem_config->private_key, + totem_config->private_key_len); + + instance->totemudpu_private_key_len = totem_config->private_key_len; + + init_crypto(instance); + + /* + * Initialize local variables for totemudpu + */ + 
instance->totem_interface = &totem_config->interfaces[interface_no]; + memset (instance->iov_buffer, 0, FRAME_SIZE_MAX); + + instance->totemudpu_poll_handle = poll_handle; + + instance->totem_interface->bindnet.nodeid = instance->totem_config->node_id; + + instance->context = context; + instance->totemudpu_deliver_fn = deliver_fn; + + instance->totemudpu_iface_change_fn = iface_change_fn; + + instance->totemudpu_target_set_completed = target_set_completed; + + totemip_localhost (AF_INET, &localhost); + localhost.nodeid = instance->totem_config->node_id; + + /* + * RRP layer isn't ready to receive message because it hasn't + * initialized yet. Add short timer to check the interfaces. + */ + poll_timer_add (instance->totemudpu_poll_handle, + 100, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + + *udpu_context = instance; + return (0); +} + +int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + instance->my_memb_entries = processor_count; + poll_timer_delete (instance->totemudpu_poll_handle, + instance->timer_netif_check_timeout); + if (processor_count == 1) { + poll_timer_add (instance->totemudpu_poll_handle, + instance->totem_config->downcheck_timeout, + (void *)instance, + timer_function_netif_check_timeout, + &instance->timer_netif_check_timeout); + } + + return (res); +} + +int totemudpu_recv_flush (void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + struct pollfd ufd; + int nfds; + int res = 0; + + instance->flushing = 1; + + do { + ufd.fd = instance->token_socket; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + net_deliver_fn (0, instance->token_socket, + ufd.revents, instance); + } + } while (nfds == 1); + + instance->flushing = 0; + + return (res); +} + +int 
totemudpu_send_flush (void *udpu_context) +{ + int res = 0; + + return (res); +} + +int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + ucast_sendmsg (instance, &instance->token_target, msg, msg_len); + + return (res); +} +int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + mcast_sendmsg (instance, msg, msg_len); + + return (res); +} + +extern int totemudpu_iface_check (void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + timer_function_netif_check_timeout (instance); + + return (res); +} + +extern void totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config) +{ +#define UDPIP_HEADER_SIZE (20 + 8) /* 20 bytes for ip 8 bytes for udp */ + if (totem_config->secauth == 1) { + totem_config->net_mtu -= sizeof (struct security_header) + + UDPIP_HEADER_SIZE; + } else { + totem_config->net_mtu -= UDPIP_HEADER_SIZE; + } +} + +const char *totemudpu_iface_print (void *udpu_context) { + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + const char *ret_char; + + ret_char = totemip_print (&instance->my_id); + + return (ret_char); +} + +int totemudpu_iface_get ( + void *udpu_context, + struct totem_ip_address *addr) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + memcpy (addr, &instance->my_id, sizeof (struct totem_ip_address)); + + return 
(res); +} + +int totemudpu_token_target_set ( + void *udpu_context, + const struct totem_ip_address *token_target) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + int res = 0; + + memcpy (&instance->token_target, token_target, + sizeof (struct totem_ip_address)); + + instance->totemudpu_target_set_completed (instance->context); + + return (res); +} + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + unsigned int res; + struct sockaddr_storage system_from; + struct msghdr msg_recv; + struct pollfd ufd; + int nfds; + int msg_processed = 0; + + /* + * Receive datagram + */ + msg_recv.msg_name = &system_from; + msg_recv.msg_namelen = sizeof (struct sockaddr_storage); + msg_recv.msg_iov = &instance->totemudpu_iov_recv_flush; + msg_recv.msg_iovlen = 1; +#if !defined(COROSYNC_SOLARIS) + msg_recv.msg_control = 0; + msg_recv.msg_controllen = 0; + msg_recv.msg_flags = 0; +#else + msg_recv.msg_accrights = NULL; + msg_recv.msg_accrightslen = 0; +#endif + + do { + ufd.fd = instance->token_socket; + ufd.events = POLLIN; + nfds = poll (&ufd, 1, 0); + if (nfds == 1 && ufd.revents & POLLIN) { + res = recvmsg (instance->token_socket, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); + if (res != -1) { + msg_processed = 1; + } else { + msg_processed = -1; + } + } + } while (nfds == 1); + + return (msg_processed); +} + +int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *member) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + struct totemudpu_member *new_member; + int res; + char error_str[100]; + + new_member = malloc (sizeof (struct totemudpu_member)); + if (new_member == NULL) { + return (-1); + } + list_init (&new_member->list); + list_add_tail (&new_member->list, &instance->member_list); + memcpy (&new_member->member, member, sizeof (struct totem_ip_address)); + new_member->fd = 
socket (member->family, SOCK_DGRAM, 0); + if (new_member->fd == -1) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not create socket for new member: %s\n", error_str); + return (-1); + } + totemip_nosigpipe (new_member->fd); + res = fcntl (new_member->fd, F_SETFL, O_NONBLOCK); + if (res == -1) { + strerror_r (errno, error_str, 100); + log_printf (instance->totemudpu_log_level_warning, + "Could not set non-blocking operation on token socket: %s\n", error_str); + return (-1); + } + return (0); +} + +int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *token_target) +{ + struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context; + + instance = NULL; + return (0); +} diff --git a/exec/totemudpu.h b/exec/totemudpu.h new file mode 100644 index 00000000..2dcad248 --- /dev/null +++ b/exec/totemudpu.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2010 Red Hat, Inc. + * + * All rights reserved. + * + * Author: Steven Dake (sdake@redhat.com) + * + * This software licensed under BSD license, the text of which follows: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the MontaVista Software, Inc. nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef TOTEMUDPU_H_DEFINED +#define TOTEMUDPU_H_DEFINED + +#include +#include +#include + +#include + +/* + * Create an instance + */ +extern int totemudpu_initialize ( + hdb_handle_t poll_handle, + void **udpu_context, + struct totem_config *totem_config, + int interface_no, + void *context, + + void (*deliver_fn) ( + void *context, + const void *msg, + unsigned int msg_len), + + void (*iface_change_fn) ( + void *context, + const struct totem_ip_address *iface_address), + + void (*target_set_completed) ( + void *context)); + +extern int totemudpu_processor_count_set ( + void *udpu_context, + int processor_count); + +extern int totemudpu_token_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_flush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_mcast_noflush_send ( + void *udpu_context, + const void *msg, + unsigned int msg_len); + +extern int totemudpu_recv_flush (void *udpu_context); + +extern int totemudpu_send_flush (void *udpu_context); + +extern int totemudpu_iface_check (void *udpu_context); + +extern int totemudpu_finalize (void *udpu_context); + +extern void 
totemudpu_net_mtu_adjust (void *udpu_context, struct totem_config *totem_config); + +extern const char *totemudpu_iface_print (void *udpu_context); + +extern int totemudpu_iface_get ( + void *udpu_context, + struct totem_ip_address *addr); + +extern int totemudpu_token_target_set ( + void *udpu_context, + const struct totem_ip_address *token_target); + +extern int totemudpu_crypto_set ( + void *udpu_context, + unsigned int type); + +extern int totemudpu_recv_mcast_empty ( + void *udpu_context); + +extern int totemudpu_member_add ( + void *udpu_context, + const struct totem_ip_address *member); + +extern int totemudpu_member_remove ( + void *udpu_context, + const struct totem_ip_address *member); + +#endif /* TOTEMUDPU_H_DEFINED */ diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h index 80bbf5c1..b84d9ba5 100644 --- a/include/corosync/totem/totem.h +++ b/include/corosync/totem/totem.h @@ -57,6 +57,9 @@ struct totem_interface { struct totem_ip_address boundto; struct totem_ip_address mcast_addr; uint16_t ip_port; + int member_count; + struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]; + }; struct totem_logging_configuration { @@ -79,6 +82,12 @@ struct totem_logging_configuration { enum { TOTEM_PRIVATE_KEY_LEN = 128 }; enum { TOTEM_RRP_MODE_BYTES = 64 }; +typedef enum { + TOTEM_TRANSPORT_UDP = 0, + TOTEM_TRANSPORT_UDPU = 1, + TOTEM_TRANSPORT_RDMA = 2 +} totem_transport_t; + struct totem_config { int version; @@ -164,7 +173,7 @@ struct totem_config { int crypto_crypt_type; int crypto_sign_type; - int transport_number; + totem_transport_t transport_number; }; #define TOTEM_CONFIGURATION_TYPE diff --git a/include/corosync/totem/totempg.h b/include/corosync/totem/totempg.h index fbf71fb3..26447aa6 100644 --- a/include/corosync/totem/totempg.h +++ b/include/corosync/totem/totempg.h @@ -160,6 +160,14 @@ extern int totempg_ring_reenable (void); extern void totempg_service_ready_register ( void (*totem_service_ready) (void)); +extern int 
totempg_member_add ( + const struct totem_ip_address *member, + int ring_no); + +extern int totempg_member_remove ( + const struct totem_ip_address *member, + int ring_no); + #ifdef __cplusplus } #endif diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 index 5d3f7b5e..8fc3dc6f 100644 --- a/man/corosync.conf.5 +++ b/man/corosync.conf.5 @@ -1,6 +1,6 @@ .\"/* .\" * Copyright (c) 2005 MontaVista Software, Inc. -.\" * Copyright (c) 2006-2009 Red Hat, Inc. +.\" * Copyright (c) 2006-2010 Red Hat, Inc. .\" * .\" * All rights reserved. .\" * @@ -77,7 +77,8 @@ option which is required: .PP Within the .B interface -sub-directive of totem there are four parameters which are required: +sub-directive of totem there are four parameters which are required. There is +one parameter which is optional. .TP ringnumber @@ -126,6 +127,13 @@ mcastport - 1 (for mcast sends). If you have multiple clusters on the same network using the same mcastaddr please configure the mcastports with a gap. +.TP +member +This specifies a member on the interface and is used with the udpu transport only. +Every node that should be a member of the membership should be specified as +a separate member directive. Within the member directive there is a parameter +memberaddr which specifies the IP address of one of the nodes. + .PP .PP Within the @@ -258,11 +266,13 @@ The default is ykd. The vsftype can also be set to none. .TP transport This directive controls the transport mechanism used. If the interface to -which corosync is binding is Infiniband, you can specify the "iba" option. Any -other option is ignored. Note Infiniband interfaces will use RDMA transport -techniques and perform at higher bandwidths and lower latency than gige networks. +which corosync is binding is an RDMA interface such as RoCEE or Infiniband, the +"iba" parameter may be specified. To avoid the use of multicast entirely, a +unicast transport parameter "udpu" can be specified.
This requires specifying +the list of members that could potentially make up the membership before +deployment. -The default is udp. The transport type can also be set to iba. +The default is udp. The transport type can also be set to udpu or iba. Within the .B totem From b403fcbea96e0d5a93286487cdc58f36b792f86a Mon Sep 17 00:00:00 2001 From: Steven Dake Date: Thu, 18 Nov 2010 14:51:17 -0700 Subject: [PATCH 2/2] Remove dead SO_REUSEADDR code Signed-off-by: Steven Dake Reviewed-by: Angus Salkeld --- exec/totemudpu.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/exec/totemudpu.c b/exec/totemudpu.c index e8eed92d..dc30a125 100644 --- a/exec/totemudpu.c +++ b/exec/totemudpu.c @@ -1336,15 +1336,6 @@ static int totemudpu_build_sockets_ip ( return (-1); } - /* - * Force reuse - */ -// flag = 1; -// if ( setsockopt(instance->token_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&flag, sizeof (flag)) < 0) { -// perror("setsockopt reuseaddr"); -// return (-1); -// } - /* * Bind to unicast socket used for token send/receives * This has the side effect of binding to the correct interface