From a7f4b6d8cb840d164a6e58703deceeba72bf92a4 Mon Sep 17 00:00:00 2001 From: Steven Dake Date: Tue, 24 Jun 2008 04:45:38 +0000 Subject: [PATCH] Fix a ton of bugs in totem by forward porting the whitetank totem code into trunk. git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@1562 fd59a12c-fef9-0310-b244-a6a79926bd2f --- exec/totem.h | 23 +--------- exec/totemconfig.c | 12 ++--- exec/totemconfig.h | 1 + exec/totemip.c | 77 ++++++++------------------------ exec/totemip.h | 27 ++--------- exec/totemmrp.c | 1 - exec/totemnet.c | 62 +++++++------------------- exec/totemnet.h | 2 +- exec/totempg.c | 109 ++++++++++++++++++++------------------------- exec/totemrrp.c | 2 +- exec/totemrrp.h | 1 + exec/totemsrp.c | 92 +++++++++++++++++++------------------- 12 files changed, 143 insertions(+), 266 deletions(-) diff --git a/exec/totem.h b/exec/totem.h index 583a1e84..364741bf 100644 --- a/exec/totem.h +++ b/exec/totem.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. - * Copyright (c) 2006 Red Hat, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * Copyright (c) 2006 Sun Microsystems, Inc. * * Author: Steven Dake (sdake@redhat.com) @@ -164,26 +164,5 @@ struct memb_ring_id { unsigned long long seq; } __attribute__((packed)); -typedef struct memb_ring_id memb_ring_id_t; - -static inline void swab_memb_ring_id_t (memb_ring_id_t *to_swab) -{ - swab_totem_ip_address_t (&to_swab->rep); - to_swab->seq = swab64 (to_swab->seq); -} - -static inline void memb_ring_id_copy( - memb_ring_id_t *out, memb_ring_id_t *in) -{ - totemip_copy (&out->rep, &in->rep); - out->seq = in->seq; -} - -static inline void memb_ring_id_copy_endian_convert( - memb_ring_id_t *out, memb_ring_id_t *in) -{ - totemip_copy_endian_convert (&out->rep, &in->rep); - out->seq = swab64 (in->seq); -} #endif /* TOTEM_H_DEFINED */ diff --git a/exec/totemconfig.c b/exec/totemconfig.c index f50980dd..f15c1570 100644 --- a/exec/totemconfig.c +++ b/exec/totemconfig.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2002-2005 MontaVista Software, Inc. - * Copyright (c) 2006 RedHat, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * * All rights reserved. * @@ -46,14 +46,16 @@ #include #include -#include "swab.h" -#include "list.h" +#include "../include/list.h" #include "util.h" #include "totem.h" #include "totemconfig.h" #include "logsys.h" #include "objdb.h" -#include "tlist.h" /* for HZ */ + +#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) + #define HZ 100 /* 10ms */ +#endif #define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4 #define TOKEN_TIMEOUT 1000 @@ -232,7 +234,7 @@ extern int totem_config_read ( * Get mcast port */ if (!objdb_get_string (objdb, object_interface_handle, "mcastport", &str)) { - totem_config->interfaces[ringnumber].ip_port = atoi (str); + totem_config->interfaces[ringnumber].ip_port = htons (atoi (str)); } /* diff --git a/exec/totemconfig.h b/exec/totemconfig.h index 40470ed9..b154bc85 100644 --- a/exec/totemconfig.h +++ b/exec/totemconfig.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * * All rights reserved. * diff --git a/exec/totemip.c b/exec/totemip.c index 0f6b9d9d..e49517ab 100644 --- a/exec/totemip.c +++ b/exec/totemip.c @@ -1,6 +1,5 @@ /* - * Copyright (c) 2005 Red Hat Inc - * Copyright (c) 2006 Sun Microsystems, Inc. + * Copyright (c) 2005-2007 Red Hat, Inc. * * All rights reserved. * @@ -42,12 +41,10 @@ #include #include #include -#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) || defined(OPENAIS_SOLARIS) +#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) #include #include -#ifndef OPENAIS_SOLARIS #include -#endif #include #endif #include @@ -63,24 +60,14 @@ /* ARGH!! I hate netlink */ #include #include - -/* this should catch 2.6.19 headers */ -#ifndef IFA_MAX -#include -#endif -/* redefine macro that disappeared in 2.6.19 */ -#ifndef IFA_RTA -#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) #endif -#endif - -#if ! defined(OPENAIS_SOLARIS) && ! defined(s6_addr16) +#ifndef s6_addr16 #define s6_addr16 __u6_addr.__u6_addr16 #endif -#include "swab.h" #include "totemip.h" +#include "swab.h" #define LOCALHOST_IPV4 "127.0.0.1" #define LOCALHOST_IPV6 "::1" @@ -128,9 +115,7 @@ void totemip_copy_endian_convert(struct totem_ip_address *addr1, struct totem_ip { addr1->nodeid = swab32(addr2->nodeid); addr1->family = swab16(addr2->family); - if (addr1 != addr2) { - memcpy(addr1->addr, addr2->addr, TOTEMIP_ADDRLEN); - } + memcpy(addr1->addr, addr2->addr, TOTEMIP_ADDRLEN); } /* For sorting etc. params are void * for qsort's benefit */ @@ -169,13 +154,8 @@ int totemip_compare(const void *a, const void *b) memcpy (&ipv6_a1, totemip_a->addr, sizeof (struct in6_addr)); memcpy (&ipv6_a2, totemip_b->addr, sizeof (struct in6_addr)); for (i = 0; i < 8; i++) { -#ifndef OPENAIS_SOLARIS - int res = htons(ipv6_a1.s6_addr16[i]) - - htons(ipv6_a2.s6_addr16[i]); -#else - int res = htons(((uint16_t *)ipv6_a1.s6_addr)[i]) - - htons(((uint16_t *)ipv6_a2.s6_addr)[i]); -#endif + int res = htons(ipv6_a1.s6_addr16[i]) - + htons(ipv6_a2.s6_addr16[i]); if (res) { return res; } @@ -186,7 +166,6 @@ int totemip_compare(const void *a, const void *b) * Family not set, should be! */ assert (0); - exit (1); } } @@ -194,16 +173,14 @@ int totemip_compare(const void *a, const void *b) int totemip_localhost(int family, struct totem_ip_address *localhost) { char *addr_text; - uint32_t nodeid; memset (localhost, 0, sizeof (struct totem_ip_address)); if (family == AF_INET) { addr_text = LOCALHOST_IPV4; - if (inet_pton(family, addr_text, (char *)&nodeid) <= 0) { + if (inet_pton(family, addr_text, (char *)&localhost->nodeid) <= 0) { return -1; } - localhost->nodeid = ntohl(nodeid); } else { addr_text = LOCALHOST_IPV6; } @@ -246,7 +223,7 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, sin->sin_len = sizeof(struct sockaddr_in); #endif sin->sin_family = ip_addr->family; - sin->sin_port = htons (port); + sin->sin_port = port; memcpy(&sin->sin_addr, ip_addr->addr, sizeof(struct in_addr)); *addrlen = sizeof(struct sockaddr_in); ret = 0; @@ -260,7 +237,7 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, sin->sin6_len = sizeof(struct sockaddr_in6); #endif sin->sin6_family = ip_addr->family; - sin->sin6_port = htons (port); + sin->sin6_port = port; sin->sin6_scope_id = 2; memcpy(&sin->sin6_addr, ip_addr->addr, sizeof(struct in6_addr)); @@ -272,8 +249,8 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, } /* Converts an address string string into a totem_ip_address. - * family can be AF_INET, AF_INET6 or 0 (for "don't care") - */ + family can be AF_INET, AF_INET6 or 0 ("for "don't care") +*/ int totemip_parse(struct totem_ip_address *totemip, char *addr, int family) { struct addrinfo *ainfo; @@ -301,8 +278,6 @@ int totemip_parse(struct totem_ip_address *totemip, char *addr, int family) else memcpy(totemip->addr, &sa6->sin6_addr, sizeof(struct in6_addr)); - freeaddrinfo(ainfo); - return 0; } @@ -331,19 +306,14 @@ int totemip_sockaddr_to_totemip_convert(struct sockaddr_storage *saddr, struct t return ret; } -#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) || defined(OPENAIS_SOLARIS) +#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num) { -#ifndef OPENAIS_SOLARIS #define NEXT_IFR(a) ((struct ifreq *)((u_char *)&(a)->ifr_addr +\ ((a)->ifr_addr.sa_len ? (a)->ifr_addr.sa_len : sizeof((a)->ifr_addr)))) -#else -#define NEXT_IFR(a) ((struct ifreq *)((u_char *)&(a)->ifr_addr +\ - sizeof((a)->ifr_addr))) -#endif struct sockaddr_in *intf_addr_mask; struct sockaddr_storage bindnet_ss, intf_addr_ss; @@ -367,24 +337,14 @@ int totemip_iface_check(struct totem_ip_address *bindnet, * Generate list of local interfaces in ifc.ifc_req structure */ id_fd = socket (AF_INET, SOCK_DGRAM, 0); - ifc.ifc_buf = NULL; + ifc.ifc_buf = 0; do { - void *ifc_buf_tmp; numreqs += 32; ifc.ifc_len = sizeof (struct ifreq) * numreqs; - ifc_buf_tmp = realloc (ifc.ifc_buf, ifc.ifc_len); - if (ifc_buf_tmp == NULL) { - close (id_fd); - if (ifc.ifc_buf != NULL) { - free (ifc.ifc_buf); - } - return -1; - } - ifc.ifc_buf = ifc_buf_tmp; + ifc.ifc_buf = (void *)realloc(ifc.ifc_buf, ifc.ifc_len); res = ioctl (id_fd, SIOCGIFCONF, &ifc); if (res < 0) { close (id_fd); - free (ifc.ifc_buf); return -1; } } while (ifc.ifc_len == sizeof (struct ifreq) * numreqs); @@ -441,9 +401,7 @@ int totemip_iface_check(struct totem_ip_address *bindnet, } } } - if (ifc.ifc_buf != NULL) { - free (ifc.ifc_buf); - } + free (ifc.ifc_buf); close (id_fd); return (res); @@ -544,8 +502,9 @@ int totemip_iface_check(struct totem_ip_address *bindnet, parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len); memcpy(ipaddr.addr, RTA_DATA(tb[IFA_ADDRESS]), TOTEMIP_ADDRLEN); - if (totemip_equal(&ipaddr, bindnet)) + if (totemip_equal(&ipaddr, bindnet)) { found_if = 1; + } /* If the address we have is an IPv4 network address, then substitute the actual IP address of this interface */ diff --git a/exec/totemip.h b/exec/totemip.h index 19ba17dc..ec87b793 100644 --- a/exec/totemip.h +++ b/exec/totemip.h @@ -1,11 +1,10 @@ /* - * Copyright (c) 2005 Red Hat Inc - * Author: Patrick Caulfield (pcaulfie@redhat.com) - * - * Copyright (c) 2006 Sun Microsystems, Inc. + * Copyright (c) 2005-2007 Red Hat, Inc. * * All rights reserved. * + * Author: Patrick Caulfield (pcaulfie@redhat.com) + * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without @@ -38,8 +37,6 @@ #ifndef TOTEMIP_H_DEFINED #define TOTEMIP_H_DEFINED -#include -#include #include #include @@ -59,16 +56,9 @@ struct totem_ip_address { unsigned int nodeid; unsigned short family; - unsigned char addr[TOTEMIP_ADDRLEN]; /* in Network Byteorder */ + unsigned char addr[TOTEMIP_ADDRLEN]; } __attribute__((packed)); -typedef struct totem_ip_address totem_ip_address_t; - -static inline void swab_totem_ip_address_t (totem_ip_address_t *to_swab) -{ - to_swab->nodeid = swab32 (to_swab->nodeid); - to_swab->family = swab16 (to_swab->family); -} extern int totemip_equal(struct totem_ip_address *addr1, struct totem_ip_address *addr2); extern int totemip_compare(const void *a, const void *b); @@ -93,13 +83,4 @@ static inline int totemip_zero_check(struct totem_ip_address *addr) return (addr->family == 0); } -static inline unsigned int totemip_compute_nodeid_from_addr( - const struct totem_ip_address *addr) -{ - struct in_addr *in = (struct in_addr *)addr->addr; - assert(addr->family == AF_INET); - - return (unsigned int)ntohl(in->s_addr); -} - #endif diff --git a/exec/totemmrp.c b/exec/totemmrp.c index 20efd718..4e6a5cd7 100644 --- a/exec/totemmrp.c +++ b/exec/totemmrp.c @@ -54,7 +54,6 @@ #include #include -#include "swab.h" #include "totem.h" #include "totemsrp.h" #include "aispoll.h" diff --git a/exec/totemnet.c b/exec/totemnet.c index 3e029e42..3711ad8d 100644 --- a/exec/totemnet.c +++ b/exec/totemnet.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. - * Copyright (c) 2006 Red Hat, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * Copyright (c) 2006 Sun Microsystems, Inc. * * All rights reserved. @@ -60,7 +60,6 @@ #include "aispoll.h" #include "totemnet.h" #include "wthread.h" -#include "swab.h" #include "../include/queue.h" #include "../include/sq.h" #include "../include/list.h" @@ -69,10 +68,6 @@ #include "crypto.h" -#ifdef OPENAIS_SOLARIS -#define MSG_NOSIGNAL 0 -#endif - #define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX) #define NETIF_STATE_REPORT_UP 1 @@ -240,8 +235,7 @@ static int authenticate_and_decrypt ( struct iovec *iov) { unsigned char keys[48]; - struct security_header *header = - (struct security_header *)iov[0].iov_base; + struct security_header *header = iov[0].iov_base; prng_state keygen_prng_state; prng_state stream_prng_state; unsigned char *hmac_key = &keys[32]; @@ -274,7 +268,7 @@ static int authenticate_and_decrypt ( hmac_init (&instance->totemnet_hmac_state, DIGEST_SHA1, hmac_key, 16); hmac_process (&instance->totemnet_hmac_state, - (unsigned char *)iov->iov_base + HMAC_HASH_SIZE, + iov->iov_base + HMAC_HASH_SIZE, iov->iov_len - HMAC_HASH_SIZE); len = hash_descriptor[DIGEST_SHA1]->hashsize; @@ -289,10 +283,8 @@ static int authenticate_and_decrypt ( /* * Decrypt the contents of the message with the cipher key */ - assert(iov->iov_len >= sizeof (struct security_header)); - sober128_read ( - (unsigned char *)iov->iov_base + sizeof (struct security_header), - (unsigned long)iov->iov_len - sizeof (struct security_header), + sober128_read (iov->iov_base + sizeof (struct security_header), + iov->iov_len - sizeof (struct security_header), &stream_prng_state); return (0); @@ -398,7 +390,7 @@ static inline void ucast_sendmsg ( if (instance->totem_config->secauth == 1) { - iovec_encrypt[0].iov_base = (char *)sheader; + iovec_encrypt[0].iov_base = sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); memcpy (&iovec_encrypt[1], &iovec_in[0], sizeof (struct iovec) * iov_len_in); @@ -414,7 +406,7 @@ static inline void ucast_sendmsg ( iov_len_in + 1, &instance->totemnet_prng_state); - iovec_encrypt[0].iov_base = (char *)encrypt_data; + iovec_encrypt[0].iov_base = encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; @@ -432,14 +424,9 @@ static inline void ucast_sendmsg ( msg_ucast.msg_namelen = addrlen; msg_ucast.msg_iov = iovec_sendmsg; msg_ucast.msg_iovlen = iov_len; -#ifndef OPENAIS_SOLARIS msg_ucast.msg_control = 0; msg_ucast.msg_controllen = 0; msg_ucast.msg_flags = 0; -#else - msg_ucast.msg_accrights = NULL; - msg_ucast.msg_accrightslen = 0; -#endif /* * Transmit multicast message @@ -467,7 +454,7 @@ static inline void mcast_sendmsg ( if (instance->totem_config->secauth == 1) { - iovec_encrypt[0].iov_base = (char *)sheader; + iovec_encrypt[0].iov_base = sheader; iovec_encrypt[0].iov_len = sizeof (struct security_header); memcpy (&iovec_encrypt[1], &iovec_in[0], sizeof (struct iovec) * iov_len_in); @@ -483,7 +470,7 @@ static inline void mcast_sendmsg ( iov_len_in + 1, &instance->totemnet_prng_state); - iovec_encrypt[0].iov_base = (char *)encrypt_data; + iovec_encrypt[0].iov_base = encrypt_data; iovec_encrypt[0].iov_len = buf_len; iovec_sendmsg = &iovec_encrypt[0]; iov_len = 1; @@ -501,14 +488,9 @@ static inline void mcast_sendmsg ( msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = iovec_sendmsg; msg_mcast.msg_iovlen = iov_len; -#ifndef OPENAIS_SOLARIS msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; -#else - msg_mcast.msg_accrights = NULL; - msg_mcast.msg_accrightslen = 0; -#endif /* * Transmit multicast message @@ -550,7 +532,7 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in) if (instance->totem_config->secauth == 1) { memmove (&work_item->iovec[1], &work_item->iovec[0], work_item->iov_len * sizeof (struct iovec)); - work_item->iovec[0].iov_base = (char *)sheader; + work_item->iovec[0].iov_base = sheader; work_item->iovec[0].iov_len = sizeof (struct security_header); /* @@ -563,7 +545,7 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in) &totemnet_mcast_thread_state->prng_state); iovec_sendmsg = &iovec_encrypted; - iovec_sendmsg->iov_base = (char *)totemnet_mcast_thread_state->iobuf; + iovec_sendmsg->iov_base = totemnet_mcast_thread_state->iobuf; iovec_sendmsg->iov_len = buf_len; iovs = 1; } else { @@ -578,14 +560,9 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in) msg_mcast.msg_namelen = addrlen; msg_mcast.msg_iov = iovec_sendmsg; msg_mcast.msg_iovlen = iovs; -#ifndef OPENAIS_SOLARIS msg_mcast.msg_control = 0; msg_mcast.msg_controllen = 0; msg_mcast.msg_flags = 0; -#else - msg_mcast.msg_accrights = NULL; - msg_mcast.msg_accrightslen = 0; -#endif /* * Transmit multicast message @@ -636,7 +613,7 @@ static int net_deliver_fn ( struct sockaddr_storage system_from; int bytes_received; int res = 0; - char *msg_offset; + unsigned char *msg_offset; unsigned int size_delv; if (instance->flushing == 1) { @@ -652,14 +629,9 @@ static int net_deliver_fn ( msg_recv.msg_namelen = sizeof (struct sockaddr_storage); msg_recv.msg_iov = iovec; msg_recv.msg_iovlen = 1; -#ifndef OPENAIS_SOLARIS msg_recv.msg_control = 0; msg_recv.msg_controllen = 0; msg_recv.msg_flags = 0; -#else - msg_recv.msg_accrights = NULL; - msg_recv.msg_accrightslen = 0; -#endif bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT); if (bytes_received == -1) { @@ -730,7 +702,7 @@ static int netif_determine ( * field is only 32 bits. */ if (bound_to->family == AF_INET && bound_to->nodeid == 0) { - bound_to->nodeid = totemip_compute_nodeid_from_addr(bound_to); + memcpy (&bound_to->nodeid, bound_to->addr, sizeof (int)); } return (res); @@ -1071,7 +1043,6 @@ static int totemnet_build_sockets_ip ( break; } -#ifndef OPENAIS_SOLARIS /* * Turn on multicast loopback */ @@ -1079,18 +1050,17 @@ static int totemnet_build_sockets_ip ( flag = 1; switch ( bindnet_address->family ) { case AF_INET: - res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_LOOP, + res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP, &flag, sizeof (flag)); break; case AF_INET6: - res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, + res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &flag, sizeof (flag)); } if (res == -1) { perror ("turn off loopback"); return (-1); } -#endif /* * Set multicast packets TTL @@ -1107,7 +1077,6 @@ static int totemnet_build_sockets_ip ( } } -#ifndef OPENAIS_SOLARIS /* * Bind to a specific interface for multicast send and receive */ @@ -1137,7 +1106,6 @@ static int totemnet_build_sockets_ip ( } break; } -#endif return 0; } diff --git a/exec/totemnet.h b/exec/totemnet.h index 25991bea..f4788abc 100644 --- a/exec/totemnet.h +++ b/exec/totemnet.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * * All rights reserved. * @@ -37,7 +38,6 @@ #include #include -#include "swab.h" #include "totem.h" #include "aispoll.h" diff --git a/exec/totempg.c b/exec/totempg.c index 34f5f815..dd6af250 100644 --- a/exec/totempg.c +++ b/exec/totempg.c @@ -1,13 +1,13 @@ /* * Copyright (c) 2003-2005 MontaVista Software, Inc. * Copyright (c) 2005 OSDL. - * Copyright (c) 2006-2007 Red Hat, Inc. * Copyright (c) 2006 Sun Microsystems, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * * All rights reserved. * * Author: Steven Dake (sdake@redhat.com) - * Mark Haverkamp (markh@osdl.org) + * Author: Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * @@ -83,9 +83,6 @@ * */ -#ifndef OPENAIS_BSD -#include -#endif #include #include #include @@ -95,7 +92,6 @@ #include #include -#include "swab.h" #include "../include/hdb.h" #include "../include/list.h" #include "totempg.h" @@ -182,7 +178,7 @@ DECLARE_LIST_INIT(assembly_list_free); * fragment_contuation indicates whether the first packed message in * the buffer is a continuation of a previously packed fragment. */ -static char *fragmentation_data; +static unsigned char *fragmentation_data; static int fragment_size = 0; @@ -341,25 +337,25 @@ static inline int group_matches ( char *group_name; int i; int j; -#ifdef __sparc - struct iovec iovec_aligned = { NULL, 0 }; -#endif + struct iovec iovec_aligned = { NULL, 0 }; assert (iov_len == 1); -#ifdef __sparc + /* + * Align data structure for sparc and ia64 + */ if ((size_t)iovec->iov_base % 4 != 0) { iovec_aligned.iov_base = alloca(iovec->iov_len); - memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); - iovec_aligned.iov_len = iovec->iov_len; + memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); iovec_aligned.iov_len = iovec->iov_len; iovec = &iovec_aligned; } -#endif + group_len = (unsigned short *)iovec->iov_base; group_name = ((char *)iovec->iov_base) + sizeof (unsigned short) * (group_len[0] + 1); + /* * Calculate amount to adjust the iovec by before delivering to app */ @@ -393,53 +389,46 @@ static inline void app_deliver_fn ( int i; struct totempg_group_instance *instance; struct iovec stripped_iovec; -#ifdef __sparc - struct iovec aligned_iovec = { NULL, 0 }; -#endif unsigned int adjust_iovec; unsigned int res; + struct iovec aligned_iovec = { NULL, 0 }; if (endian_conversion_required) { -#ifdef __sparc - if ((size_t)iovec->iov_base % 4 != 0) { - /* Deal with misalignment */ - aligned_iovec.iov_base = alloca(iovec->iov_len); - aligned_iovec.iov_len = iovec->iov_len; - memcpy(aligned_iovec.iov_base, iovec->iov_base, - iovec->iov_len); - iovec = &aligned_iovec; - } -#endif group_endian_convert (iovec); } + + /* + * Align data structure for sparc and ia64 + */ + aligned_iovec.iov_base = alloca(iovec->iov_len); + aligned_iovec.iov_len = iovec->iov_len; + memcpy(aligned_iovec.iov_base, iovec->iov_base, iovec->iov_len); + iovec = &aligned_iovec; + for (i = 0; i <= totempg_max_handle; i++) { res = hdb_handle_get (&totempg_groups_instance_database, i, (void *)&instance); if (res == 0) { assert (iov_len == 1); - if (group_matches (iovec, iov_len, instance->groups, - instance->groups_cnt, &adjust_iovec)) { - stripped_iovec.iov_len = - iovec->iov_len - adjust_iovec; -#ifndef __sparc - stripped_iovec.iov_base = - (char *)iovec->iov_base + adjust_iovec; -#else + if (group_matches (iovec, iov_len, instance->groups, instance->groups_cnt, &adjust_iovec)) { + stripped_iovec.iov_len = iovec->iov_len - adjust_iovec; +// stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec; + + /* + * Align data structure for sparc and ia64 + */ if (iovec->iov_base + adjust_iovec % 4 != 0) { - /* Deal with misalignment */ /* - * XXX Using alloca() is dangerous, - * since it may be called multiple - * times within the for() loop + * Deal with misalignment */ - stripped_iovec.iov_base = alloca( - stripped_iovec.iov_len); - memcpy(stripped_iovec.iov_base, - iovec->iov_base + adjust_iovec, + stripped_iovec.iov_base = + alloca (stripped_iovec.iov_len); + memcpy (stripped_iovec.iov_base, + iovec->iov_base + adjust_iovec, stripped_iovec.iov_len); } -#endif + instance->deliver_fn ( nodeid, &stripped_iovec, @@ -451,6 +440,7 @@ static inline void app_deliver_fn ( } } } + static void totempg_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, @@ -506,9 +496,7 @@ static void totempg_deliver_fn ( msg_count = mcast->msg_count; datasize = sizeof (struct totempg_mcast) + msg_count * sizeof (unsigned short); - - assert (iovec[0].iov_len >= datasize); - + memcpy (header, iovec[0].iov_base, datasize); assert(iovec); data = iovec[0].iov_base; @@ -525,7 +513,7 @@ static void totempg_deliver_fn ( } else { /* * The message originated from local processor - * because there is greater than one iovec for then full msg. + * becasue there is greater than one iovec for then full msg. */ h_index = 0; for (i = 0; i < 2; i++) { @@ -555,7 +543,7 @@ static void totempg_deliver_fn ( */ msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count; continuation = mcast->continuation; - iov_delv.iov_base = (char *)&assembly->data[0]; + iov_delv.iov_base = &assembly->data[0]; iov_delv.iov_len = assembly->index + msg_lens[0]; /* @@ -592,7 +580,7 @@ static void totempg_deliver_fn ( */ if (!continuation) { assembly->index += msg_lens[0]; - iov_delv.iov_base = (char *)&assembly->data[assembly->index]; + iov_delv.iov_base = &assembly->data[assembly->index]; iov_delv.iov_len = msg_lens[1]; start = 1; } @@ -603,7 +591,7 @@ static void totempg_deliver_fn ( app_deliver_fn(nodeid, &iov_delv, 1, endian_conversion_required); assembly->index += msg_lens[i]; - iov_delv.iov_base = (char *)&assembly->data[assembly->index]; + iov_delv.iov_base = &assembly->data[assembly->index]; if (i < (msg_count - 1)) { iov_delv.iov_len = msg_lens[i + 1]; } @@ -666,9 +654,9 @@ int callback_token_received_fn (enum totem_callback_token_type type, mcast.msg_count = mcast_packed_msg_count; - iovecs[0].iov_base = (char *)&mcast; + iovecs[0].iov_base = &mcast; iovecs[0].iov_len = sizeof (struct totempg_mcast); - iovecs[1].iov_base = (char *)mcast_packed_msg_lens; + iovecs[1].iov_base = mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short); iovecs[2].iov_base = &fragmentation_data[0]; iovecs[2].iov_len = fragment_size; @@ -794,7 +782,7 @@ static int mcast_msg ( * If it just fits or is too big, then send out what fits. */ } else { - char *data_ptr; + unsigned char *data_ptr; copy_len = min(copy_len, max_packet_size - fragment_size); if( copy_len == max_packet_size ) @@ -831,9 +819,9 @@ static int mcast_msg ( * assemble the message and send it */ mcast.msg_count = ++mcast_packed_msg_count; - iovecs[0].iov_base = (char *)&mcast; + iovecs[0].iov_base = &mcast; iovecs[0].iov_len = sizeof(struct totempg_mcast); - iovecs[1].iov_base = (char *)mcast_packed_msg_lens; + iovecs[1].iov_base = mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof(unsigned short); iovecs[2].iov_base = data_ptr; @@ -882,13 +870,14 @@ static int mcast_msg ( /* * Determine if a message of msg_size could be queued */ +#define FUZZY_AVAIL_SUBTRACT 5 static int send_ok ( int msg_size) { int avail = 0; int total; - avail = totemmrp_avail (); + avail = totemmrp_avail () - FUZZY_AVAIL_SUBTRACT; /* * msg size less then totempg_totem_config->net_mtu - 25 will take up @@ -1000,7 +989,7 @@ int totempg_groups_join ( new_groups = realloc (instance->groups, sizeof (struct totempg_group) * (instance->groups_cnt + group_cnt)); - if (new_groups == NULL) { + if (new_groups == 0) { res = ENOMEM; goto error_exit; } @@ -1070,7 +1059,7 @@ int totempg_groups_mcast_joined ( iovec_mcast[i + 1].iov_base = instance->groups[i].group; } iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short); - iovec_mcast[0].iov_base = (char *)group_len; + iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base; @@ -1150,7 +1139,7 @@ int totempg_groups_mcast_groups ( iovec_mcast[i + 1].iov_base = groups[i].group; } iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short); - iovec_mcast[0].iov_base = (char *)group_len; + iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base; diff --git a/exec/totemrrp.c b/exec/totemrrp.c index c697ab9e..8c7ac471 100644 --- a/exec/totemrrp.c +++ b/exec/totemrrp.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. - * Copyright (c) 2006 Red Hat, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * Copyright (c) 2006 Sun Microsystems, Inc. * * All rights reserved. diff --git a/exec/totemrrp.h b/exec/totemrrp.h index 7bea833f..fad81d75 100644 --- a/exec/totemrrp.h +++ b/exec/totemrrp.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2005 MontaVista Software, Inc. + * Copyright (c) 2006-2007 Red Hat, Inc. * * All rights reserved. * diff --git a/exec/totemsrp.c b/exec/totemsrp.c index 9ab39cac..9b2a5418 100644 --- a/exec/totemsrp.c +++ b/exec/totemsrp.c @@ -48,10 +48,6 @@ * usage on 1.6ghz xeon from 35% to less then .1 % as measured by top */ -#ifndef OPENAIS_BSD -#include -#endif - #include #include #include @@ -74,7 +70,6 @@ #include #include -#include "swab.h" #include "aispoll.h" #include "totemsrp.h" #include "totemrrp.h" @@ -500,8 +495,6 @@ struct totemsrp_instance { unsigned int my_pbl; unsigned int my_cbl; - - unsigned int operational_entered_once; }; struct message_handlers { @@ -685,6 +678,7 @@ int totemsrp_initialize ( { struct totemsrp_instance *instance; unsigned int res; + res = hdb_handle_create (&totemsrp_instance_database, sizeof (struct totemsrp_instance), handle); if (res != 0) { @@ -885,10 +879,25 @@ int totemsrp_ifaces_get ( memcpy (interfaces, &instance->my_memb_list[i], sizeof (struct srp_addr)); *iface_count = instance->totem_config->interface_count; + goto finish; + } + + for (i = 0; i < instance->my_left_memb_entries; i++) { + if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) { + found = 1; + break; + } + } + + if (found) { + memcpy (interfaces, &instance->my_left_memb_list[i], + sizeof (struct srp_addr)); + *iface_count = instance->totem_config->interface_count; } else { res = -1; } +finish: totemrrp_ifaces_get (instance->totemrrp_handle, status, NULL); hdb_handle_put (&totemsrp_instance_database, handle); @@ -952,6 +961,7 @@ error_exit: return (res); } + /* * Set operations for use by the membership algorithm */ @@ -1510,12 +1520,12 @@ static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance * Convert recovery message into regular message */ if (recovery_message_item->iov_len > 1) { - mcast = (struct mcast *)recovery_message_item->iovec[1].iov_base; + mcast = recovery_message_item->iovec[1].iov_base; memcpy (®ular_message_item.iovec[0], &recovery_message_item->iovec[1], sizeof (struct iovec) * recovery_message_item->iov_len); } else { - mcast = (struct mcast *)recovery_message_item->iovec[0].iov_base; + mcast = recovery_message_item->iovec[0].iov_base; if (mcast->header.encapsulated == 1) { /* * Message is a recovery message encapsulated @@ -1526,7 +1536,7 @@ static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance regular_message_item.iovec[0].iov_len = recovery_message_item->iovec[0].iov_len - sizeof (struct mcast); regular_message_item.iov_len = 1; - mcast = (struct mcast *)regular_message_item.iovec[0].iov_base; + mcast = regular_message_item.iovec[0].iov_base; } else { continue; /* TODO this case shouldn't happen */ /* @@ -1670,8 +1680,6 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) instance->my_received_flg = 0; - instance->operational_entered_once = 1; - return; } @@ -1808,15 +1816,10 @@ static void memb_state_recovery_enter ( /* * Build regular configuration */ - instance->my_new_memb_entries = commit_token->addr_entries; - totemrrp_processor_count_set ( instance->totemrrp_handle, commit_token->addr_entries); - memcpy (instance->my_new_memb_list, addr, - sizeof (struct srp_addr) * instance->my_new_memb_entries); - /* * Build transitional configuration */ @@ -1838,7 +1841,7 @@ static void memb_state_recovery_enter ( memb_list[i].high_delivered, memb_list[i].received_flg); - assert (totemip_print (&memb_list[i].ring_id.rep) != 0); + // assert (totemip_print (&memb_list[i].ring_id.rep) != 0); } /* * Determine if any received flag is false @@ -2279,15 +2282,15 @@ static int orf_token_mcast ( * Build IO vector */ memset (&sort_queue_item, 0, sizeof (struct sort_queue_item)); - sort_queue_item.iovec[0].iov_base = (char *)message_item->mcast; + sort_queue_item.iovec[0].iov_base = message_item->mcast; sort_queue_item.iovec[0].iov_len = sizeof (struct mcast); - mcast = (struct mcast *)sort_queue_item.iovec[0].iov_base; + mcast = sort_queue_item.iovec[0].iov_base; memcpy (&sort_queue_item.iovec[1], message_item->iovec, message_item->iov_len * sizeof (struct iovec)); - memb_ring_id_copy (&mcast->ring_id, &instance->my_ring_id); + memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); sort_queue_item.iov_len = message_item->iov_len + 1; @@ -2435,8 +2438,8 @@ static int orf_token_rtr ( /* * Missing message not found in current retransmit list so add it */ - memb_ring_id_copy (&rtr_list[orf_token->rtr_list_entries].ring_id, - &instance->my_ring_id); + memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id, + &instance->my_ring_id, sizeof (struct memb_ring_id)); rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i; orf_token->rtr_list_entries++; } @@ -2606,7 +2609,7 @@ static int orf_token_send_initial (struct totemsrp_instance *instance) orf_token.aru = SEQNO_START_MSG - 1; orf_token.aru_addr = instance->my_id.addr[0].nodeid; - memb_ring_id_copy (&orf_token.ring_id, &instance->my_ring_id); + memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id)); orf_token.fcc = 0; orf_token.backlog = 0; @@ -2627,8 +2630,13 @@ static void memb_state_commit_token_update ( addr = (struct srp_addr *)commit_token->end_of_commit_token; memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries); - memb_ring_id_copy (&memb_list[commit_token->memb_index].ring_id, - &instance->my_old_ring_id); + memcpy (instance->my_new_memb_list, addr, + sizeof (struct srp_addr) * commit_token->addr_entries); + + instance->my_new_memb_entries = commit_token->addr_entries; + + memcpy (&memb_list[commit_token->memb_index].ring_id, + &instance->my_old_ring_id, sizeof (struct memb_ring_id)); assert (!totemip_zero_check(&instance->my_old_ring_id.rep)); memb_list[commit_token->memb_index].aru = instance->old_ring_state_aru; @@ -2757,17 +2765,7 @@ static void memb_state_commit_token_create ( totemip_copy(&commit_token->ring_id.rep, &instance->my_id.addr[0]); - /* - * The first time operational is entered, don't increment the ring - * sequence number (just reload it from stable storage). This prevents - * an error condition where if the executive is stopped and started - * before a new ring is formed, the protocol will get stuck in recovery. - */ - if (instance->operational_entered_once) { - commit_token->ring_id.seq = instance->token_ring_id_seq + 4; - } else { - commit_token->ring_id.seq = instance->token_ring_id_seq; - } + commit_token->ring_id.seq = instance->token_ring_id_seq + 4; /* * This qsort is necessary to ensure the commit token traverses @@ -2877,7 +2875,6 @@ static void memb_ring_id_create_or_load ( } res = write (fd, &memb_ring_id->seq, sizeof (unsigned long long)); assert (res == sizeof (unsigned long long)); - fsync (fd); close (fd); } else { log_printf (instance->totemsrp_log_level_warning, @@ -2918,7 +2915,6 @@ static void memb_ring_id_set_and_store ( //assert (fd > 0); res = write (fd, &instance->my_ring_id.seq, sizeof (unsigned long long)); assert (res == sizeof (unsigned long long)); - fsync (fd); close (fd); } @@ -3850,9 +3846,8 @@ static void memb_commit_token_endian_convert (struct memb_commit_token *in, stru * Only convert the memb entry if it has been set */ if (in_memb_list[i].ring_id.rep.family != 0) { - memb_ring_id_copy_endian_convert ( - &out_memb_list[i].ring_id, - &in_memb_list[i].ring_id); + totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep, + &in_memb_list[i].ring_id.rep); out_memb_list[i].ring_id.seq = swab64 (in_memb_list[i].ring_id.seq); @@ -3873,15 +3868,16 @@ static void orf_token_endian_convert (struct orf_token *in, struct orf_token *ou out->seq = swab32 (in->seq); out->token_seq = swab32 (in->token_seq); out->aru = swab32 (in->aru); - memb_ring_id_copy_endian_convert (&out->ring_id, &in->ring_id); + totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); out->aru_addr = swab32(in->aru_addr); + out->ring_id.seq = swab64 (in->ring_id.seq); out->fcc = swab32 (in->fcc); out->backlog = swab32 (in->backlog); out->retrans_flg = swab32 (in->retrans_flg); out->rtr_list_entries = swab32 (in->rtr_list_entries); for (i = 0; i < out->rtr_list_entries; i++) { - memb_ring_id_copy_endian_convert (&out->rtr_list[i].ring_id, - &in->rtr_list[i].ring_id); + totemip_copy_endian_convert(&out->rtr_list[i].ring_id.rep, &in->rtr_list[i].ring_id.rep); + out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq); out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq); } } @@ -3895,7 +3891,8 @@ static void mcast_endian_convert (struct mcast *in, struct mcast *out) out->seq = swab32 (in->seq); out->this_seqno = swab32 (in->this_seqno); - memb_ring_id_copy_endian_convert (&out->ring_id, &in->ring_id); + totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); + out->ring_id.seq = swab64 (in->ring_id.seq); out->node_id = swab32 (in->node_id); out->guarantee = swab32 (in->guarantee); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); @@ -3908,7 +3905,8 @@ static void memb_merge_detect_endian_convert ( out->header.type = in->header.type; out->header.endian_detector = ENDIAN_LOCAL; out->header.nodeid = swab32 (in->header.nodeid); - memb_ring_id_copy_endian_convert(&out->ring_id, &in->ring_id); + totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep); + out->ring_id.seq = swab64 (in->ring_id.seq); srp_addr_copy_endian_convert (&out->system_from, &in->system_from); }