Fix a ton of bugs in totem by forward porting the whitetank totem code into

trunk.


git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@1562 fd59a12c-fef9-0310-b244-a6a79926bd2f
This commit is contained in:
Steven Dake 2008-06-24 04:45:38 +00:00
parent 9e2376fcc0
commit a7f4b6d8cb
12 changed files with 143 additions and 266 deletions

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006 Red Hat, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
* Copyright (c) 2006 Sun Microsystems, Inc.
*
* Author: Steven Dake (sdake@redhat.com)
@ -164,26 +164,5 @@ struct memb_ring_id {
unsigned long long seq;
} __attribute__((packed));
typedef struct memb_ring_id memb_ring_id_t;
static inline void swab_memb_ring_id_t (memb_ring_id_t *to_swab)
{
swab_totem_ip_address_t (&to_swab->rep);
to_swab->seq = swab64 (to_swab->seq);
}
static inline void memb_ring_id_copy(
memb_ring_id_t *out, memb_ring_id_t *in)
{
totemip_copy (&out->rep, &in->rep);
out->seq = in->seq;
}
static inline void memb_ring_id_copy_endian_convert(
memb_ring_id_t *out, memb_ring_id_t *in)
{
totemip_copy_endian_convert (&out->rep, &in->rep);
out->seq = swab64 (in->seq);
}
#endif /* TOTEM_H_DEFINED */

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002-2005 MontaVista Software, Inc.
* Copyright (c) 2006 RedHat, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
*
* All rights reserved.
*
@ -46,14 +46,16 @@
#include <arpa/inet.h>
#include <sys/param.h>
#include "swab.h"
#include "list.h"
#include "../include/list.h"
#include "util.h"
#include "totem.h"
#include "totemconfig.h"
#include "logsys.h"
#include "objdb.h"
#include "tlist.h" /* for HZ */
#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN)
#define HZ 100 /* 10ms */
#endif
#define TOKEN_RETRANSMITS_BEFORE_LOSS_CONST 4
#define TOKEN_TIMEOUT 1000
@ -232,7 +234,7 @@ extern int totem_config_read (
* Get mcast port
*/
if (!objdb_get_string (objdb, object_interface_handle, "mcastport", &str)) {
totem_config->interfaces[ringnumber].ip_port = atoi (str);
totem_config->interfaces[ringnumber].ip_port = htons (atoi (str));
}
/*

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
*
* All rights reserved.
*

View File

@ -1,6 +1,5 @@
/*
* Copyright (c) 2005 Red Hat Inc
* Copyright (c) 2006 Sun Microsystems, Inc.
* Copyright (c) 2005-2007 Red Hat, Inc.
*
* All rights reserved.
*
@ -42,12 +41,10 @@
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) || defined(OPENAIS_SOLARIS)
#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN)
#include <sys/sockio.h>
#include <net/if.h>
#ifndef OPENAIS_SOLARIS
#include <net/if_var.h>
#endif
#include <netinet/in_var.h>
#endif
#include <string.h>
@ -63,24 +60,14 @@
/* ARGH!! I hate netlink */
#include <asm/types.h>
#include <linux/rtnetlink.h>
/* this should catch 2.6.19 headers */
#ifndef IFA_MAX
#include <linux/if_addr.h>
#endif
/* redefine macro that disappeared in 2.6.19 */
#ifndef IFA_RTA
#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
#endif
#endif
#if ! defined(OPENAIS_SOLARIS) && ! defined(s6_addr16)
#ifndef s6_addr16
#define s6_addr16 __u6_addr.__u6_addr16
#endif
#include "swab.h"
#include "totemip.h"
#include "swab.h"
#define LOCALHOST_IPV4 "127.0.0.1"
#define LOCALHOST_IPV6 "::1"
@ -128,9 +115,7 @@ void totemip_copy_endian_convert(struct totem_ip_address *addr1, struct totem_ip
{
addr1->nodeid = swab32(addr2->nodeid);
addr1->family = swab16(addr2->family);
if (addr1 != addr2) {
memcpy(addr1->addr, addr2->addr, TOTEMIP_ADDRLEN);
}
memcpy(addr1->addr, addr2->addr, TOTEMIP_ADDRLEN);
}
/* For sorting etc. params are void * for qsort's benefit */
@ -169,13 +154,8 @@ int totemip_compare(const void *a, const void *b)
memcpy (&ipv6_a1, totemip_a->addr, sizeof (struct in6_addr));
memcpy (&ipv6_a2, totemip_b->addr, sizeof (struct in6_addr));
for (i = 0; i < 8; i++) {
#ifndef OPENAIS_SOLARIS
int res = htons(ipv6_a1.s6_addr16[i]) -
htons(ipv6_a2.s6_addr16[i]);
#else
int res = htons(((uint16_t *)ipv6_a1.s6_addr)[i]) -
htons(((uint16_t *)ipv6_a2.s6_addr)[i]);
#endif
int res = htons(ipv6_a1.s6_addr16[i]) -
htons(ipv6_a2.s6_addr16[i]);
if (res) {
return res;
}
@ -186,7 +166,6 @@ int totemip_compare(const void *a, const void *b)
* Family not set, should be!
*/
assert (0);
exit (1);
}
}
@ -194,16 +173,14 @@ int totemip_compare(const void *a, const void *b)
int totemip_localhost(int family, struct totem_ip_address *localhost)
{
char *addr_text;
uint32_t nodeid;
memset (localhost, 0, sizeof (struct totem_ip_address));
if (family == AF_INET) {
addr_text = LOCALHOST_IPV4;
if (inet_pton(family, addr_text, (char *)&nodeid) <= 0) {
if (inet_pton(family, addr_text, (char *)&localhost->nodeid) <= 0) {
return -1;
}
localhost->nodeid = ntohl(nodeid);
} else {
addr_text = LOCALHOST_IPV6;
}
@ -246,7 +223,7 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr,
sin->sin_len = sizeof(struct sockaddr_in);
#endif
sin->sin_family = ip_addr->family;
sin->sin_port = htons (port);
sin->sin_port = port;
memcpy(&sin->sin_addr, ip_addr->addr, sizeof(struct in_addr));
*addrlen = sizeof(struct sockaddr_in);
ret = 0;
@ -260,7 +237,7 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr,
sin->sin6_len = sizeof(struct sockaddr_in6);
#endif
sin->sin6_family = ip_addr->family;
sin->sin6_port = htons (port);
sin->sin6_port = port;
sin->sin6_scope_id = 2;
memcpy(&sin->sin6_addr, ip_addr->addr, sizeof(struct in6_addr));
@ -272,8 +249,8 @@ int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr,
}
/* Converts an address string into a totem_ip_address.
* family can be AF_INET, AF_INET6 or 0 (for "don't care")
*/
family can be AF_INET, AF_INET6 or 0 (for "don't care")
*/
int totemip_parse(struct totem_ip_address *totemip, char *addr, int family)
{
struct addrinfo *ainfo;
@ -301,8 +278,6 @@ int totemip_parse(struct totem_ip_address *totemip, char *addr, int family)
else
memcpy(totemip->addr, &sa6->sin6_addr, sizeof(struct in6_addr));
freeaddrinfo(ainfo);
return 0;
}
@ -331,19 +306,14 @@ int totemip_sockaddr_to_totemip_convert(struct sockaddr_storage *saddr, struct t
return ret;
}
#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN) || defined(OPENAIS_SOLARIS)
#if defined(OPENAIS_BSD) || defined(OPENAIS_DARWIN)
int totemip_iface_check(struct totem_ip_address *bindnet,
struct totem_ip_address *boundto,
int *interface_up,
int *interface_num)
{
#ifndef OPENAIS_SOLARIS
#define NEXT_IFR(a) ((struct ifreq *)((u_char *)&(a)->ifr_addr +\
((a)->ifr_addr.sa_len ? (a)->ifr_addr.sa_len : sizeof((a)->ifr_addr))))
#else
#define NEXT_IFR(a) ((struct ifreq *)((u_char *)&(a)->ifr_addr +\
sizeof((a)->ifr_addr)))
#endif
struct sockaddr_in *intf_addr_mask;
struct sockaddr_storage bindnet_ss, intf_addr_ss;
@ -367,24 +337,14 @@ int totemip_iface_check(struct totem_ip_address *bindnet,
* Generate list of local interfaces in ifc.ifc_req structure
*/
id_fd = socket (AF_INET, SOCK_DGRAM, 0);
ifc.ifc_buf = NULL;
ifc.ifc_buf = 0;
do {
void *ifc_buf_tmp;
numreqs += 32;
ifc.ifc_len = sizeof (struct ifreq) * numreqs;
ifc_buf_tmp = realloc (ifc.ifc_buf, ifc.ifc_len);
if (ifc_buf_tmp == NULL) {
close (id_fd);
if (ifc.ifc_buf != NULL) {
free (ifc.ifc_buf);
}
return -1;
}
ifc.ifc_buf = ifc_buf_tmp;
ifc.ifc_buf = (void *)realloc(ifc.ifc_buf, ifc.ifc_len);
res = ioctl (id_fd, SIOCGIFCONF, &ifc);
if (res < 0) {
close (id_fd);
free (ifc.ifc_buf);
return -1;
}
} while (ifc.ifc_len == sizeof (struct ifreq) * numreqs);
@ -441,9 +401,7 @@ int totemip_iface_check(struct totem_ip_address *bindnet,
}
}
}
if (ifc.ifc_buf != NULL) {
free (ifc.ifc_buf);
}
free (ifc.ifc_buf);
close (id_fd);
return (res);
@ -544,8 +502,9 @@ int totemip_iface_check(struct totem_ip_address *bindnet,
parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), len);
memcpy(ipaddr.addr, RTA_DATA(tb[IFA_ADDRESS]), TOTEMIP_ADDRLEN);
if (totemip_equal(&ipaddr, bindnet))
if (totemip_equal(&ipaddr, bindnet)) {
found_if = 1;
}
/* If the address we have is an IPv4 network address, then
substitute the actual IP address of this interface */

View File

@ -1,11 +1,10 @@
/*
* Copyright (c) 2005 Red Hat Inc
* Author: Patrick Caulfield (pcaulfie@redhat.com)
*
* Copyright (c) 2006 Sun Microsystems, Inc.
* Copyright (c) 2005-2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Patrick Caulfield (pcaulfie@redhat.com)
*
* This software licensed under BSD license, the text of which follows:
*
* Redistribution and use in source and binary forms, with or without
@ -38,8 +37,6 @@
#ifndef TOTEMIP_H_DEFINED
#define TOTEMIP_H_DEFINED
#include <assert.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
@ -59,16 +56,9 @@ struct totem_ip_address
{
unsigned int nodeid;
unsigned short family;
unsigned char addr[TOTEMIP_ADDRLEN]; /* in Network Byteorder */
unsigned char addr[TOTEMIP_ADDRLEN];
} __attribute__((packed));
typedef struct totem_ip_address totem_ip_address_t;
static inline void swab_totem_ip_address_t (totem_ip_address_t *to_swab)
{
to_swab->nodeid = swab32 (to_swab->nodeid);
to_swab->family = swab16 (to_swab->family);
}
extern int totemip_equal(struct totem_ip_address *addr1, struct totem_ip_address *addr2);
extern int totemip_compare(const void *a, const void *b);
@ -93,13 +83,4 @@ static inline int totemip_zero_check(struct totem_ip_address *addr)
return (addr->family == 0);
}
static inline unsigned int totemip_compute_nodeid_from_addr(
const struct totem_ip_address *addr)
{
struct in_addr *in = (struct in_addr *)addr->addr;
assert(addr->family == AF_INET);
return (unsigned int)ntohl(in->s_addr);
}
#endif

View File

@ -54,7 +54,6 @@
#include <sys/time.h>
#include <sys/poll.h>
#include "swab.h"
#include "totem.h"
#include "totemsrp.h"
#include "aispoll.h"

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006 Red Hat, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
* Copyright (c) 2006 Sun Microsystems, Inc.
*
* All rights reserved.
@ -60,7 +60,6 @@
#include "aispoll.h"
#include "totemnet.h"
#include "wthread.h"
#include "swab.h"
#include "../include/queue.h"
#include "../include/sq.h"
#include "../include/list.h"
@ -69,10 +68,6 @@
#include "crypto.h"
#ifdef OPENAIS_SOLARIS
#define MSG_NOSIGNAL 0
#endif
#define MCAST_SOCKET_BUFFER_SIZE (TRANSMITS_ALLOWED * FRAME_SIZE_MAX)
#define NETIF_STATE_REPORT_UP 1
@ -240,8 +235,7 @@ static int authenticate_and_decrypt (
struct iovec *iov)
{
unsigned char keys[48];
struct security_header *header =
(struct security_header *)iov[0].iov_base;
struct security_header *header = iov[0].iov_base;
prng_state keygen_prng_state;
prng_state stream_prng_state;
unsigned char *hmac_key = &keys[32];
@ -274,7 +268,7 @@ static int authenticate_and_decrypt (
hmac_init (&instance->totemnet_hmac_state, DIGEST_SHA1, hmac_key, 16);
hmac_process (&instance->totemnet_hmac_state,
(unsigned char *)iov->iov_base + HMAC_HASH_SIZE,
iov->iov_base + HMAC_HASH_SIZE,
iov->iov_len - HMAC_HASH_SIZE);
len = hash_descriptor[DIGEST_SHA1]->hashsize;
@ -289,10 +283,8 @@ static int authenticate_and_decrypt (
/*
* Decrypt the contents of the message with the cipher key
*/
assert(iov->iov_len >= sizeof (struct security_header));
sober128_read (
(unsigned char *)iov->iov_base + sizeof (struct security_header),
(unsigned long)iov->iov_len - sizeof (struct security_header),
sober128_read (iov->iov_base + sizeof (struct security_header),
iov->iov_len - sizeof (struct security_header),
&stream_prng_state);
return (0);
@ -398,7 +390,7 @@ static inline void ucast_sendmsg (
if (instance->totem_config->secauth == 1) {
iovec_encrypt[0].iov_base = (char *)sheader;
iovec_encrypt[0].iov_base = sheader;
iovec_encrypt[0].iov_len = sizeof (struct security_header);
memcpy (&iovec_encrypt[1], &iovec_in[0],
sizeof (struct iovec) * iov_len_in);
@ -414,7 +406,7 @@ static inline void ucast_sendmsg (
iov_len_in + 1,
&instance->totemnet_prng_state);
iovec_encrypt[0].iov_base = (char *)encrypt_data;
iovec_encrypt[0].iov_base = encrypt_data;
iovec_encrypt[0].iov_len = buf_len;
iovec_sendmsg = &iovec_encrypt[0];
iov_len = 1;
@ -432,14 +424,9 @@ static inline void ucast_sendmsg (
msg_ucast.msg_namelen = addrlen;
msg_ucast.msg_iov = iovec_sendmsg;
msg_ucast.msg_iovlen = iov_len;
#ifndef OPENAIS_SOLARIS
msg_ucast.msg_control = 0;
msg_ucast.msg_controllen = 0;
msg_ucast.msg_flags = 0;
#else
msg_ucast.msg_accrights = NULL;
msg_ucast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
@ -467,7 +454,7 @@ static inline void mcast_sendmsg (
if (instance->totem_config->secauth == 1) {
iovec_encrypt[0].iov_base = (char *)sheader;
iovec_encrypt[0].iov_base = sheader;
iovec_encrypt[0].iov_len = sizeof (struct security_header);
memcpy (&iovec_encrypt[1], &iovec_in[0],
sizeof (struct iovec) * iov_len_in);
@ -483,7 +470,7 @@ static inline void mcast_sendmsg (
iov_len_in + 1,
&instance->totemnet_prng_state);
iovec_encrypt[0].iov_base = (char *)encrypt_data;
iovec_encrypt[0].iov_base = encrypt_data;
iovec_encrypt[0].iov_len = buf_len;
iovec_sendmsg = &iovec_encrypt[0];
iov_len = 1;
@ -501,14 +488,9 @@ static inline void mcast_sendmsg (
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = iovec_sendmsg;
msg_mcast.msg_iovlen = iov_len;
#ifndef OPENAIS_SOLARIS
msg_mcast.msg_control = 0;
msg_mcast.msg_controllen = 0;
msg_mcast.msg_flags = 0;
#else
msg_mcast.msg_accrights = NULL;
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
@ -550,7 +532,7 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in)
if (instance->totem_config->secauth == 1) {
memmove (&work_item->iovec[1], &work_item->iovec[0],
work_item->iov_len * sizeof (struct iovec));
work_item->iovec[0].iov_base = (char *)sheader;
work_item->iovec[0].iov_base = sheader;
work_item->iovec[0].iov_len = sizeof (struct security_header);
/*
@ -563,7 +545,7 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in)
&totemnet_mcast_thread_state->prng_state);
iovec_sendmsg = &iovec_encrypted;
iovec_sendmsg->iov_base = (char *)totemnet_mcast_thread_state->iobuf;
iovec_sendmsg->iov_base = totemnet_mcast_thread_state->iobuf;
iovec_sendmsg->iov_len = buf_len;
iovs = 1;
} else {
@ -578,14 +560,9 @@ static void totemnet_mcast_worker_fn (void *thread_state, void *work_item_in)
msg_mcast.msg_namelen = addrlen;
msg_mcast.msg_iov = iovec_sendmsg;
msg_mcast.msg_iovlen = iovs;
#ifndef OPENAIS_SOLARIS
msg_mcast.msg_control = 0;
msg_mcast.msg_controllen = 0;
msg_mcast.msg_flags = 0;
#else
msg_mcast.msg_accrights = NULL;
msg_mcast.msg_accrightslen = 0;
#endif
/*
* Transmit multicast message
@ -636,7 +613,7 @@ static int net_deliver_fn (
struct sockaddr_storage system_from;
int bytes_received;
int res = 0;
char *msg_offset;
unsigned char *msg_offset;
unsigned int size_delv;
if (instance->flushing == 1) {
@ -652,14 +629,9 @@ static int net_deliver_fn (
msg_recv.msg_namelen = sizeof (struct sockaddr_storage);
msg_recv.msg_iov = iovec;
msg_recv.msg_iovlen = 1;
#ifndef OPENAIS_SOLARIS
msg_recv.msg_control = 0;
msg_recv.msg_controllen = 0;
msg_recv.msg_flags = 0;
#else
msg_recv.msg_accrights = NULL;
msg_recv.msg_accrightslen = 0;
#endif
bytes_received = recvmsg (fd, &msg_recv, MSG_NOSIGNAL | MSG_DONTWAIT);
if (bytes_received == -1) {
@ -730,7 +702,7 @@ static int netif_determine (
* field is only 32 bits.
*/
if (bound_to->family == AF_INET && bound_to->nodeid == 0) {
bound_to->nodeid = totemip_compute_nodeid_from_addr(bound_to);
memcpy (&bound_to->nodeid, bound_to->addr, sizeof (int));
}
return (res);
@ -1071,7 +1043,6 @@ static int totemnet_build_sockets_ip (
break;
}
#ifndef OPENAIS_SOLARIS
/*
* Turn on multicast loopback
*/
@ -1079,18 +1050,17 @@ static int totemnet_build_sockets_ip (
flag = 1;
switch ( bindnet_address->family ) {
case AF_INET:
res = setsockopt (sockets->mcast_recv, IPPROTO_IP, IP_MULTICAST_LOOP,
res = setsockopt (sockets->mcast_send, IPPROTO_IP, IP_MULTICAST_LOOP,
&flag, sizeof (flag));
break;
case AF_INET6:
res = setsockopt (sockets->mcast_recv, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
res = setsockopt (sockets->mcast_send, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
&flag, sizeof (flag));
}
if (res == -1) {
perror ("turn off loopback");
return (-1);
}
#endif
/*
* Set multicast packets TTL
@ -1107,7 +1077,6 @@ static int totemnet_build_sockets_ip (
}
}
#ifndef OPENAIS_SOLARIS
/*
* Bind to a specific interface for multicast send and receive
*/
@ -1137,7 +1106,6 @@ static int totemnet_build_sockets_ip (
}
break;
}
#endif
return 0;
}

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
*
* All rights reserved.
*
@ -37,7 +38,6 @@
#include <sys/types.h>
#include <sys/socket.h>
#include "swab.h"
#include "totem.h"
#include "aispoll.h"

View File

@ -1,13 +1,13 @@
/*
* Copyright (c) 2003-2005 MontaVista Software, Inc.
* Copyright (c) 2005 OSDL.
* Copyright (c) 2006-2007 Red Hat, Inc.
* Copyright (c) 2006 Sun Microsystems, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
*
* All rights reserved.
*
* Author: Steven Dake (sdake@redhat.com)
* Mark Haverkamp (markh@osdl.org)
* Author: Mark Haverkamp (markh@osdl.org)
*
* This software licensed under BSD license, the text of which follows:
*
@ -83,9 +83,6 @@
*
*/
#ifndef OPENAIS_BSD
#include <alloca.h>
#endif
#include <netinet/in.h>
#include <sys/uio.h>
#include <stdio.h>
@ -95,7 +92,6 @@
#include <pthread.h>
#include <errno.h>
#include "swab.h"
#include "../include/hdb.h"
#include "../include/list.h"
#include "totempg.h"
@ -182,7 +178,7 @@ DECLARE_LIST_INIT(assembly_list_free);
* fragment_continuation indicates whether the first packed message in
* the buffer is a continuation of a previously packed fragment.
*/
static char *fragmentation_data;
static unsigned char *fragmentation_data;
static int fragment_size = 0;
@ -341,25 +337,25 @@ static inline int group_matches (
char *group_name;
int i;
int j;
#ifdef __sparc
struct iovec iovec_aligned = { NULL, 0 };
#endif
struct iovec iovec_aligned = { NULL, 0 };
assert (iov_len == 1);
#ifdef __sparc
/*
* Align data structure for sparc and ia64
*/
if ((size_t)iovec->iov_base % 4 != 0) {
iovec_aligned.iov_base = alloca(iovec->iov_len);
memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len);
iovec_aligned.iov_len = iovec->iov_len;
memcpy(iovec_aligned.iov_base, iovec->iov_base, iovec->iov_len); iovec_aligned.iov_len = iovec->iov_len;
iovec = &iovec_aligned;
}
#endif
group_len = (unsigned short *)iovec->iov_base;
group_name = ((char *)iovec->iov_base) +
sizeof (unsigned short) * (group_len[0] + 1);
/*
* Calculate amount to adjust the iovec by before delivering to app
*/
@ -393,53 +389,46 @@ static inline void app_deliver_fn (
int i;
struct totempg_group_instance *instance;
struct iovec stripped_iovec;
#ifdef __sparc
struct iovec aligned_iovec = { NULL, 0 };
#endif
unsigned int adjust_iovec;
unsigned int res;
struct iovec aligned_iovec = { NULL, 0 };
if (endian_conversion_required) {
#ifdef __sparc
if ((size_t)iovec->iov_base % 4 != 0) {
/* Deal with misalignment */
aligned_iovec.iov_base = alloca(iovec->iov_len);
aligned_iovec.iov_len = iovec->iov_len;
memcpy(aligned_iovec.iov_base, iovec->iov_base,
iovec->iov_len);
iovec = &aligned_iovec;
}
#endif
group_endian_convert (iovec);
}
/*
* Align data structure for sparc and ia64
*/
aligned_iovec.iov_base = alloca(iovec->iov_len);
aligned_iovec.iov_len = iovec->iov_len;
memcpy(aligned_iovec.iov_base, iovec->iov_base, iovec->iov_len);
iovec = &aligned_iovec;
for (i = 0; i <= totempg_max_handle; i++) {
res = hdb_handle_get (&totempg_groups_instance_database,
i, (void *)&instance);
if (res == 0) {
assert (iov_len == 1);
if (group_matches (iovec, iov_len, instance->groups,
instance->groups_cnt, &adjust_iovec)) {
stripped_iovec.iov_len =
iovec->iov_len - adjust_iovec;
#ifndef __sparc
stripped_iovec.iov_base =
(char *)iovec->iov_base + adjust_iovec;
#else
if (group_matches (iovec, iov_len, instance->groups, instance->groups_cnt, &adjust_iovec)) {
stripped_iovec.iov_len = iovec->iov_len - adjust_iovec;
// stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec;
/*
* Align data structure for sparc and ia64
*/
if (iovec->iov_base + adjust_iovec % 4 != 0) {
/* Deal with misalignment */
/*
* XXX Using alloca() is dangerous,
* since it may be called multiple
* times within the for() loop
* Deal with misalignment
*/
stripped_iovec.iov_base = alloca(
stripped_iovec.iov_len);
memcpy(stripped_iovec.iov_base,
iovec->iov_base + adjust_iovec,
stripped_iovec.iov_base =
alloca (stripped_iovec.iov_len);
memcpy (stripped_iovec.iov_base,
iovec->iov_base + adjust_iovec,
stripped_iovec.iov_len);
}
#endif
instance->deliver_fn (
nodeid,
&stripped_iovec,
@ -451,6 +440,7 @@ static inline void app_deliver_fn (
}
}
}
static void totempg_confchg_fn (
enum totem_configuration_type configuration_type,
unsigned int *member_list, int member_list_entries,
@ -506,9 +496,7 @@ static void totempg_deliver_fn (
msg_count = mcast->msg_count;
datasize = sizeof (struct totempg_mcast) +
msg_count * sizeof (unsigned short);
assert (iovec[0].iov_len >= datasize);
memcpy (header, iovec[0].iov_base, datasize);
assert(iovec);
data = iovec[0].iov_base;
@ -525,7 +513,7 @@ static void totempg_deliver_fn (
} else {
/*
* The message originated from local processor
* because there is greater than one iovec for then full msg.
* because there is greater than one iovec for the full msg.
*/
h_index = 0;
for (i = 0; i < 2; i++) {
@ -555,7 +543,7 @@ static void totempg_deliver_fn (
*/
msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count;
continuation = mcast->continuation;
iov_delv.iov_base = (char *)&assembly->data[0];
iov_delv.iov_base = &assembly->data[0];
iov_delv.iov_len = assembly->index + msg_lens[0];
/*
@ -592,7 +580,7 @@ static void totempg_deliver_fn (
*/
if (!continuation) {
assembly->index += msg_lens[0];
iov_delv.iov_base = (char *)&assembly->data[assembly->index];
iov_delv.iov_base = &assembly->data[assembly->index];
iov_delv.iov_len = msg_lens[1];
start = 1;
}
@ -603,7 +591,7 @@ static void totempg_deliver_fn (
app_deliver_fn(nodeid, &iov_delv, 1,
endian_conversion_required);
assembly->index += msg_lens[i];
iov_delv.iov_base = (char *)&assembly->data[assembly->index];
iov_delv.iov_base = &assembly->data[assembly->index];
if (i < (msg_count - 1)) {
iov_delv.iov_len = msg_lens[i + 1];
}
@ -666,9 +654,9 @@ int callback_token_received_fn (enum totem_callback_token_type type,
mcast.msg_count = mcast_packed_msg_count;
iovecs[0].iov_base = (char *)&mcast;
iovecs[0].iov_base = &mcast;
iovecs[0].iov_len = sizeof (struct totempg_mcast);
iovecs[1].iov_base = (char *)mcast_packed_msg_lens;
iovecs[1].iov_base = mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short);
iovecs[2].iov_base = &fragmentation_data[0];
iovecs[2].iov_len = fragment_size;
@ -794,7 +782,7 @@ static int mcast_msg (
* If it just fits or is too big, then send out what fits.
*/
} else {
char *data_ptr;
unsigned char *data_ptr;
copy_len = min(copy_len, max_packet_size - fragment_size);
if( copy_len == max_packet_size )
@ -831,9 +819,9 @@ static int mcast_msg (
* assemble the message and send it
*/
mcast.msg_count = ++mcast_packed_msg_count;
iovecs[0].iov_base = (char *)&mcast;
iovecs[0].iov_base = &mcast;
iovecs[0].iov_len = sizeof(struct totempg_mcast);
iovecs[1].iov_base = (char *)mcast_packed_msg_lens;
iovecs[1].iov_base = mcast_packed_msg_lens;
iovecs[1].iov_len = mcast_packed_msg_count *
sizeof(unsigned short);
iovecs[2].iov_base = data_ptr;
@ -882,13 +870,14 @@ static int mcast_msg (
/*
* Determine if a message of msg_size could be queued
*/
#define FUZZY_AVAIL_SUBTRACT 5
static int send_ok (
int msg_size)
{
int avail = 0;
int total;
avail = totemmrp_avail ();
avail = totemmrp_avail () - FUZZY_AVAIL_SUBTRACT;
/*
* msg size less than totempg_totem_config->net_mtu - 25 will take up
@ -1000,7 +989,7 @@ int totempg_groups_join (
new_groups = realloc (instance->groups,
sizeof (struct totempg_group) *
(instance->groups_cnt + group_cnt));
if (new_groups == NULL) {
if (new_groups == 0) {
res = ENOMEM;
goto error_exit;
}
@ -1070,7 +1059,7 @@ int totempg_groups_mcast_joined (
iovec_mcast[i + 1].iov_base = instance->groups[i].group;
}
iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short);
iovec_mcast[0].iov_base = (char *)group_len;
iovec_mcast[0].iov_base = group_len;
for (i = 0; i < iov_len; i++) {
iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len;
iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base;
@ -1150,7 +1139,7 @@ int totempg_groups_mcast_groups (
iovec_mcast[i + 1].iov_base = groups[i].group;
}
iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short);
iovec_mcast[0].iov_base = (char *)group_len;
iovec_mcast[0].iov_base = group_len;
for (i = 0; i < iov_len; i++) {
iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len;
iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006 Red Hat, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
* Copyright (c) 2006 Sun Microsystems, Inc.
*
* All rights reserved.

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2005 MontaVista Software, Inc.
* Copyright (c) 2006-2007 Red Hat, Inc.
*
* All rights reserved.
*

View File

@ -48,10 +48,6 @@
* usage on 1.6 GHz Xeon from 35% to less than 0.1% as measured by top
*/
#ifndef OPENAIS_BSD
#include <alloca.h>
#endif
#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
@ -74,7 +70,6 @@
#include <sys/time.h>
#include <sys/poll.h>
#include "swab.h"
#include "aispoll.h"
#include "totemsrp.h"
#include "totemrrp.h"
@ -500,8 +495,6 @@ struct totemsrp_instance {
unsigned int my_pbl;
unsigned int my_cbl;
unsigned int operational_entered_once;
};
struct message_handlers {
@ -685,6 +678,7 @@ int totemsrp_initialize (
{
struct totemsrp_instance *instance;
unsigned int res;
res = hdb_handle_create (&totemsrp_instance_database,
sizeof (struct totemsrp_instance), handle);
if (res != 0) {
@ -885,10 +879,25 @@ int totemsrp_ifaces_get (
memcpy (interfaces, &instance->my_memb_list[i],
sizeof (struct srp_addr));
*iface_count = instance->totem_config->interface_count;
goto finish;
}
for (i = 0; i < instance->my_left_memb_entries; i++) {
if (instance->my_left_memb_list[i].addr[0].nodeid == nodeid) {
found = 1;
break;
}
}
if (found) {
memcpy (interfaces, &instance->my_left_memb_list[i],
sizeof (struct srp_addr));
*iface_count = instance->totem_config->interface_count;
} else {
res = -1;
}
finish:
totemrrp_ifaces_get (instance->totemrrp_handle, status, NULL);
hdb_handle_put (&totemsrp_instance_database, handle);
@ -952,6 +961,7 @@ error_exit:
return (res);
}
/*
* Set operations for use by the membership algorithm
*/
@ -1510,12 +1520,12 @@ static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance
* Convert recovery message into regular message
*/
if (recovery_message_item->iov_len > 1) {
mcast = (struct mcast *)recovery_message_item->iovec[1].iov_base;
mcast = recovery_message_item->iovec[1].iov_base;
memcpy (&regular_message_item.iovec[0],
&recovery_message_item->iovec[1],
sizeof (struct iovec) * recovery_message_item->iov_len);
} else {
mcast = (struct mcast *)recovery_message_item->iovec[0].iov_base;
mcast = recovery_message_item->iovec[0].iov_base;
if (mcast->header.encapsulated == 1) {
/*
* Message is a recovery message encapsulated
@ -1526,7 +1536,7 @@ static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance
regular_message_item.iovec[0].iov_len =
recovery_message_item->iovec[0].iov_len - sizeof (struct mcast);
regular_message_item.iov_len = 1;
mcast = (struct mcast *)regular_message_item.iovec[0].iov_base;
mcast = regular_message_item.iovec[0].iov_base;
} else {
continue; /* TODO this case shouldn't happen */
/*
@ -1670,8 +1680,6 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
instance->my_received_flg = 0;
instance->operational_entered_once = 1;
return;
}
@ -1808,15 +1816,10 @@ static void memb_state_recovery_enter (
/*
* Build regular configuration
*/
instance->my_new_memb_entries = commit_token->addr_entries;
totemrrp_processor_count_set (
instance->totemrrp_handle,
commit_token->addr_entries);
memcpy (instance->my_new_memb_list, addr,
sizeof (struct srp_addr) * instance->my_new_memb_entries);
/*
* Build transitional configuration
*/
@ -1838,7 +1841,7 @@ static void memb_state_recovery_enter (
memb_list[i].high_delivered,
memb_list[i].received_flg);
assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
// assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
}
/*
* Determine if any received flag is false
@ -2279,15 +2282,15 @@ static int orf_token_mcast (
* Build IO vector
*/
memset (&sort_queue_item, 0, sizeof (struct sort_queue_item));
sort_queue_item.iovec[0].iov_base = (char *)message_item->mcast;
sort_queue_item.iovec[0].iov_base = message_item->mcast;
sort_queue_item.iovec[0].iov_len = sizeof (struct mcast);
mcast = (struct mcast *)sort_queue_item.iovec[0].iov_base;
mcast = sort_queue_item.iovec[0].iov_base;
memcpy (&sort_queue_item.iovec[1], message_item->iovec,
message_item->iov_len * sizeof (struct iovec));
memb_ring_id_copy (&mcast->ring_id, &instance->my_ring_id);
memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
sort_queue_item.iov_len = message_item->iov_len + 1;
@ -2435,8 +2438,8 @@ static int orf_token_rtr (
/*
* Missing message not found in current retransmit list so add it
*/
memb_ring_id_copy (&rtr_list[orf_token->rtr_list_entries].ring_id,
&instance->my_ring_id);
memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id,
&instance->my_ring_id, sizeof (struct memb_ring_id));
rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i;
orf_token->rtr_list_entries++;
}
@ -2606,7 +2609,7 @@ static int orf_token_send_initial (struct totemsrp_instance *instance)
orf_token.aru = SEQNO_START_MSG - 1;
orf_token.aru_addr = instance->my_id.addr[0].nodeid;
memb_ring_id_copy (&orf_token.ring_id, &instance->my_ring_id);
memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
orf_token.fcc = 0;
orf_token.backlog = 0;
@ -2627,8 +2630,13 @@ static void memb_state_commit_token_update (
addr = (struct srp_addr *)commit_token->end_of_commit_token;
memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
memb_ring_id_copy (&memb_list[commit_token->memb_index].ring_id,
&instance->my_old_ring_id);
memcpy (instance->my_new_memb_list, addr,
sizeof (struct srp_addr) * commit_token->addr_entries);
instance->my_new_memb_entries = commit_token->addr_entries;
memcpy (&memb_list[commit_token->memb_index].ring_id,
&instance->my_old_ring_id, sizeof (struct memb_ring_id));
assert (!totemip_zero_check(&instance->my_old_ring_id.rep));
memb_list[commit_token->memb_index].aru = instance->old_ring_state_aru;
@ -2757,17 +2765,7 @@ static void memb_state_commit_token_create (
totemip_copy(&commit_token->ring_id.rep, &instance->my_id.addr[0]);
/*
* The first time operational is entered, don't increment the ring
* sequence number (just reload it from stable storage). This prevents
* an error condition where if the executive is stopped and started
* before a new ring is formed, the protocol will get stuck in recovery.
*/
if (instance->operational_entered_once) {
commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
} else {
commit_token->ring_id.seq = instance->token_ring_id_seq;
}
commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
/*
* This qsort is necessary to ensure the commit token traverses
@ -2877,7 +2875,6 @@ static void memb_ring_id_create_or_load (
}
res = write (fd, &memb_ring_id->seq, sizeof (unsigned long long));
assert (res == sizeof (unsigned long long));
fsync (fd);
close (fd);
} else {
log_printf (instance->totemsrp_log_level_warning,
@ -2918,7 +2915,6 @@ static void memb_ring_id_set_and_store (
//assert (fd > 0);
res = write (fd, &instance->my_ring_id.seq, sizeof (unsigned long long));
assert (res == sizeof (unsigned long long));
fsync (fd);
close (fd);
}
@ -3850,9 +3846,8 @@ static void memb_commit_token_endian_convert (struct memb_commit_token *in, stru
* Only convert the memb entry if it has been set
*/
if (in_memb_list[i].ring_id.rep.family != 0) {
memb_ring_id_copy_endian_convert (
&out_memb_list[i].ring_id,
&in_memb_list[i].ring_id);
totemip_copy_endian_convert (&out_memb_list[i].ring_id.rep,
&in_memb_list[i].ring_id.rep);
out_memb_list[i].ring_id.seq =
swab64 (in_memb_list[i].ring_id.seq);
@ -3873,15 +3868,16 @@ static void orf_token_endian_convert (struct orf_token *in, struct orf_token *ou
out->seq = swab32 (in->seq);
out->token_seq = swab32 (in->token_seq);
out->aru = swab32 (in->aru);
memb_ring_id_copy_endian_convert (&out->ring_id, &in->ring_id);
totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
out->aru_addr = swab32(in->aru_addr);
out->ring_id.seq = swab64 (in->ring_id.seq);
out->fcc = swab32 (in->fcc);
out->backlog = swab32 (in->backlog);
out->retrans_flg = swab32 (in->retrans_flg);
out->rtr_list_entries = swab32 (in->rtr_list_entries);
for (i = 0; i < out->rtr_list_entries; i++) {
memb_ring_id_copy_endian_convert (&out->rtr_list[i].ring_id,
&in->rtr_list[i].ring_id);
totemip_copy_endian_convert(&out->rtr_list[i].ring_id.rep, &in->rtr_list[i].ring_id.rep);
out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq);
out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq);
}
}
@ -3895,7 +3891,8 @@ static void mcast_endian_convert (struct mcast *in, struct mcast *out)
out->seq = swab32 (in->seq);
out->this_seqno = swab32 (in->this_seqno);
memb_ring_id_copy_endian_convert (&out->ring_id, &in->ring_id);
totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
out->ring_id.seq = swab64 (in->ring_id.seq);
out->node_id = swab32 (in->node_id);
out->guarantee = swab32 (in->guarantee);
srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
@ -3908,7 +3905,8 @@ static void memb_merge_detect_endian_convert (
out->header.type = in->header.type;
out->header.endian_detector = ENDIAN_LOCAL;
out->header.nodeid = swab32 (in->header.nodeid);
memb_ring_id_copy_endian_convert(&out->ring_id, &in->ring_id);
totemip_copy_endian_convert(&out->ring_id.rep, &in->ring_id.rep);
out->ring_id.seq = swab64 (in->ring_id.seq);
srp_addr_copy_endian_convert (&out->system_from, &in->system_from);
}