/* * Copyright (c) 2003-2005 MontaVista Software, Inc. * Copyright (c) 2005 OSDL. * * All rights reserved. * * Author: Steven Dake (sdake@mvista.com) * Mark Haverkamp (markh@osdl.org) * * This software licensed under BSD license, the text of which follows: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of the MontaVista Software, Inc. nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* * FRAGMENTATION AND PACKING ALGORITHM: * * Assemble the entire message into one buffer * if full fragment * store fragment into lengths list * for each full fragment * multicast fragment * set length and fragment fields of pg mesage * store remaining multicast into head of fragmentation data and set lens field * * If a message exceeds the maximum packet size allowed by the totem * single ring protocol, the protocol could lose forward progress. * Statically calculating the allowed data amount doesn't work because * the amount of data allowed depends on the number of fragments in * each message. In this implementation, the maximum fragment size * is dynamically calculated for each fragment added to the message. * It is possible for a message to be two bytes short of the maximum * packet size. This occurs when a message or collection of * messages + the mcast header + the lens are two bytes short of the * end of the packet. Since another len field consumes two bytes, the * len field would consume the rest of the packet without room for data. * * One optimization would be to forgo the final len field and determine * it from the size of the udp datagram. Then this condition would no * longer occur. */ /* * ASSEMBLY AND UNPACKING ALGORITHM: * * copy incoming packet into assembly data buffer indexed by current * location of end of fragment * * if not fragmented * deliver all messages in assembly data buffer * else * if msg_count > 1 and fragmented * deliver all messages except last message in assembly data buffer * copy last fragmented section to start of assembly data buffer * else * if msg_count = 1 and fragmented * do nothing * */ #include #include #include #include #include #include #include #include "../include/hdb.h" #include "totempg.h" #include "totemmrp.h" #include "totemsrp.h" #include "swab.h" #define min(a,b) ((a) < (b)) ? a : b struct totempg_mcast_header { short version; short type; }; /* * totempg_mcast structure * * header: Identify the mcast. * fragmented: Set if this message continues into next message * continuation: Set if this message is a continuation from last message * msg_count Indicates how many packed messages are contained * in the mcast. * Also, the size of each packed message and the messages themselves are * appended to the end of this structure when sent. */ struct totempg_mcast { struct totempg_mcast_header header; unsigned char fragmented; unsigned char continuation; unsigned short msg_count; /* * short msg_len[msg_count]; */ /* * data for messages */ }; /* * Maximum packet size for totem pg messages */ #define TOTEMPG_PACKET_SIZE (totempg_totem_config->net_mtu - \ sizeof (struct totempg_mcast)) /* * Local variables used for packing small messages */ static unsigned short mcast_packed_msg_lens[FRAME_SIZE_MAX]; static int mcast_packed_msg_count = 0; /* * Function and data used to log messages */ static int totempg_log_level_security; static int totempg_log_level_error; static int totempg_log_level_warning; static int totempg_log_level_notice; static int totempg_log_level_debug; static void (*totempg_log_printf) (char *file, int line, int level, char *format, ...) = NULL; struct totem_config *totempg_totem_config; struct assembly { unsigned int nodeid; unsigned char data[MESSAGE_SIZE_MAX]; int index; unsigned char last_frag_num; }; struct assembly *assembly_list[PROCESSOR_COUNT_MAX]; int assembly_list_entries = 0; /* * Staging buffer for packed messages. Messages are staged in this buffer * before sending. Multiple messages may fit which cuts down on the * number of mcasts sent. If a message doesn't completely fit, then * the mcast header has a fragment bit set that says that there are more * data to follow. fragment_size is an index into the buffer. It indicates * the size of message data and where to place new message data. * fragment_contuation indicates whether the first packed message in * the buffer is a continuation of a previously packed fragment. */ static unsigned char *fragmentation_data; static int fragment_size = 0; static int fragment_continuation = 0; static struct iovec iov_delv; static unsigned int totempg_max_handle = 0; struct totempg_group_instance { void (*deliver_fn) ( unsigned int nodeid, struct iovec *iovec, int iov_len, int endian_conversion_required); void (*confchg_fn) ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id); struct totempg_group *groups; int groups_cnt; }; static struct hdb_handle_database totempg_groups_instance_database = { .handle_count = 0, .handles = 0, .iterator = 0, .mutex = PTHREAD_MUTEX_INITIALIZER }; static int send_ok (int msg_size); static unsigned char next_fragment = 1; static pthread_mutex_t totempg_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t callback_token_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t mcast_msg_mutex = PTHREAD_MUTEX_INITIALIZER; #define log_printf(level, format, args...) \ totempg_log_printf (__FILE__, __LINE__, level, format, ##args) static struct assembly *find_assembly (unsigned int nodeid) { int i; for (i = 0; i < assembly_list_entries; i++) { if (nodeid == assembly_list[i]->nodeid) { return (assembly_list[i]); } } return (0); } static inline void app_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id) { int i; struct totempg_group_instance *instance; unsigned int res; for (i = 0; i <= totempg_max_handle; i++) { res = hdb_handle_get (&totempg_groups_instance_database, i, (void *)&instance); if (res == 0) { if (instance->confchg_fn) { instance->confchg_fn ( configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } hdb_handle_put (&totempg_groups_instance_database, i); } } } static inline void group_endian_convert ( struct iovec *iovec) { unsigned short *group_len; int i; group_len = (unsigned short *)iovec->iov_base; group_len[0] = swab16(group_len[0]); for (i = 1; i < group_len[0] + 1; i++) { group_len[i] = swab16(group_len[i]); } } static inline int group_matches ( struct iovec *iovec, unsigned int iov_len, struct totempg_group *groups_b, unsigned int group_b_cnt, unsigned int *adjust_iovec) { unsigned short *group_len; char *group_name; int i; int j; assert (iov_len == 1); group_len = (unsigned short *)iovec->iov_base; group_name = ((char *)iovec->iov_base) + sizeof (unsigned short) * (group_len[0] + 1); /* * Calculate amount to adjust the iovec by before delivering to app */ *adjust_iovec = sizeof (unsigned short) * (group_len[0] + 1); for (i = 1; i < group_len[0] + 1; i++) { *adjust_iovec += group_len[i]; } /* * Determine if this message should be delivered to this instance */ for (i = 1; i < group_len[0] + 1; i++) { for (j = 0; j < group_b_cnt; j++) { if ((group_len[i] == groups_b[j].group_len) && (memcmp (groups_b[j].group, group_name, group_len[i]) == 0)) { return (1); } } group_name += group_len[i]; } return (0); } static inline void app_deliver_fn ( unsigned int nodeid, struct iovec *iovec, unsigned int iov_len, int endian_conversion_required) { int i; struct totempg_group_instance *instance; struct iovec stripped_iovec; unsigned int adjust_iovec; unsigned int res; if (endian_conversion_required) { group_endian_convert (iovec); } for (i = 0; i <= totempg_max_handle; i++) { res = hdb_handle_get (&totempg_groups_instance_database, i, (void *)&instance); if (res == 0) { assert (iov_len == 1); if (group_matches (iovec, iov_len, instance->groups, instance->groups_cnt, &adjust_iovec)) { stripped_iovec.iov_len = iovec->iov_len - adjust_iovec; stripped_iovec.iov_base = (char *)iovec->iov_base + adjust_iovec; instance->deliver_fn ( nodeid, &stripped_iovec, iov_len, endian_conversion_required); } hdb_handle_put (&totempg_groups_instance_database, i); } } } static void totempg_confchg_fn ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id) { int i; int j; int found; /* * Clean out the assembly area for nodes that have left the * membership. If they return, we don't want any stale message * data that may be there. */ for (i = 0; i < left_list_entries; i++) { for (j = 0; j < assembly_list_entries; j++) { if (left_list[i] == assembly_list[j]->nodeid) { assembly_list[j]->index = 0; } } } /* * Create a message assembly area for any new members. */ for (i = 0; i < member_list_entries; i++) { found = 0; for (j = 0; j < assembly_list_entries; j++) { if (member_list[i] == assembly_list[j]->nodeid) { found = 1; break; } } if (found == 0) { assembly_list[assembly_list_entries] = malloc (sizeof (struct assembly)); assert (assembly_list[assembly_list_entries]); // TODO assembly_list[assembly_list_entries]->nodeid = member_list[i]; assembly_list[assembly_list_entries]->index = 0; assembly_list_entries += 1; } } app_confchg_fn (configuration_type, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries, ring_id); } static void totempg_deliver_fn ( unsigned int nodeid, struct iovec *iovec, int iov_len, int endian_conversion_required) { struct totempg_mcast *mcast; unsigned short *msg_lens; int i; struct assembly *assembly; char header[FRAME_SIZE_MAX]; int h_index; int a_i = 0; int msg_count; int continuation; int start; assembly = find_assembly (nodeid); assert (assembly); /* * Assemble the header into one block of data and * assemble the packet contents into one block of data to simplify delivery */ if (iov_len == 1) { /* * This message originated from external processor * because there is only one iovec for the full msg. */ char *data; int datasize; mcast = (struct totempg_mcast *)iovec[0].iov_base; if (endian_conversion_required) { mcast->msg_count = swab16 (mcast->msg_count); } msg_count = mcast->msg_count; datasize = sizeof (struct totempg_mcast) + msg_count * sizeof (unsigned short); memcpy (header, iovec[0].iov_base, datasize); assert(iovec); data = iovec[0].iov_base; msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast)); if (endian_conversion_required) { for (i = 0; i < mcast->msg_count; i++) { msg_lens[i] = swab16 (msg_lens[i]); } } memcpy (&assembly->data[assembly->index], &data[datasize], iovec[0].iov_len - datasize); } else { /* * The message originated from local processor * becasue there is greater than one iovec for then full msg. */ h_index = 0; for (i = 0; i < 2; i++) { memcpy (&header[h_index], iovec[i].iov_base, iovec[i].iov_len); h_index += iovec[i].iov_len; } mcast = (struct totempg_mcast *)header; // TODO make sure we are using a copy of mcast not the actual data itself msg_lens = (unsigned short *) (header + sizeof (struct totempg_mcast)); for (i = 2; i < iov_len; i++) { a_i = assembly->index; assert (iovec[i].iov_len + a_i <= MESSAGE_SIZE_MAX); memcpy (&assembly->data[a_i], iovec[i].iov_base, iovec[i].iov_len); a_i += msg_lens[i - 2]; } iov_len -= 2; } /* * If the last message in the buffer is a fragment, then we * can't deliver it. We'll first deliver the full messages * then adjust the assembly buffer so we can add the rest of the * fragment when it arrives. */ msg_count = mcast->fragmented ? mcast->msg_count - 1 : mcast->msg_count; continuation = mcast->continuation; iov_delv.iov_base = &assembly->data[0]; iov_delv.iov_len = assembly->index + msg_lens[0]; /* * Make sure that if this message is a continuation, that it * matches the sequence number of the previous fragment. * Also, if the first packed message is a continuation * of a previous message, but the assembly buffer * is empty, then we need to discard it since we can't * assemble a complete message. Likewise, if this message isn't a * continuation and the assembly buffer is empty, we have to discard * the continued message. */ start = 0; if (continuation) { if (continuation != assembly->last_frag_num) { log_printf (totempg_log_level_error, "Message continuation doesn't match previous frag e: %u - a: %u\n", assembly->last_frag_num, continuation); continuation = 0; } if ((assembly->index == 0) || (!continuation && assembly->index)) { log_printf (totempg_log_level_error, "Throwing away broken message: continuation %u, index %u\n", continuation, assembly->index); continuation = 0; } /* * we decided to throw away the first continued message * in this buffer, if continuation was set to zero. */ if (!continuation) { assembly->index += msg_lens[0]; iov_delv.iov_base = &assembly->data[assembly->index]; iov_delv.iov_len = msg_lens[1]; start = 1; } } for (i = start; i < msg_count; i++) { app_deliver_fn(nodeid, &iov_delv, 1, endian_conversion_required); assembly->index += msg_lens[i]; iov_delv.iov_base = &assembly->data[assembly->index]; if (i < (msg_count - 1)) { iov_delv.iov_len = msg_lens[i + 1]; } } if (mcast->fragmented) { assembly->last_frag_num = mcast->fragmented; if (mcast->msg_count > 1) { memmove (&assembly->data[0], &assembly->data[assembly->index], msg_lens[msg_count]); assembly->index = 0; } assembly->index += msg_lens[msg_count]; } else { assembly->last_frag_num = 0; assembly->index = 0; } } /* * Totem Process Group Abstraction * depends on poll abstraction, POSIX, IPV4 */ void *callback_token_received_handle; int callback_token_received_fn (enum totem_callback_token_type type, void *data) { struct totempg_mcast mcast; struct iovec iovecs[3]; int res; pthread_mutex_lock (&mcast_msg_mutex); if (mcast_packed_msg_count == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return (0); } if (totemmrp_avail() == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return (0); } mcast.fragmented = 0; /* * Was the first message in this buffer a continuation of a * fragmented message? */ mcast.continuation = fragment_continuation; fragment_continuation = 0; mcast.msg_count = mcast_packed_msg_count; iovecs[0].iov_base = &mcast; iovecs[0].iov_len = sizeof (struct totempg_mcast); iovecs[1].iov_base = mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof (unsigned short); iovecs[2].iov_base = &fragmentation_data[0]; iovecs[2].iov_len = fragment_size; res = totemmrp_mcast (iovecs, 3, 0); mcast_packed_msg_count = 0; fragment_size = 0; pthread_mutex_unlock (&mcast_msg_mutex); return (0); } /* * Initialize the totem process group abstraction */ int totempg_initialize ( poll_handle poll_handle, struct totem_config *totem_config) { int res; totempg_totem_config = totem_config; totempg_log_level_security = totem_config->totem_logging_configuration.log_level_security; totempg_log_level_error = totem_config->totem_logging_configuration.log_level_error; totempg_log_level_warning = totem_config->totem_logging_configuration.log_level_warning; totempg_log_level_notice = totem_config->totem_logging_configuration.log_level_notice; totempg_log_level_debug = totem_config->totem_logging_configuration.log_level_debug; totempg_log_printf = totem_config->totem_logging_configuration.log_printf; fragmentation_data = malloc (TOTEMPG_PACKET_SIZE); if (fragmentation_data == 0) { return (-1); } res = totemmrp_initialize ( poll_handle, totem_config, totempg_deliver_fn, totempg_confchg_fn); totemmrp_callback_token_create ( &callback_token_received_handle, TOTEM_CALLBACK_TOKEN_RECEIVED, 0, callback_token_received_fn, 0); totemsrp_net_mtu_adjust (totem_config); return (res); } void totempg_finalize (void) { pthread_mutex_lock (&totempg_mutex); totemmrp_finalize (); pthread_mutex_unlock (&totempg_mutex); } /* * Multicast a message */ static int mcast_msg ( struct iovec *iovec, int iov_len, int guarantee) { int res = 0; struct totempg_mcast mcast; struct iovec iovecs[3]; int i; int max_packet_size = 0; int copy_len = 0; int copy_base = 0; int total_size = 0; pthread_mutex_lock (&mcast_msg_mutex); totemmrp_new_msg_signal (); max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof (unsigned short) * (mcast_packed_msg_count + 1)); mcast_packed_msg_lens[mcast_packed_msg_count] = 0; /* * Check if we would overwrite new message queue */ for (i = 0; i < iov_len; i++) { total_size += iovec[i].iov_len; } if (send_ok (total_size + sizeof(unsigned short) * (mcast_packed_msg_count+1)) == 0) { pthread_mutex_unlock (&mcast_msg_mutex); return(-1); } for (i = 0; i < iov_len; ) { mcast.fragmented = 0; mcast.continuation = fragment_continuation; copy_len = iovec[i].iov_len - copy_base; /* * If it all fits with room left over, copy it in. * We need to leave at least sizeof(short) + 1 bytes in the * fragment_buffer on exit so that max_packet_size + fragment_size * doesn't exceed the size of the fragment_buffer on the next call. */ if ((copy_len + fragment_size) < (max_packet_size - sizeof (unsigned short))) { memcpy (&fragmentation_data[fragment_size], iovec[i].iov_base + copy_base, copy_len); fragment_size += copy_len; mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; copy_len = 0; copy_base = 0; i++; continue; /* * If it just fits or is too big, then send out what fits. */ } else { unsigned char *data_ptr; copy_len = min(copy_len, max_packet_size - fragment_size); if( copy_len == max_packet_size ) data_ptr = iovec[i].iov_base + copy_base; else { data_ptr = fragmentation_data; memcpy (&fragmentation_data[fragment_size], iovec[i].iov_base + copy_base, copy_len); } memcpy (&fragmentation_data[fragment_size], iovec[i].iov_base + copy_base, copy_len); mcast_packed_msg_lens[mcast_packed_msg_count] += copy_len; /* * if we're not on the last iovec or the iovec is too large to * fit, then indicate a fragment. This also means that the next * message will have the continuation of this one. */ if ((i < (iov_len - 1)) || ((copy_base + copy_len) < iovec[i].iov_len)) { if (!next_fragment) { next_fragment++; } fragment_continuation = next_fragment; mcast.fragmented = next_fragment++; assert(fragment_continuation != 0); assert(mcast.fragmented != 0); } else { fragment_continuation = 0; } /* * assemble the message and send it */ mcast.msg_count = ++mcast_packed_msg_count; iovecs[0].iov_base = &mcast; iovecs[0].iov_len = sizeof(struct totempg_mcast); iovecs[1].iov_base = mcast_packed_msg_lens; iovecs[1].iov_len = mcast_packed_msg_count * sizeof(unsigned short); iovecs[2].iov_base = data_ptr; iovecs[2].iov_len = max_packet_size; assert (totemmrp_avail() > 0); res = totemmrp_mcast (iovecs, 3, guarantee); /* * Recalculate counts and indexes for the next. */ mcast_packed_msg_lens[0] = 0; mcast_packed_msg_count = 0; fragment_size = 0; max_packet_size = TOTEMPG_PACKET_SIZE - (sizeof(unsigned short)); /* * If the iovec all fit, go to the next iovec */ if ((copy_base + copy_len) == iovec[i].iov_len) { copy_len = 0; copy_base = 0; i++; /* * Continue with the rest of the current iovec. */ } else { copy_base += copy_len; } } } /* * Bump only if we added message data. This may be zero if * the last buffer just fit into the fragmentation_data buffer * and we were at the last iovec. */ if (mcast_packed_msg_lens[mcast_packed_msg_count]) { mcast_packed_msg_count++; } pthread_mutex_unlock (&mcast_msg_mutex); return (res); } /* * Determine if a message of msg_size could be queued */ static int send_ok ( int msg_size) { int avail = 0; int total; avail = totemmrp_avail (); /* * msg size less then totempg_totem_config->net_mtu - 25 will take up * a full message, so add +1 * totempg_totem_config->net_mtu - 25 is for the totempg_mcast header */ total = (msg_size / (totempg_totem_config->net_mtu - 25)) + 1; return (avail >= total); } int totempg_callback_token_create ( void **handle_out, enum totem_callback_token_type type, int delete, int (*callback_fn) (enum totem_callback_token_type type, void *), void *data) { unsigned int res; pthread_mutex_lock (&callback_token_mutex); res = totemmrp_callback_token_create (handle_out, type, delete, callback_fn, data); pthread_mutex_unlock (&callback_token_mutex); return (res); } void totempg_callback_token_destroy ( void *handle_out) { pthread_mutex_lock (&callback_token_mutex); totemmrp_callback_token_destroy (handle_out); pthread_mutex_unlock (&callback_token_mutex); } /* * vi: set autoindent tabstop=4 shiftwidth=4 : */ int totempg_groups_initialize ( totempg_groups_handle *handle, void (*deliver_fn) ( unsigned int nodeid, struct iovec *iovec, int iov_len, int endian_conversion_required), void (*confchg_fn) ( enum totem_configuration_type configuration_type, unsigned int *member_list, int member_list_entries, unsigned int *left_list, int left_list_entries, unsigned int *joined_list, int joined_list_entries, struct memb_ring_id *ring_id)) { struct totempg_group_instance *instance; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_create (&totempg_groups_instance_database, sizeof (struct totempg_group_instance), handle); if (res != 0) { goto error_exit; } if (*handle > totempg_max_handle) { totempg_max_handle = *handle; } res = hdb_handle_get (&totempg_groups_instance_database, *handle, (void *)&instance); if (res != 0) { goto error_destroy; } instance->deliver_fn = deliver_fn; instance->confchg_fn = confchg_fn; instance->groups = 0; instance->groups_cnt = 0; hdb_handle_put (&totempg_groups_instance_database, *handle); pthread_mutex_unlock (&totempg_mutex); return (0); error_destroy: hdb_handle_destroy (&totempg_groups_instance_database, *handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (-1); } int totempg_groups_join ( totempg_groups_handle handle, struct totempg_group *groups, int group_cnt) { struct totempg_group_instance *instance; struct totempg_group *new_groups; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } new_groups = realloc (instance->groups, sizeof (struct totempg_group) * (instance->groups_cnt + group_cnt)); if (new_groups == 0) { res = ENOMEM; goto error_exit; } memcpy (&new_groups[instance->groups_cnt], groups, group_cnt * sizeof (struct totempg_group)); instance->groups = new_groups; instance->groups_cnt = instance->groups_cnt = group_cnt; hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_groups_leave ( totempg_groups_handle handle, struct totempg_group *groups, int group_cnt) { struct totempg_group_instance *instance; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } #define MAX_IOVECS_FROM_APP 32 #define MAX_GROUPS_PER_MSG 32 int totempg_groups_mcast_joined ( totempg_groups_handle handle, struct iovec *iovec, int iov_len, int guarantee) { struct totempg_group_instance *instance; unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = instance->groups_cnt; for (i = 0; i < instance->groups_cnt; i++) { group_len[i + 1] = instance->groups[i].group_len; iovec_mcast[i + 1].iov_len = instance->groups[i].group_len; iovec_mcast[i + 1].iov_base = instance->groups[i].group; } iovec_mcast[0].iov_len = (instance->groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + instance->groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + instance->groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + instance->groups_cnt + 1, guarantee); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_groups_send_ok_joined ( totempg_groups_handle handle, struct iovec *iovec, int iov_len) { struct totempg_group_instance *instance; unsigned int size = 0; unsigned int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); pthread_mutex_lock (&mcast_msg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } for (i = 0; i < instance->groups_cnt; i++) { size += instance->groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } res = send_ok (size); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&mcast_msg_mutex); pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_groups_mcast_groups ( totempg_groups_handle handle, int guarantee, struct totempg_group *groups, int groups_cnt, struct iovec *iovec, int iov_len) { struct totempg_group_instance *instance; unsigned short group_len[MAX_GROUPS_PER_MSG + 1]; struct iovec iovec_mcast[MAX_GROUPS_PER_MSG + 1 + MAX_IOVECS_FROM_APP]; int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } /* * Build group_len structure and the iovec_mcast structure */ group_len[0] = groups_cnt; for (i = 0; i < groups_cnt; i++) { group_len[i + 1] = groups[i].group_len; iovec_mcast[i + 1].iov_len = groups[i].group_len; iovec_mcast[i + 1].iov_base = groups[i].group; } iovec_mcast[0].iov_len = (groups_cnt + 1) * sizeof (unsigned short); iovec_mcast[0].iov_base = group_len; for (i = 0; i < iov_len; i++) { iovec_mcast[i + groups_cnt + 1].iov_len = iovec[i].iov_len; iovec_mcast[i + groups_cnt + 1].iov_base = iovec[i].iov_base; } res = mcast_msg (iovec_mcast, iov_len + groups_cnt + 1, guarantee); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } /* * Returns -1 if error, 0 if can't send, 1 if can send the message */ int totempg_groups_send_ok_groups ( totempg_groups_handle handle, struct totempg_group *groups, int groups_cnt, struct iovec *iovec, int iov_len) { struct totempg_group_instance *instance; unsigned int size = 0; unsigned int i; unsigned int res; pthread_mutex_lock (&totempg_mutex); res = hdb_handle_get (&totempg_groups_instance_database, handle, (void *)&instance); if (res != 0) { goto error_exit; } for (i = 0; i < groups_cnt; i++) { size += groups[i].group_len; } for (i = 0; i < iov_len; i++) { size += iovec[i].iov_len; } res = send_ok (size); hdb_handle_put (&totempg_groups_instance_database, handle); error_exit: pthread_mutex_unlock (&totempg_mutex); return (res); } int totempg_ifaces_get ( unsigned int nodeid, struct totem_ip_address *interfaces, unsigned int *iface_count) { int res; res = totemmrp_interfaces_get ( nodeid, interfaces, iface_count); return (res); } char *totempg_ifaces_print (unsigned int nodeid) { static char iface_string[256 * INTERFACE_MAX]; char one_iface[32]; struct totem_ip_address interfaces[INTERFACE_MAX]; unsigned int iface_count; unsigned int i; int res; iface_string[0] = '\0'; res = totempg_ifaces_get (nodeid, interfaces, &iface_count); if (res == -1) { return ("no interface found for nodeid"); } for (i = 0; i < iface_count; i++) { sprintf (one_iface, "r(%d) ip(%s) ", i, totemip_print (&interfaces[i])); strcat (iface_string, one_iface); } return (iface_string); }