mirror of
https://git.proxmox.com/git/pve-ha-manager
synced 2025-04-28 16:09:26 +00:00
notifications: overhaul fence notification
- try to make template variable names clearer (in preparation for #6143) - add common template variables (fqdn, hostname, cluster-name) - instead of dumping the status-data variable as a JSON blob, we add template variables for the most useful information and render them in a structured manner Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
This commit is contained in:
parent
e3622b0f11
commit
66eddda945
@ -3,6 +3,8 @@ package PVE::HA::NodeStatus;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use PVE::Notify;
|
||||
|
||||
use JSON;
|
||||
|
||||
my $fence_delay = 60;
|
||||
@ -195,15 +197,38 @@ my $send_fence_state_email = sub {
|
||||
my $haenv = $self->{haenv};
|
||||
my $status = $haenv->read_manager_status();
|
||||
|
||||
my $template_data = {
|
||||
"status-data" => {
|
||||
manager_status => $status,
|
||||
node_status => $self->{status}
|
||||
},
|
||||
"node" => $node,
|
||||
"subject-prefix" => $subject_prefix,
|
||||
"subject" => $subject,
|
||||
};
|
||||
my $template_data = PVE::Notify::common_template_data();
|
||||
# Those two are needed for the expected output for test cases,
|
||||
# see src/PVE/HA/Sim/Env.pm
|
||||
$template_data->{"fence-status"} = $subject;
|
||||
$template_data->{"fence-prefix"} = $subject_prefix;
|
||||
|
||||
# Flag consumed by the templates' `{{#if is-success}}` branch. The condition
# must come first in the ternary; normalize the result to an explicit 1/0.
$template_data->{"is-success"} = $subject_prefix eq "SUCCEED" ? 1 : 0;
|
||||
|
||||
$template_data->{"failed-node"} = $node;
|
||||
$template_data->{"master-node"} = $status->{master_node};
|
||||
# There is a handlebars helper 'timestamp', we should not
|
||||
# name a variable the same way.
|
||||
$template_data->{"fence-timestamp"} = $status->{timestamp};
|
||||
|
||||
$template_data->{"nodes"} = [];
|
||||
for my $key (sort keys $status->{node_status}->%*) {
|
||||
push $template_data->{"nodes"}->@*, {
|
||||
node => $key,
|
||||
status => $status->{node_status}->{$key}
|
||||
};
|
||||
}
|
||||
|
||||
$template_data->{"resources"} = [];
|
||||
for my $key (sort keys $status->{service_status}->%*) {
|
||||
my $resource_status = $status->{service_status}->{$key};
|
||||
push $template_data->{"resources"}->@*, {
|
||||
resource => $key,
|
||||
state => $resource_status->{state},
|
||||
node => $resource_status->{node},
|
||||
running => $resource_status->{running},
|
||||
};
|
||||
}
|
||||
|
||||
my $metadata_fields = {
|
||||
type => 'fencing',
|
||||
|
@ -299,12 +299,12 @@ sub log {
|
||||
sub send_notification {
|
||||
my ($self, $template_name, $properties) = @_;
|
||||
|
||||
# The template for the subject is "{{subject-prefix}}: {{subject}}"
|
||||
# The subject format expected by the test cases is "{{fence-prefix}}: {{fence-status}}"
|
||||
# We have to perform poor-man's template rendering to pass the test cases.
|
||||
|
||||
my $subject = "{{subject-prefix}}: {{subject}}";
|
||||
$subject = $subject =~ s/\{\{subject-prefix}}/$properties->{"subject-prefix"}/r;
|
||||
$subject = $subject =~ s/\{\{subject}}/$properties->{"subject"}/r;
|
||||
my $subject = "{{fence-prefix}}: {{fence-status}}";
|
||||
$subject = $subject =~ s/\{\{fence-prefix}}/$properties->{"fence-prefix"}/r;
|
||||
$subject = $subject =~ s/\{\{fence-status}}/$properties->{"fence-status"}/r;
|
||||
|
||||
# only log subject, do not spam the logs
|
||||
$self->log('email', $subject);
|
||||
|
@ -1,14 +1,43 @@
|
||||
<html>
|
||||
<body>
|
||||
The node '{{node}}' failed and needs manual intervention.<br/><br/>
|
||||
The node '{{failed-node}}' in cluster '{{cluster-name}}' failed and
|
||||
needs manual intervention.<br/><br/>
|
||||
|
||||
The PVE HA manager tries to fence it and recover the configured HA resources to
|
||||
a healthy node if possible.<br/><br/>
|
||||
{{#if is-success~}}
|
||||
The PVE HA manager successfully fenced '{{failed-node}}'.<br/><br/>
|
||||
{{else}}
|
||||
The PVE HA manager will now fence '{{failed-node}}'.<br/><br/>
|
||||
{{/if}}
|
||||
|
||||
Current fence status: {{subject-prefix}}<br/>
|
||||
{{subject}}<br/>
|
||||
<b>Status:</b> {{fence-status}}<br/>
|
||||
<b>Timestamp:</b> {{timestamp fence-timestamp}}<br/>
|
||||
|
||||
<h2 style="font-size: 1em">Overall Cluster status:</h2>
|
||||
{{object status-data}}
|
||||
<h2 style="font-size: 1em">Cluster Node Status:</h2>
|
||||
<ul>
|
||||
{{#each nodes}}
|
||||
<li>
|
||||
{{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
|
||||
</li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
|
||||
<h2 style="font-size: 1em">HA Resources:</h2>
|
||||
The following HA resources were running on the failed node and will be
|
||||
recovered to a healthy node if possible:
|
||||
<ul>
|
||||
{{#each resources}}
|
||||
{{#if (eq this.node ../failed-node)}}
|
||||
<li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
|
||||
{{/if}}
|
||||
{{/each}}
|
||||
</ul>
|
||||
The other HA resources in this cluster are:
|
||||
<ul>
|
||||
{{#each resources}}
|
||||
{{#if (ne this.node ../failed-node)}}
|
||||
<li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
|
||||
{{/if}}
|
||||
{{/each}}
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,11 +1,35 @@
|
||||
The node '{{node}}' failed and needs manual intervention.
|
||||
The node '{{failed-node}}' in cluster '{{cluster-name}}' failed
|
||||
and needs manual intervention.
|
||||
|
||||
The PVE HA manager tries to fence it and recover the configured HA resources to
|
||||
a healthy node if possible.
|
||||
{{#if is-success~}}
|
||||
The PVE HA manager successfully fenced '{{failed-node}}'.
|
||||
{{else~}}
|
||||
The PVE HA manager will now fence '{{failed-node}}'.
|
||||
{{/if}}
|
||||
Status: {{fence-status}}
|
||||
Timestamp: {{timestamp fence-timestamp}}
|
||||
|
||||
Current fence status: {{subject-prefix}}
|
||||
{{subject}}
|
||||
Cluster Node Status:
|
||||
--------------------
|
||||
{{#each nodes~}}
|
||||
- {{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
|
||||
{{/each}}
|
||||
|
||||
HA Resources:
|
||||
-------------
|
||||
The following HA resources were running on the failed node and will be
|
||||
recovered to a healthy node if possible:
|
||||
|
||||
{{#each resources~}}
|
||||
{{#if (eq this.node ../failed-node)~}}
|
||||
- {{this.resource}} [{{this.node}}]: {{this.state}}
|
||||
{{/if~}}
|
||||
{{/each}}
|
||||
The other HA resources in this cluster are:
|
||||
|
||||
{{#each resources~}}
|
||||
{{#if (ne this.node ../failed-node)~}}
|
||||
- {{this.resource}} [{{this.node}}]: {{this.state}}
|
||||
{{/if~}}
|
||||
{{/each~}}
|
||||
|
||||
Overall Cluster status:
|
||||
-----------------------
|
||||
{{object status-data}}
|
||||
|
@ -1 +1,5 @@
|
||||
{{subject-prefix}}: {{subject}}
|
||||
{{#if is-success~}}
|
||||
Successfully fenced node '{{failed-node}}'
|
||||
{{else}}
|
||||
Trying to fence node '{{failed-node}}'
|
||||
{{/if}}
|
||||
|
Loading…
Reference in New Issue
Block a user