In my environment, MySQL replication is really important because we are splitting different web servers with different database server to balance the load between MySQL servers. It quite traditional ways because this is kind of old database servers which sustain until today.

There is simple way to setup a monitoring script to alert us via email on any replication error. This script need to be run on slave server of MySQL replication.

Variable as below:

Database server: MySQL 5.0.77
Hostname: mysql.mydomain.org
Database host: localhost
Database name: mymarket_data
Database user: root
Database password: Mtuserp@ss

1. Create directory and file for the script:

$ mkdir -p /root/scripts
$ touch /root/scripts/check_slave.php

2. Copy and paste following PHP scripts and put it into a file called check_slave.php:

<?php
/**
 * Description: This script checks the slave status on the configured host
 *              printing "BAD <description>" or "OK <description>" for failure
 *              or success respectively.
 *              The possible failures could include:
 *              1) connection failure
 *              2) query failure (permissions, network, etc.)
 *              3) fetch failure (???)
 *              4) slave or io thread is not running
 *              5) Unknown master state (seconds_behind_master is null)
 *              6) seconds_behind_master has exceeded the configured threshold
 *
 *              If none of these condition occur, we asssume success and return
 *              an "OK" response, otherwise we include the error we can find
 *              (mysqli_connect_error() or $mysqli->error, or problem
 *               description).  A monitoring system need only check for:
 *              /^BAD/ (alert) or /^OK/ (everybody happy)
 */
 
/* **************************
 * Change related value below
 * **************************
 */
    $host = "";
    $user = "";
    $pass = "";
    $mailto = ""; // Your email address
    $mailsubject = "";
    $mailfrom = "";
 
/* ******************************************
 * No need to change anything below this line
 * ******************************************
 */ $mailmessage = "BAD: ".$err_msg."\n";
    $mailheaders = "From:" . $mailfrom;
    error_reporting(E_ALL);
    header("Content-Type: text/plain"); # Not HTML
    $sql = "SHOW SLAVE STATUS";
    $skip_file = 'skip_alerts';
    $link = mysql_connect($host, $user, $pass, null);
 
    if($link)
        $result = mysql_query($sql, $link);
    else {
        printf("BAD: Connection Failed %s", mysql_error());
        mysql_close($link);
        return;
    }
 
    if($result)
        $status = mysql_fetch_assoc($result);
    else {
        printf("BAD: Query failed - %s\n", mysql_error($link));
        mysql_close($link);
        return;
    }
 
    mysql_close($link);
 
    $slave_lag_threshold = 120;
 
    $tests = array(
        'test_slave_io_thread' => array('Slave_IO_Running', "\$var === 'Yes'",
                                        'Slave IO Thread is not running'),
        'test_slave_sql_thread' => array('Slave_SQL_Running', "\$var === 'Yes'",
                                        'Slave SQL Thread is not running'),
        'test_last_err' => array('Last_Errno', "\$var == 0",
                                 "Error encountered during replication - "
                                 .$status['Last_Error']),
        'test_master_status' => array('Seconds_Behind_Master', "isset(\$var)",
                                        'Unknown master status (Seconds_Behind_Master IS NULL)'),
        'test_slave_lag' => array('Seconds_Behind_Master',
                                  "\$var < \$slave_lag_threshold",
                                  "Slave is ${status['Seconds_Behind_Master']}s behind master (threshold=$slave_lag_threshold)")
    );
 
    $epic_fail = false;
    if(is_file($skip_file))
        $epic_fail = false;
    else
    {
        foreach($tests as $test_name => $data) {
            list($field, $expr, $err_msg) = $data;
            $var = $status[$field];
            $val = eval("return $expr;");
            if(!$val) {
                mail($mailto,$mailsubject,$mailmessage,$mailheaders);
                $epic_fail = true;
            }
        }
    }
 
    if(!$epic_fail) {
        print "OK: Checks all completed successfully\n";
    }
?>

3. Change related value on following line:

/* **************************
 * Change related value below
 * **************************
 */
    $host = "localhost";
    $user = "root";
    $pass = "Mtuserp@ss";
    $mailto = "[email protected]"; // Your email address
    $mailsubject = "mysql.mydomain.org Replication Alert";
    $mailfrom = "[email protected]";

4. Setup cron to execute the script every 15 minutes:

*/15 * * * * /usr/bin/php -q /root/scripts/check_slave.php

5. Restart cron service:

$ service crond restart

Done. You should receive notification email if the master/slave replication failed!

 

Update

Following script is being modified by Joel Brock as stated in comment section below:

<!--?php 
/**
* Description: This script checks the slave status on the configured host
* printing "BAD ” or “OK ” for failure
* or success respectively.
* The possible failures could include:
* 1) connection failure
* 2) query failure (permissions, network, etc.)
* 3) fetch failure (???)
* 4) slave or io thread is not running
* 5) Unknown master state (seconds_behind_master is null)
* 6) seconds_behind_master has exceeded the configured threshold
*
* If none of these condition occur, we asssume success and return
* an “OK” response, otherwise we include the error we can find
* (mysqli_connect_error() or $mysqli--->error, or problem
* description). A monitoring system need only check for:
* /^BAD/ (alert) or /^OK/ (everybody happy)
*/
 
/* **************************
* Change related value below
* **************************
*/
$host = array(
// “cr-1″ => “192.168.0.81″,
// “cr-2″ => “192.168.0.82″,
// “cr-3″ => “192.168.0.83″,
// “cr-4″ => “192.168.0.84″,
// “cr-5″ => “192.168.0.85″,
// “jp-1″ => “192.168.100.81″,
// “jp-2″ => “192.168.100.82″,
// “jp-3″ => “192.168.100.83″,
// “jp-4″ => “192.168.100.84″,
// “jp-5″ => “192.168.100.85″,
“cr-test” => “192.168.0.87″
);
$user = “”;
$pass = “”;
$mailto = “”;
$mailfrom = “”;
 
/* ******************************************
* No need to change anything below this line
* ******************************************
*/
 
error_reporting(E_ALL);
header(“Content-Type: text/plain”); # Not HTML
foreach ($host as $key => $value) {
 
$mailsubject =[".$key."] SLAVE REPLICATION ALERT”;
$mailheaders = “From:. $mailfrom;
$sql = “SHOW SLAVE STATUS”;
$skip_file = ‘skip_alerts’;
$link = mysql_connect($value, $user, $pass, null);
 
if($link)
    $result = mysql_query($sql, $link);
else {
    printf(“BAD: Connection Failed %s”, mysql_error());
    mysql_close($link);
    return;
}
 
if($result)
    $status = mysql_fetch_assoc($result);
else {
    printf(“BAD: Query failed – %s\n”, mysql_error($link));
    mysql_close($link);
    return;
}
 
mysql_close($link);
 
$slave_lag_threshold = 120;
 
$tests = array(
    ‘test_slave_io_thread’ => array(‘Slave_IO_Running’, “\$var === ‘Yes’”,
    ‘Slave IO Thread is not running’),
    ‘test_slave_sql_thread’ => array(‘Slave_SQL_Running’, “\$var === ‘Yes’”,
    ‘Slave SQL Thread is not running’),
    ‘test_last_err’ => array(‘Last_Errno’, “\$var == 0,
    “Error encountered during replication – ”
    .$status['Last_Error']),
    ‘test_master_status’ => array(‘Seconds_Behind_Master’,isset(\$var),
    ‘Unknown master status (Seconds_Behind_Master IS NULL)),
    ‘test_slave_lag’ => array(‘Seconds_Behind_Master’,
    “\$var $data) {
        list($field, $expr, $err_msg) = $data;
        $var = $status[$field];
        $val = eval(return $expr;);
        $val1 = (!$val) ? $val1 + 1 : $val11;
        $mailmessage .= “BAD:. $key . ” replication failed. Reason:. $err_msg . “\n”;
        }
    if ($val1 > 0) {
        mail($mailto,$mailsubject,$mailmessage,$mailheaders);
        print $mailmessage . “\n”;
        $epic_fail = true;
        }
    }
 
    if(!$epic_fail) {
        print “OK: Checks all completed successfully on [".$key."]\n”;
    }
}
?>

15 Responses to Linux: Email Alert on MySQL Replication Failure

  1. Ted SNo Gravatar says:

    Very very very helpful script, thanks!

    One small, line $mailmessage = “BAD: “.$err_msg.”\n\”; should not have that final backslash and should not be commented out which seems to have happened during the html paste.

    Thanks again!

  2. hhalatNo Gravatar says:

    Thanks for the script. It helps me a lot.

  3. Jason DoucetteNo Gravatar says:

    I was just thinking about this as we prep for our first replication setup, Googled, found you, and now I can hit off more stuff on my todo list. Thanks for sharing!

  4. Piotr GulbisNo Gravatar says:

    Hello. Thank you very much for your script. It is exactly what I am looking for. However I am having a problem running it:

    PHP Notice: Undefined variable: err_msg in /root/scripts/check_slave.php on line 36
    PHP Fatal error: Call to undefined function mysql_connect() in /root/scripts/check_slave.php on line 42

    Any ideas?

    • SecaGuyNo Gravatar says:

      It seems like you do not compile PHP with MySQL support. Make sure php-mysql package is installed. You can verify this with phpinfo page.
      If RedHat based, run:
      yum install php-mysql
      If Debian based, run:
      apt-get install php5-mysql

  5. Joel BrockNo Gravatar says:

    Very excellent work!! I’ve made a couple changes to the script to adapt it to our needs. Changes include:

    -feed it an array of IPs and run centrally from the server/master, rather than locally on each slave
    -consolidated errors into 1 email, rather than 3 or more per slave failure (hint: feed it your cellphones email address for text notifications)
    -fixed the $err_msg undefined var error

    Not sure what’ll happen if i paste it in here, but here goes…

    <?php
    /**
    * Description: This script checks the slave status on the configured host
    * printing "BAD ” or “OK ” for failure
    * or success respectively.
    * The possible failures could include:
    * 1) connection failure
    * 2) query failure (permissions, network, etc.)
    * 3) fetch failure (???)
    * 4) slave or io thread is not running
    * 5) Unknown master state (seconds_behind_master is null)
    * 6) seconds_behind_master has exceeded the configured threshold
    *
    * If none of these condition occur, we asssume success and return
    * an “OK” response, otherwise we include the error we can find
    * (mysqli_connect_error() or $mysqli->error, or problem
    * description). A monitoring system need only check for:
    * /^BAD/ (alert) or /^OK/ (everybody happy)
    */

    /* **************************
    * Change related value below
    * **************************
    */
    $host = array(
    // “cr-1″ => “192.168.0.81″,
    // “cr-2″ => “192.168.0.82″,
    // “cr-3″ => “192.168.0.83″,
    // “cr-4″ => “192.168.0.84″,
    // “cr-5″ => “192.168.0.85″,
    // “jp-1″ => “192.168.100.81″,
    // “jp-2″ => “192.168.100.82″,
    // “jp-3″ => “192.168.100.83″,
    // “jp-4″ => “192.168.100.84″,
    // “jp-5″ => “192.168.100.85″,
    “cr-test” => “192.168.0.87″
    );
    $user = “”;
    $pass = “”;
    $mailto = “”;
    $mailfrom = “”;

    /* ******************************************
    * No need to change anything below this line
    * ******************************************
    */

    error_reporting(E_ALL);
    header(“Content-Type: text/plain”); # Not HTML
    foreach ($host as $key => $value) {

    $mailsubject = “[".$key."] SLAVE REPLICATION ALERT”;
    $mailheaders = “From:” . $mailfrom;
    $sql = “SHOW SLAVE STATUS”;
    $skip_file = ‘skip_alerts’;
    $link = mysql_connect($value, $user, $pass, null);

    if($link)
    $result = mysql_query($sql, $link);
    else {
    printf(“BAD: Connection Failed %s”, mysql_error());
    mysql_close($link);
    return;
    }

    if($result)
    $status = mysql_fetch_assoc($result);
    else {
    printf(“BAD: Query failed – %s\n”, mysql_error($link));
    mysql_close($link);
    return;
    }

    mysql_close($link);

    $slave_lag_threshold = 120;

    $tests = array(
    ‘test_slave_io_thread’ => array(‘Slave_IO_Running’, “\$var === ‘Yes’”,
    ‘Slave IO Thread is not running’),
    ‘test_slave_sql_thread’ => array(‘Slave_SQL_Running’, “\$var === ‘Yes’”,
    ‘Slave SQL Thread is not running’),
    ‘test_last_err’ => array(‘Last_Errno’, “\$var == 0″,
    “Error encountered during replication – ”
    .$status['Last_Error']),
    ‘test_master_status’ => array(‘Seconds_Behind_Master’, “isset(\$var)”,
    ‘Unknown master status (Seconds_Behind_Master IS NULL)’),
    ‘test_slave_lag’ => array(‘Seconds_Behind_Master’,
    “\$var $data) {
    list($field, $expr, $err_msg) = $data;
    $var = $status[$field];
    $val = eval(“return $expr;”);
    $val1 = (!$val) ? $val1 + 1 : $val1 – 1;
    $mailmessage .= “BAD: ” . $key . ” replication failed. Reason: ” . $err_msg . “\n”;
    }
    if ($val1 > 0) {
    mail($mailto,$mailsubject,$mailmessage,$mailheaders);
    print $mailmessage . “\n”;
    $epic_fail = true;
    }
    }

    if(!$epic_fail) {
    print “OK: Checks all completed successfully on [".$key."]\n”;
    }
    }
    ?>

  6. DunanshiNo Gravatar says:

    I obtain a
    PHP Parse error: syntax error, unexpected T_RETURN, expecting ‘)’
    Wwhen execute the modified script.

    Any ideas ?

  7. qmicNo Gravatar says:

    I don`t know how the hell Joel Brock`s script could work :) .
    I`ve corrected it

    /* ******************************************
    * No need to change anything below this line
    * ******************************************
    */
    error_reporting(E_ALL);
    header("Content-Type: text/plain"); # Not HTML
    foreach ($host as $key => $value) {

    $mailsubject = "[".$key."] SLAVE REPLICATION ALERT";
    $mailheaders = "From:" . $mailfrom;
    $sql = "SHOW SLAVE STATUS";
    $skip_file = 'skip_alerts';
    $link = mysql_connect($value, $user, $pass, null);

    if($link)
    $result = mysql_query($sql, $link);
    else {
    printf("BAD: Connection Failed %s", mysql_error());
    mysql_close($link);
    return;
    }

    if($result)
    $status = mysql_fetch_assoc($result);
    else {
    printf("BAD: Query failed – %s\n", mysql_error($link));
    mysql_close($link);
    return;
    }

    mysql_close($link);

    $slave_lag_threshold = 120;

    $tests = array(
    'test_slave_io_thread' => array('Slave_IO_Running', "\$var === 'Yes'",'Slave IO Thread is not running'),
    'test_slave_sql_thread' => array('Slave_SQL_Running', "\$var === 'Yes'",'Slave SQL Thread is not running'),
    'test_last_err' => array('Last_Errno', "\$var == 0","Error encountered during replication – ".$status['Last_Error']),
    'test_master_status' => array('Seconds_Behind_Master', "isset(\$var)",'Unknown master status', '(Seconds_Behind_Master IS NULL)'),
    'test_slave_lag' => array('Seconds_Behind_Master', "\$var 0) {
    mail($mailto,$mailsubject,$mailmessage,$mailheaders);
    print $mailmessage . "\n";
    $epic_fail = true;
    }
    }

    if(!$epic_fail) {
    print "OK: Checks all completed successfully on [".$key."]\n";
    }

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>