Ремонт mysql базы nagios

Описание проблемы

Сломались таблицы в mysql базе, в которую писались данные системы мониторинга Nagios.
Сам nagios работал, но в лог в огромном количестве сыпались сообщения об ошибках ndo2db.

Ошибки ndo2db

Ошибки ndo2db

2017-04-11T13:49:25.025157+03:00 msk-02-noc ndo2db: mysql_error: 'Table './nagios_db/nagios_hoststatus' is marked as crashed and should be repaired'

2017-04-11T13:49:25.025167+03:00 msk-02-noc ndo2db: Error: mysql_query() failed for 'INSERT INTO nagios_hoststatus SET instance_id='1', host_object_id='207', status_update_time=FROM_UNIXTIME(1491907765), output='PING OK - Packet loss = 0%, RTA = 61\.61 ms', long_output='', perfdata='rta=61\.612000ms;3000\.000000;5000\.000000;0\.000000 pl=0%;80;100;0', current_state='0', has_been_checked='1', should_be_scheduled='1', current_check_attempt='1', max_check_attempts='3', last_check=FROM_UNIXTIME(1491907746), next_check=FROM_UNIXTIME(1491907807), check_type='0', last_state_change=FROM_UNIXTIME(1491247748), last_hard_state_change=FROM_UNIXTIME(1489995197), last_hard_state='0', last_time_up=FROM_UNIXTIME(1491907764), last_time_down=FROM_UNIXTIME(1491247676), last_time_unreachable=FROM_UNIXTIME(1491237594), state_type='1', last_notification=FROM_UNIXTIME(0), next_notification=FROM_UNIXTIME(0), no_more_notifications='0', notifications_enabled='1', problem_has_been_acknowledged='0', acknowledgement_type='0', current_notification_number='0', passive_checks_enabled='1', active_checks_enabled='1', event_handler_enabled='1', flap_detection_enabled='0', is_flapping='0', percent_state_change='0.000000', latency='6.762000', execution_time='2.420050', scheduled_downtime_depth='0', failure_prediction_enabled='1', process_performance_data='1', obsess_over_host='1', modified_host_attributes='0', event_handler='', check_command='check-host-alive', normal_check_interval='1.000000', retry_check_interval='0.100000', check_timeperiod_object_id='2' ON DUPLICATE KEY UPDATE instance_id='1', host_object_id='207', status_update_time=FROM_UNIXTIME(1491907765), output='PING OK - Packet loss = 0%, RTA = 61\.61 ms', long_output='', perfdata='rta=61\.612000ms;3000\.000000;5000\.000000;0\.000000 pl=0%;80;100;0', current_state='0', has_been_checked='1', should_be_scheduled='1', current_check_attempt='1', max_check_attempts='3', last_check=FROM_UNIXTIME(1491907746), next_check=FROM_UNIXTIME(1491907807), 
check_type='0', last_state_change=FROM_UNIXTIME(1491247748), last_hard_state_change=FROM_UNIXTIME(1489995197), last_hard_state='0', last_time_up=FROM_UNIXTIME(1491907764), last_time_down=FROM_UNIXTIME(1491247676), last_time_unreachable=FROM_UNIXTIME(1491237594), state_type='1', last_notification=FROM_UNIXTIME(0), next_notification=FROM_UNIXTIME(0), no_more_notifications='0', notifications_enabled='1', problem_has_been_acknowledged='0', acknowledgement_type='0', current_notification_number='0', passive_checks_enabled='1', active_checks_enabled='1', event_handler_enabled='1', flap_detection_enabled='0', is_flapping='0', percent_state_change='0.000000', latency='6.762000', execution_time='2.420050', scheduled_downtime_depth='0', failure_prediction_enabled='1', process_performance_data='1', obsess_over_host='1', modified_host_attributes='0', event_handler='', check_command='check-host-alive', normal_check_interval='1.000000', retry_check_interval='0.100000', check_timeperiod_object_id='2''


С помощью grep и awk в логах mysql ищем имена сломавшихся таблиц.

[root@msk-02-noc log]# grep  'crashed and should be repaired' mysqld.log | awk '{print $6}' | sort | uniq
'./nagios_db/nagios_hoststatus'
'./nagios_db/nagios_servicechecks'
'./nagios_db/nagios_systemcommands'
'./nagios_db/nagios_timedeventqueue'

Решение

Останавливаем nagios и с помощью mysqlcheck чиним таблицы.

Процесс работы mysqlcheck

Процесс работы mysqlcheck

[root@msk-02-noc nagios]# mysqlcheck -unagios_user -p --repair --extended nagios_db
Enter password:
nagios_db.nagios_acknowledgements                  OK
nagios_db.nagios_commands                          OK
nagios_db.nagios_commenthistory                    OK
nagios_db.nagios_comments                          OK
nagios_db.nagios_configfiles                       OK
nagios_db.nagios_configfilevariables               OK
nagios_db.nagios_conninfo                          OK
nagios_db.nagios_contact_addresses                 OK
nagios_db.nagios_contact_notificationcommands      OK
nagios_db.nagios_contactgroup_members              OK
nagios_db.nagios_contactgroups                     OK
nagios_db.nagios_contactnotificationmethods        OK
nagios_db.nagios_contactnotifications              OK
nagios_db.nagios_contacts                          OK
nagios_db.nagios_contactstatus                     OK
nagios_db.nagios_customvariables                   OK
nagios_db.nagios_customvariablestatus              OK
nagios_db.nagios_dbversion                         OK
nagios_db.nagios_downtimehistory                   OK
nagios_db.nagios_eventhandlers                     OK
nagios_db.nagios_externalcommands                  OK
nagios_db.nagios_flappinghistory                   OK
nagios_db.nagios_host_contactgroups                OK
nagios_db.nagios_host_contacts                     OK
nagios_db.nagios_host_parenthosts                  OK
nagios_db.nagios_hostchecks                        OK
nagios_db.nagios_hostdependencies                  OK
nagios_db.nagios_hostescalation_contactgroups      OK
nagios_db.nagios_hostescalation_contacts           OK
nagios_db.nagios_hostescalations                   OK
nagios_db.nagios_hostgroup_members                 OK
nagios_db.nagios_hostgroups                        OK
nagios_db.nagios_hosts                             OK
nagios_db.nagios_hoststatus
info     : Duplicate key  2 for record at 13480 against new record at 0
warning  : Number of rows changed from 268 to 267
status   : OK
nagios_db.nagios_instances                         OK
nagios_db.nagios_logentries                        OK
nagios_db.nagios_notifications                     OK
nagios_db.nagios_objects                           OK
nagios_db.nagios_processevents                     OK
nagios_db.nagios_programstatus                     OK
nagios_db.nagios_runtimevariables                  OK
nagios_db.nagios_scheduleddowntime                 OK
nagios_db.nagios_service_contactgroups             OK
nagios_db.nagios_service_contacts                  OK
nagios_db.nagios_servicechecks
warning  : Number of rows changed from 1095125 to 1094998
status   : OK
nagios_db.nagios_servicedependencies               OK
nagios_db.nagios_serviceescalation_contactgroups   OK
nagios_db.nagios_serviceescalation_contacts        OK
nagios_db.nagios_serviceescalations                OK
nagios_db.nagios_servicegroup_members              OK
nagios_db.nagios_servicegroups                     OK
nagios_db.nagios_services                          OK
nagios_db.nagios_servicestatus                     OK
nagios_db.nagios_statehistory                      OK
nagios_db.nagios_systemcommands
info     : Wrong bytesec:  23- 92-183 at 92223440; Skipped
info     : Wrong bytesec: 108-105- 98 at 395057336; Skipped
info     : Wrong block with wrong total length starting at 91145760
info     : Found block with too small length at 91146520; Skipped
info     : Found block with too small length at 91430372; Skipped
info     : Found block with too small length at 91430380; Skipped
info     : Wrong bytesec: 117-115-114 at 91648060; Skipped
info     : Wrong bytesec: 112-108- 15 at 485197884; Skipped
info     : Found block with too small length at 399791272; Skipped
info     : Wrong bytesec: 111-115- 47 at 91647448; Skipped
warning  : Number of rows changed from 3876808 to 3871726
status   : OK
nagios_db.nagios_timedeventqueue
warning  : Number of rows changed from 1009 to 948
status   : OK
nagios_db.nagios_timedevents                       OK
nagios_db.nagios_timeperiod_timeranges             OK
nagios_db.nagios_timeperiods                       OK