Commit a4b089b2 authored by Vinicius Ruoso's avatar Vinicius Ruoso

Added detection of disks with I/O errors through the syslog daemon

When a sync fails, or the RAID state is not one of the common cases, the
fixraid script will try to rebuild the arrays. But sometimes it is a waste
of time trying to re-add a partition to an array because the sync will
fail.

We are adding detection of this situation: a disk with I/O errors reported
in the syslog daemon's log files.
Signed-off-by: Vinicius Ruoso <vkr07@c3sl.ufpr.br>
parent 605ba7fb
......@@ -426,6 +426,51 @@ function checkpartitions(){
return $FAILED
}
# Check whether a disk is known to have I/O errors. A previously detected
# disk is remembered in INFOFILE; otherwise the syslog file is searched for
# new I/O errors mentioning sda or sdb.
#
# Arguments (all optional; defaults are the paths this function always used):
#   $1 - info file recording the failing disk  (default /etc/sdi/fixraid.info)
#   $2 - syslog file searched for I/O errors    (default /var/log/syslog)
#   $3 - directory holding the disk-id links    (default /dev/disk/by-id)
# Globals written: INFOFILE, ERRORS, DISK, DISKID, SYSLOG. They are left
#   global, as before, in case other parts of the script read them.
# Calls: print (messages) and removefromraid (with the failing disk name).
# Returns: 0 when no I/O-error disk is known or the recorded one is gone,
#          1 when a disk with I/O errors is recorded or newly detected.
function checkioerrors(){
    INFOFILE="${1:-/etc/sdi/fixraid.info}"
    local syslog_file="${2:-/var/log/syslog}"
    local byid_dir="${3:-/dev/disk/by-id}"
    local link target
    ERRORS=0

    # some I/O error has happened before
    if test -f "$INFOFILE"; then
        # the info file holds "<disk id> <device name>"; take the id
        DISK="$(cut -d" " -f1 < "$INFOFILE")"
        # An empty id cannot be checked for removal, so stay conservative and
        # keep reporting the error. The -L test keeps a dangling link counted
        # as present, like the former "ls" check did.
        if [[ -z "$DISK" || -e "$byid_dir/$DISK" || -L "$byid_dir/$DISK" ]]; then
            print "IOERRORS: PREVIOUSLY I/O ERROR DETECTED"
            ERRORS=1
        else
            # if the I/O error disk has been removed
            # let's give the script a chance to fix it
            print "IOERRORS: LAST I/O ERROR DISK REMOVED"
            print "IOERRORS: WILL LET TRY TO REBUILD THE ARRAYS"
            rm -f -- "$INFOFILE"
        fi
        return $ERRORS
    fi

    # search the syslog for an I/O error; a missing or unreadable syslog
    # simply means nothing can be detected this way
    SYSLOG=""
    if [[ -r "$syslog_file" ]]; then
        SYSLOG=$(grep -E "(I/O error|I/O read error)" -- "$syslog_file")
    fi
    # sda takes precedence when both disks show up in the error lines
    if [[ "$SYSLOG" == *sda* ]]; then
        DISK=sda
        ERRORS=1
    elif [[ "$SYSLOG" == *sdb* ]]; then
        DISK=sdb
        ERRORS=1
    fi

    # add to INFOFILE the I/O error disk
    if test "$ERRORS" = 1; then
        mkdir -p -- "$(dirname -- "$INFOFILE")"
        # Find the first id link whose target is the failing device. Reading
        # the link target directly avoids parsing "ls -l" columns, which
        # depend on the locale and on the date format.
        DISKID=""
        for link in "$byid_dir"/*; do
            [[ -L "$link" ]] || continue
            target=$(readlink -- "$link")
            if [[ "${target##*/}" == "$DISK" ]]; then
                DISKID="${link##*/}"
                break
            fi
        done
        echo "$DISKID $DISK" > "$INFOFILE"
        print "IOERRORS: ADDED $DISK TO LIST OF I/O ERROR DISKS DUE TO SYSLOG"
        removefromraid "$DISK"
    fi
    return $ERRORS
}
# function to check the relation between disk sizes
# assumes that sda and sdb exist
function checksizes(){
......@@ -484,6 +529,10 @@ elif grep -q "_" /proc/mdstat; then
elif ! test -e /dev/sdb; then
print "NOTHING TO DO: /dev/sdb IS NOT HERE"
# if some disk are in I/O error state
elif ! checkioerrors; then
print "ABORTING"
# if some partition are bad, abort
elif ! checkpartitions; then
print "ABORTING: PARTITIONS ARE NOT READY"
......
#!/bin/bash
# export to correctly get english language
# and INFOFILE path for IO errors detection
export LC_ALL=C
INFOFILE="/etc/sdi/fixraid.info"
# raid must be enabled
if ! test -e /proc/mdstat; then
......@@ -27,6 +29,10 @@ elif grep -q "_" /proc/mdstat; then
elif grep -q "read error" <<< $(sfdisk -d /dev/sdb 2>&1); then
MSG="$MSG sdb_IOERROR"
# check for disks with I/O errors based on syslog
elif test -f "$INFOFILE"; then
MSG="$MSG NOINFO_IO_ERROR_$(cat "$INFOFILE" | cut -d" " -f2)"
# check for faulty partitions
elif grep -q "\(F\)" /proc/mdstat; then
FAULTY=$(grep "\(F\)" /proc/mdstat |awk -F"\(F" '{print $1}'|
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment