4 Replies Latest reply on May 5, 2008 8:22 PM by NDEric

    Linux kernel: end_request: I/O error

    435956
      Hi,

      I am intermittently getting the following error in /var/log/messages on a Linux server running an Oracle ASM database. This is a 2-node RAC database. Apart from these messages, everything appears to be working fine.

      We are using:

      RHEL4(U4)
      Oracle 10gR2 with ASM and Raw Devices (OCR and Voting disks are also on raw devices).

      Here are the error messages, followed by some of the configuration files and command output:

      Linux error:

      Dec 3 13:28:31 cc-orcl-ld2 kernel: SCSI error : <0 0 0 2> return code = 0x20000
      Dec 3 13:28:31 cc-orcl-ld2 kernel: end_request: I/O error, dev sdc, sector 1606333775
      Dec 3 13:28:31 cc-orcl-ld2 kernel: end_request: I/O error, dev sdc, sector 1606333776


      ##############################################################################################

      # cat /proc/cmdline
      ro root=LABEL=/ rhgb quiet noexec=off numa=off elevator=deadline console=tty0

      ##############################################################################################
      [oracle@cc-orcl-ld2 io-error]$ cat /etc/sysconfig/rawdevices

      # This file and interface are deprecated.
      # Applications needing raw device access should open regular
      # block devices with O_DIRECT.
      # raw device bindings
      # format: <rawdev> <major> <minor>
      # <rawdev> <blockdev>
      # example: /dev/raw/raw1 /dev/sda1
      # /dev/raw/raw2 8 5
      # Voting Disk 500mb
      /dev/raw/raw1 /dev/emcpowerb1
      # OCR-500mb
      /dev/raw/raw2 /dev/emcpowerc1
      # Oracle Data
      /dev/raw/raw3 /dev/emcpowera1
      # New Oracle Volume
      /dev/raw/raw4 /dev/emcpowerd1

      ##############################################################################################
      [oracle@cc-orcl-ld2 io-error]$ cat /etc/fstab
      # This file is edited by fstab-sync - see 'man fstab-sync' for details
      LABEL=/ / ext3 defaults 1 1
      LABEL=/boot /boot ext3 defaults 1 2
      none /dev/pts devpts gid=5,mode=620 0 0
      none /dev/shm tmpfs defaults 0 0
      LABEL=/home /home ext3 defaults 1 2
      none /proc proc defaults 0 0
      none /sys sysfs defaults 0 0
      LABEL=/tmp /tmp ext3 defaults 1 2
      LABEL=/u01 /u01 ext3 defaults 1 2
      LABEL=/var /var ext3 defaults 1 2
      /dev/cciss/c0d0p2 swap swap defaults 0 0

      # NAS Mounts
      cc-file-ap3:/root_vdm_12/fs_e2/vol1 /cc-file-ap3/vol1 nfs rsize=8192,wsize=8192,async,hard,bg 0 0
      cc-file-ap4:/vol/vol2 /cc-file-ap4/vol2 nfs rw,bg,vers=3,tcp,timeo=600,rsize=8192,wsize=8192,hard,intr 0 0
      cc-file-ap2:/root_vdm_10/fs_ccc_2/ccc_2 /cc-file-ap2/ccc_2 nfs rw,bg,vers=3,tcp,timeo=600,rsize=8192,wsize=8192,hard,intr 0 0
      cc-file-ap6:/vol/test6f /cc-file-ap6/test6f nfs nfsvers=3,proto=tcp,hard,intr,rsize=8192,wsize=8192 0 0
      cc-file-ap3:/root_vdm_12/fs_444_3/444_3 /cc-file-ap3/444_3 nfs rsize=8192,wsize=8192,async,hard,bg 0 0
      cc-file-ap3:/root_vdm_12/fs_statefarmtest/statefarmtest /cc-file-ap3/statefarmtest nfs rsize=8192,wsize=8192,async,hard,bg 0 0
      cc-file-ap5:/vol/VOL_688_1 /prodservices/data/custdata/688 nfs rsize=8192,wsize=8192,async,hard,bg 0 0
      ve-orcl-ld1:/u02 /ve-orcl-ld1/u02 nfs rsize=8192,wsize=8192,async,hard,bg 0 0

      # NS ticket 54133
      cc-file-ap1:/root_vdm_8/fs_444_1/444_1 /cc-file-ap1/444_1 nfs rw,bg,vers=3,tcp,timeo=600,rsize=8192,wsize=8192,hard,intr 0 0
      cc-file-ap2:/root_vdm_10/fs_444_2/444_2 /cc-file-ap2/444_2 nfs rw,bg,vers=3,tcp,timeo=600,rsize=8192,wsize=8192,hard,intr 0 0

      #Ticket 55504
      cc-file-ap4:/vol/VOL_686_1 /prodservices/data/custdata/686 nfs rsize=8192,wsize=8192,async,hard,bg 0 0

      # per ticket number 58783 ndm 09/07/07
      cc-file-ap4:/vol/V_699_1 /prodservices/data/custdata/699 nfs rsize=8192,wsize=8192,async,hard,bg 0 0

      # per task id 81662 "Mount NAS volume" NDM 10/24/07
      dev-file-ad4:/vol/fv_cebeta_01 /devservices/data/custdata/beta nfs rsize=8192,wsize=8192,async,hard,bg 0 0


      ##############################################################################################
      # sudo /sbin/powermt display dev=all

      Pseudo name=emcpowerc
      CLARiiON ID=APM00062200016 [sg_cc-orcl-RAC-dev]
      Logical device ID=600601606590180002FB58708710DC11 [cc-orcl-RAC-dev_OCR-log-500M_L13R10RG1SB]
      state=alive; policy=BasicFailover; priority=0; queued-IOs=0
      Owner: default=SP B, current=SP B
      ==============================================================================
      ---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
      ### HW Path I/O Paths Interf. Mode State Q-IOs Errors
      ==============================================================================
      0 qla2xxx sda SP B1 active alive 0 0
      0 qla2xxx sde SP A0 active alive 0 0

      Pseudo name=emcpowerb
      CLARiiON ID=APM00062200016 [sg_cc-orcl-RAC-dev]
      Logical device ID=600601606590180046DECA868710DC11 [cc-orcl-RAC-dev_voting-data-log_500M_L19R10RG1SB]
      state=alive; policy=BasicFailover; priority=0; queued-IOs=0
      Owner: default=SP B, current=SP B
      ==============================================================================
      ---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
      ### HW Path I/O Paths Interf. Mode State Q-IOs Errors
      ==============================================================================
      0 qla2xxx sdb SP B1 active alive 0 0
      0 qla2xxx sdf SP A0 active alive 0 0

      Pseudo name=emcpowera
      CLARiiON ID=APM00062200016 [sg_cc-orcl-RAC-dev]
      Logical device ID=600601606590180082D4901F7E10DC11 [cc-orcl-RAC-dev_oracle-data_1.2T_ML381_SB]
      state=alive; policy=BasicFailover; priority=0; queued-IOs=0
      Owner: default=SP B, current=SP B
      ==============================================================================
      ---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
      ### HW Path I/O Paths Interf. Mode State Q-IOs Errors
      ==============================================================================
      0 qla2xxx sdc SP B1 active alive 0 0
      0 qla2xxx sdg SP A0 active alive 0 0

      Pseudo name=emcpowerd
      CLARiiON ID=APM00062200016 [sg_cc-orcl-RAC-dev]
      Logical device ID=60060160C8FC1C00043E02FC265BDC11 [cc-orcl-RAC-dev_Logs_100G_L68R5RG17SB]
      state=alive; policy=BasicFailover; priority=0; queued-IOs=0
      Owner: default=SP B, current=SP B
      ==============================================================================
      ---------------- Host --------------- - Stor - -- I/O Path - -- Stats ---
      ### HW Path I/O Paths Interf. Mode State Q-IOs Errors
      ==============================================================================
      0 qla2xxx sdd SP B1 active alive 0 0
      0 qla2xxx sdh SP A0 active alive 0 0


      ##############################################################################################
      cat /proc/scsi/scsi
      Attached devices:
      Host: scsi0 Channel: 00 Id: 00 Lun: 00
      Vendor: DGC Model: RAID 10 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 00 Lun: 01
      Vendor: DGC Model: RAID 10 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 00 Lun: 02
      Vendor: DGC Model: RAID 5 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 00 Lun: 03
      Vendor: DGC Model: RAID 5 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 01 Lun: 00
      Vendor: DGC Model: RAID 10 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 01 Lun: 01
      Vendor: DGC Model: RAID 10 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 01 Lun: 02
      Vendor: DGC Model: RAID 5 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi0 Channel: 00 Id: 01 Lun: 03
      Vendor: DGC Model: RAID 5 Rev: 0324
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi1 Channel: 00 Id: 00 Lun: 00
      Vendor: DGC Model: LUNZ Rev: 0219
      Type: Direct-Access ANSI SCSI revision: 04
      Host: scsi1 Channel: 00 Id: 01 Lun: 00
      Vendor: DGC Model: LUNZ Rev: 0219
      Type: Direct-Access ANSI SCSI revision: 04

      ##############################################################################################
      [oracle@ce-orcl-lx2 ~]$ cat /etc/sysctl.conf
      # Kernel sysctl configuration file for Red Hat Linux
      #
      # For binary values, 0 is disabled, 1 is enabled. See sysctl(8) and
      # sysctl.conf(5) for more details.

      # Controls IP packet forwarding
      net.ipv4.ip_forward = 0

      # Controls source route verification
      net.ipv4.conf.default.rp_filter = 1

      # Do not accept source routing
      net.ipv4.conf.default.accept_source_route = 0

      # Controls the System Request debugging functionality of the kernel
      kernel.sysrq = 0

      # Controls whether core dumps will append the PID to the core filename.
      # Useful for debugging multi-threaded applications.
      kernel.core_uses_pid = 1

      #Oracle
      kernel.core_uses_pid = 1
      kernel.shmall = 2097152
      kernel.shmmax = 8589934592
      kernel.shmmni = 4096
      kernel.sem = 250 32000 100 128
      fs.file-max = 658576
      net.ipv4.ip_local_port_range = 1024 65000
      net.core.rmem_default = 262144
      net.core.wmem_default = 262144
      net.core.rmem_max = 1048536
      net.core.wmem_max = 1048536


      Any help is really appreciated.

      thanks
      rk