Gigabyte GA-MA74GM-S2H - FreeBSD - 2011 archive

back to main FreeBSD page for this machine.

2011 archive.

2011-06-03: I upgraded to FreeBSD 8.2-stable, using csup and make world. dmesg output: normal, verbose.

root@kg-f2# uname -a
FreeBSD kg-f2.kg4.no 8.2-STABLE FreeBSD 8.2-STABLE #5: Fri Jun  3 17:20:39 CEST 2011     root@kg-f2.kg4.no:/usr/obj/usr/src/sys/GENERIC  amd64

After booting, I see this value:

root@kg-f2# sysctl vfs.zfs.arc_max
vfs.zfs.arc_max: 2921869312

Based on advice from the freebsd-stable mailing list, I'm setting

vfs.zfs.arc_max="2048M"

in /boot/loader.conf. Then I rebooted again. After the reboot, I have:

root@kg-f2# sysctl vfs.zfs.arc_max
vfs.zfs.arc_max: 2147483648

Hopefully, this will make the machine stable. Various interesting information:

root@kg-f2# sysctl vfs.zfs.zio.use_uma
vfs.zfs.zio.use_uma: 0
root@kg-f2# sysctl hw.physmem hw.usermem hw.realmem
hw.physmem: 4141666304
hw.usermem: 4019376128
hw.realmem: 4966055936

and pciconf output:

root@kg-f2# pciconf -lv
hostb0@pci0:0:0:0:    class=0x060000 card=0x50001458 chip=0x79111002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    class      = bridge
    subclass   = HOST-PCI
pcib1@pci0:0:1:0:    class=0x060400 card=0x79121002 chip=0x79121002 rev=0x00 hdr=0x01
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'RS690 PCI to PCI Bridge (Internal gfx)'
    class      = bridge
    subclass   = PCI-PCI
pcib2@pci0:0:4:0:    class=0x060400 card=0x50001458 chip=0x79141002 rev=0x00 hdr=0x01
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    class      = bridge
    subclass   = PCI-PCI
pcib3@pci0:0:6:0:    class=0x060400 card=0x50001458 chip=0x79161002 rev=0x00 hdr=0x01
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'RS690 PCI to PCI Bridge (PCI Express Port 2)'
    class      = bridge
    subclass   = PCI-PCI
atapci0@pci0:0:17:0:    class=0x010601 card=0xb0021458 chip=0x43911002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 SATA Controller [AHCI mode]'
    class      = mass storage
    subclass   = SATA
ohci0@pci0:0:18:0:    class=0x0c0310 card=0x50041458 chip=0x43971002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 USB OHCI0 Controller'
    class      = serial bus
    subclass   = USB
ohci1@pci0:0:18:1:    class=0x0c0310 card=0x50041458 chip=0x43981002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'Standard OpenHCD USB-Hostcontroller (SB700)'
    class      = serial bus
    subclass   = USB
ehci0@pci0:0:18:2:    class=0x0c0320 card=0x50041458 chip=0x43961002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 USB EHCI Controller'
    class      = serial bus
    subclass   = USB
ohci2@pci0:0:19:0:    class=0x0c0310 card=0x50041458 chip=0x43971002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 USB OHCI0 Controller'
    class      = serial bus
    subclass   = USB
ohci3@pci0:0:19:1:    class=0x0c0310 card=0x50041458 chip=0x43981002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'Standard OpenHCD USB-Hostcontroller (SB700)'
    class      = serial bus
    subclass   = USB
ehci1@pci0:0:19:2:    class=0x0c0320 card=0x50041458 chip=0x43961002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 USB EHCI Controller'
    class      = serial bus
    subclass   = USB
none0@pci0:0:20:0:    class=0x0c0500 card=0x43851458 chip=0x43851002 rev=0x3c hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'ATI SMBus (ATI RD600/RS600)'
    class      = serial bus
    subclass   = SMBus
atapci1@pci0:0:20:1:    class=0x01018a card=0x50021458 chip=0x439c1002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'PATA 133 Controller (SB7xx)'
    class      = mass storage
    subclass   = ATA
none1@pci0:0:20:2:    class=0x040300 card=0xa0021458 chip=0x43831002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'IXP SB600 High Definition Audio Controller'
    class      = multimedia
    subclass   = HDA
isab0@pci0:0:20:3:    class=0x060100 card=0x50011458 chip=0x439d1002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 LPC host controller'
    class      = bridge
    subclass   = PCI-ISA
pcib4@pci0:0:20:4:    class=0x060401 card=0x00000000 chip=0x43841002 rev=0x00 hdr=0x01
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'IXP SB600 PCI to PCI Bridge'
    class      = bridge
    subclass   = PCI-PCI
ohci4@pci0:0:20:5:    class=0x0c0310 card=0x50041458 chip=0x43991002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'SB700 USB OHCI2 Controller'
    class      = serial bus
    subclass   = USB
hostb1@pci0:0:24:0:    class=0x060000 card=0x00000000 chip=0x12001022 rev=0x00 hdr=0x00
    vendor     = 'Advanced Micro Devices (AMD)'
    device     = '(Family 10h) Athlon64/Opteron/Sempron HyperTransport Technology Configuration'
    class      = bridge
    subclass   = HOST-PCI
hostb2@pci0:0:24:1:    class=0x060000 card=0x00000000 chip=0x12011022 rev=0x00 hdr=0x00
    vendor     = 'Advanced Micro Devices (AMD)'
    device     = '(Family 10h) Athlon64/Opteron/Sempron Address Map'
    class      = bridge
    subclass   = HOST-PCI
hostb3@pci0:0:24:2:    class=0x060000 card=0x00000000 chip=0x12021022 rev=0x00 hdr=0x00
    vendor     = 'Advanced Micro Devices (AMD)'
    device     = '(Family 10h) Athlon64/Opteron/Sempron DRAM Controller'
    class      = bridge
    subclass   = HOST-PCI
hostb4@pci0:0:24:3:    class=0x060000 card=0x00000000 chip=0x12031022 rev=0x00 hdr=0x00
    vendor     = 'Advanced Micro Devices (AMD)'
    device     = '(Family 10h) Athlon64/Opteron/Sempron Miscellaneous Control'
    class      = bridge
    subclass   = HOST-PCI
hostb5@pci0:0:24:4:    class=0x060000 card=0x00000000 chip=0x12041022 rev=0x00 hdr=0x00
    vendor     = 'Advanced Micro Devices (AMD)'
    device     = '(Family 10h) Athlon64/Opteron/Sempron Link Control'
    class      = bridge
    subclass   = HOST-PCI
vgapci0@pci0:1:5:0:    class=0x030000 card=0xd0001458 chip=0x796e1002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'ATI RADEON 2100 (RS690)'
    class      = display
    subclass   = VGA
none2@pci0:1:5:2:    class=0x040300 card=0x79191458 chip=0x79191002 rev=0x00 hdr=0x00
    vendor     = 'ATI Technologies Inc. / Advanced Micro Devices, Inc.'
    device     = 'Radeon X1200 Series Audio Controller'
    class      = multimedia
    subclass   = HDA
siis0@pci0:2:0:0:    class=0x018000 card=0x35311095 chip=0x35311095 rev=0x01 hdr=0x00
    vendor     = 'Silicon Image Inc (Was: CMD Technology Inc)'
    device     = 'SiI 3531 SATA Controller'
    class      = mass storage
re0@pci0:3:0:0:    class=0x020000 card=0xe0001458 chip=0x816810ec rev=0x02 hdr=0x00
    vendor     = 'Realtek Semiconductor'
    device     = 'Gigabit Ethernet NIC(NDIS 6.0) (RTL8168/8111/8111c)'
    class      = network
    subclass   = ethernet

That's it.

2011-06-02: added dumpdev="AUTO" to /etc/rc.conf, and ran /etc/rc.d/dumpon start.

2011-06-02: Today the machine had a panic (this was some time after I had started a scrub with zpool scrub storage). Transcribed here:

panic: kmem_malloc(131072): kmem_map too small: 1324613632 total allocated
cpuid = 1
KDB: stack backtrace:
#0 0xffffffff805df92e at kdb_backtrace+0x5e
#1 0xffffffff805ada77 at panic+0x187
#2 0xffffffff80800190 at kmem_alloc+0
#3 0xffffffff807f7e0a at uma_large_malloc+0x4a
#4 0xffffffff8059aee7 at malloc+0xd7
#5 0xffffffff80ed6763 at vdev_queue_io_to_issue+0x1c3
#6 0xffffffff80ed68e9 at vdev_queue_io_done+0x99
#7 0xffffffff80ee6c9f at zio_vdev_io_done+0x7f
#8 0xffffffff80ee7237 at zio_execute+0x77
#9 0xffffffff80e872f3 at taskq_run_safe+0x13
#10 0xffffffff805ea984 at taskqueue_run+0xa4
#11 0xffffffff805eabf6 at taskqueue_thread_loop+0x46
#12 0xffffffff80584278 at fork_exit+0x118
#13 0xffffffff8087f2fe at fork_trampoline+0xe
Uptime: 109d19h47m1s
Cannot dump. Device not defined or unavailable.
Automatic reboot in 15 seconds - press a key on the console to abort

(the automatic reboot didn't happen - the machine hung here). No response from the keyboard at all, so I just rebooted it. After reboot, I found out that the scrub wasn't complete:

root@kg-f2# zpool status storage
  pool: storage
 state: ONLINE
 scrub: scrub in progress for 307445734561825858h24m, 4.24% done, 307445734561825817h56m to go
config:

    NAME        STATE     READ WRITE CKSUM
    storage     ONLINE       0     0     0
      raidz1    ONLINE       0     0     0
        ad8     ONLINE       0     0     0
        ad10    ONLINE       0     0     0
        ad12    ONLINE       0     0     0
        ad14    ONLINE       0     0     0
        ada0    ONLINE       0     0     0

errors: No known data errors

We'll see how this goes. Well, the scrub completed without errors:

root@kg-f2# zpool status storage
  pool: storage
 state: ONLINE
 scrub: scrub completed after 307445734561825860h15m with 0 errors on Thu Jun  2 23:23:44 2011
config:

    NAME        STATE     READ WRITE CKSUM
    storage     ONLINE       0     0     0
      raidz1    ONLINE       0     0     0
        ad8     ONLINE       0     0     0
        ad10    ONLINE       0     0     0
        ad12    ONLINE       0     0     0
        ad14    ONLINE       0     0     0
        ada0    ONLINE       0     0     0

errors: No known data errors

Good.

2011-02-12: I stopped a scrub in progress on the storage pool after 142 hours:

root@kg-f2# zpool scrub -s storage
root@kg-f2# zpool status storage
  pool: storage
 state: ONLINE
status: One or more devices has experienced an unrecoverable error.  An
    attempt was made to correct the error.  Applications are unaffected.
action: Determine if the device needs to be replaced, and clear the errors
    using 'zpool clear' or replace the device with 'zpool replace'.
   see: http://www.sun.com/msg/ZFS-8000-9P
 scrub: scrub stopped after 142h24m with 0 errors on Sat Feb 12 16:08:26 2011
config:

    NAME        STATE     READ WRITE CKSUM
    storage     ONLINE       0     0     0
      raidz1    ONLINE       0     0     0
        ad8     ONLINE       0     0     0
        ad10    ONLINE       0     0     0
        ad12    ONLINE       0     0    73  4.54G repaired
        ad14    ONLINE       0     0     0
        ada0    ONLINE       0     0     0

errors: No known data errors

Time to replace ad12. First offline the disk in question:

root@kg-f2# zpool offline storage ad12
root@kg-f2# zpool status storage
  pool: storage
 state: DEGRADED
status: One or more devices has experienced an unrecoverable error.  An
    attempt was made to correct the error.  Applications are unaffected.
action: Determine if the device needs to be replaced, and clear the errors
    using 'zpool clear' or replace the device with 'zpool replace'.
   see: http://www.sun.com/msg/ZFS-8000-9P
 scrub: scrub stopped after 142h24m with 0 errors on Sat Feb 12 16:08:26 2011
config:

    NAME        STATE     READ WRITE CKSUM
    storage     DEGRADED     0     0     0
      raidz1    DEGRADED     0     0     0
        ad8     ONLINE       0     0     0
        ad10    ONLINE       0     0     0
        ad12    OFFLINE      0     0    73  4.54G repaired
        ad14    ONLINE       0     0     0
        ada0    ONLINE       0     0     0

errors: No known data errors

Then, physically replace the drive. Unfortunately, the drive is not seen after the replacement:

root@kg-f2# atacontrol list
ATA channel 0:
    Master:      no device present
    Slave:       no device present
ATA channel 2:
    Master:  ad4 <SAMSUNG HD252HJ/1AC01118> SATA revision 2.x
    Slave:       no device present
ATA channel 3:
    Master:  ad6 <SAMSUNG HD252HJ/1AC01118> SATA revision 2.x
    Slave:       no device present
ATA channel 4:
    Master:  ad8 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
ATA channel 5:
    Master: ad10 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
ATA channel 6:
    Master:      no device present
    Slave:       no device present
ATA channel 7:
    Master: ad14 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
root@kg-f2#

It seems that the atacontrol attach and/or reinit commands don't have any effect:

root@kg-f2# atacontrol attach ata6
Master:      no device present
Slave:       no device present
root@kg-f2# atacontrol reinit ata6
Master:      no device present
Slave:       no device present

Hmm, do I need to reboot the machine? Ok, a reboot made the drive show itself:

root@kg-f2# atacontrol list
ATA channel 0:
    Master:      no device present
    Slave:       no device present
ATA channel 2:
    Master:  ad4 <SAMSUNG HD252HJ/1AC01118> SATA revision 2.x
    Slave:       no device present
ATA channel 3:
    Master:  ad6 <SAMSUNG HD252HJ/1AC01118> SATA revision 2.x
    Slave:       no device present
ATA channel 4:
    Master:  ad8 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
ATA channel 5:
    Master: ad10 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
ATA channel 6:
    Master: ad12 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present
ATA channel 7:
    Master: ad14 <SAMSUNG HD103SJ/1AJ100E4> SATA revision 2.x
    Slave:       no device present

So I'll just replace it:

root@kg-f2# zpool replace storage ad12 ad12
root@kg-f2# zpool status storage
  pool: storage
 state: DEGRADED
status: One or more devices is currently being resilvered.  The pool will
    continue to function, possibly in a degraded state.
action: Wait for the resilver to complete.
 scrub: resilver in progress for 0h0m, 0.12% done, 3h47m to go
config:

    NAME            STATE     READ WRITE CKSUM
    storage         DEGRADED     0     0     0
      raidz1        DEGRADED     0     0     0
        ad8         ONLINE       0     0     0
        ad10        ONLINE       0     0     0
        replacing   DEGRADED     0     0     0
          ad12/old  OFFLINE      0     0     0
          ad12      ONLINE       0     0     0  997M resilvered
        ad14        ONLINE       0     0     0
        ada0        ONLINE       0     0     0

errors: No known data errors

We'll see how that turns out.