xemu/tests/qemu-iotests/060
Alberto Garcia 6bf45d59f9 qcow2: Prevent allocating refcount blocks at offset 0
Each entry in the qcow2 cache contains an offset field indicating the
location of the data in the qcow2 image. If the offset is 0 then it
means that the entry contains no data and is available to be used when
needed.

Because of that it is not possible to store in the cache the first
cluster of the qcow2 image (offset = 0). This is not a problem because
that cluster always contains the qcow2 header and we're not using this
cache for that.

However, if the qcow2 image is corrupted it can happen that we try to
allocate a new refcount block at offset 0, triggering this assertion
and crashing QEMU:

  qcow2_cache_entry_mark_dirty: Assertion `c->entries[i].offset != 0' failed

This patch adds an explicit check for this scenario and a new test
case.

This problem was originally reported here:

   https://bugs.launchpad.net/qemu/+bug/1728615

Reported-by: R.Nageswara Sastry <nasastry@in.ibm.com>
Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 92a2fadd10d58b423f269c1d1a309af161cdc73f.1509718618.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
2017-11-14 18:06:25 +01:00

259 lines
9.1 KiB
Bash
Executable file

#!/bin/bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=mreitz@redhat.com
seq="$(basename $0)"
echo "QA output created by $seq"
here="$PWD"
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
# This tests qocw2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)
IMGOPTS="compat=1.1"
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img
# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# This information should be available through qemu-img info
_img_info --format-specific
# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
| _filter_testdir \
| _filter_imgfmt
# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io
# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Try to fix it
_check_test_img -r all
# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
"\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io
echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# success, all done
echo "*** done"
rm -f $seq.full
status=0