public inbox for [email protected]
 help / color / mirror / Atom feed
From: Dave Chinner <[email protected]>
To: John Garry <[email protected]>
Cc: [email protected], [email protected], [email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected],
	[email protected]
Subject: Re: [PATCH v4 05/11] block: Add core atomic write support
Date: Tue, 20 Feb 2024 09:58:39 +1100	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On Mon, Feb 19, 2024 at 01:01:03PM +0000, John Garry wrote:
> Add atomic write support as follows:
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 74e9e775f13d..12a75a252ca2 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -18,6 +18,42 @@
>  #include "blk-rq-qos.h"
>  #include "blk-throttle.h"
>  
> +static bool rq_straddles_atomic_write_boundary(struct request *rq,
> +					unsigned int front,
> +					unsigned int back)
> +{
> +	unsigned int boundary = queue_atomic_write_boundary_bytes(rq->q);
> +	unsigned int mask, imask;
> +	loff_t start, end;
> +
> +	if (!boundary)
> +		return false;
> +
> +	start = rq->__sector << SECTOR_SHIFT;
> +	end = start + rq->__data_len;
> +
> +	start -= front;
> +	end += back;
> +
> +	/* We're longer than the boundary, so must be crossing it */
> +	if (end - start > boundary)
> +		return true;
> +
> +	mask = boundary - 1;
> +
> +	/* start/end are boundary-aligned, so cannot be crossing */
> +	if (!(start & mask) || !(end & mask))
> +		return false;
> +
> +	imask = ~mask;
> +
> +	/* Top bits are different, so crossed a boundary */
> +	if ((start & imask) != (end & imask))
> +		return true;
> +
> +	return false;
> +}

I have no way of verifying this function is doing what it is
supposed to because it's function is undocumented. I have no idea
what the front/back variables are supposed to represent, and so no
clue if they are being applied properly.

That said, it's also applying unsigned 32 bit mask variables to
signed 64 bit quantities and trying to do things like "high bit changed"
checks on the 64 bit variable. This just smells like a future
source of "large offsets don't work like we expected!" bugs.

> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 06ea91e51b8b..176f26374abc 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -59,6 +59,13 @@ void blk_set_default_limits(struct queue_limits *lim)
>  	lim->zoned = false;
>  	lim->zone_write_granularity = 0;
>  	lim->dma_alignment = 511;
> +	lim->atomic_write_hw_max_sectors = 0;
> +	lim->atomic_write_max_sectors = 0;
> +	lim->atomic_write_hw_boundary_sectors = 0;
> +	lim->atomic_write_hw_unit_min_sectors = 0;
> +	lim->atomic_write_unit_min_sectors = 0;
> +	lim->atomic_write_hw_unit_max_sectors = 0;
> +	lim->atomic_write_unit_max_sectors = 0;
>  }

Seems to me this function would do better to just

	memset(lim, 0, sizeof(*lim));

and then set all the non-zero fields.

>  
>  /**
> @@ -101,6 +108,44 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce)
>  }
>  EXPORT_SYMBOL(blk_queue_bounce_limit);
>  
> +
> +/*
> + * Returns max guaranteed sectors which we can fit in a bio. For convenience of
> + * users, rounddown_pow_of_two() the return value.
> + *
> + * We always assume that we can fit in at least PAGE_SIZE in a segment, apart
> + * from first and last segments.
> + */
> +static unsigned int blk_queue_max_guaranteed_bio_sectors(
> +					struct queue_limits *limits,
> +					struct request_queue *q)
> +{
> +	unsigned int max_segments = min(BIO_MAX_VECS, limits->max_segments);
> +	unsigned int length;
> +
> +	length = min(max_segments, 2) * queue_logical_block_size(q);
> +	if (max_segments > 2)
> +		length += (max_segments - 2) * PAGE_SIZE;
> +
> +	return rounddown_pow_of_two(length >> SECTOR_SHIFT);
> +}
> +
> +static void blk_atomic_writes_update_limits(struct request_queue *q)
> +{
> +	struct queue_limits *limits = &q->limits;
> +	unsigned int max_hw_sectors =
> +		rounddown_pow_of_two(limits->max_hw_sectors);
> +	unsigned int unit_limit = min(max_hw_sectors,
> +		blk_queue_max_guaranteed_bio_sectors(limits, q));
> +
> +	limits->atomic_write_max_sectors =
> +		min(limits->atomic_write_hw_max_sectors, max_hw_sectors);
> +	limits->atomic_write_unit_min_sectors =
> +		min(limits->atomic_write_hw_unit_min_sectors, unit_limit);
> +	limits->atomic_write_unit_max_sectors =
> +		min(limits->atomic_write_hw_unit_max_sectors, unit_limit);
> +}
> +
>  /**
>   * blk_queue_max_hw_sectors - set max sectors for a request for this queue
>   * @q:  the request queue for the device
> @@ -145,6 +190,8 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
>  				 limits->logical_block_size >> SECTOR_SHIFT);
>  	limits->max_sectors = max_sectors;
>  
> +	blk_atomic_writes_update_limits(q);
> +
>  	if (!q->disk)
>  		return;
>  	q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9);
> @@ -182,6 +229,62 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
>  }
>  EXPORT_SYMBOL(blk_queue_max_discard_sectors);
>  
> +/**
> + * blk_queue_atomic_write_max_bytes - set max bytes supported by
> + * the device for atomic write operations.
> + * @q:  the request queue for the device
> + * @bytes: maximum bytes supported
> + */
> +void blk_queue_atomic_write_max_bytes(struct request_queue *q,
> +				      unsigned int bytes)
> +{
> +	q->limits.atomic_write_hw_max_sectors = bytes >> SECTOR_SHIFT;
> +	blk_atomic_writes_update_limits(q);
> +}
> +EXPORT_SYMBOL(blk_queue_atomic_write_max_bytes);

Ok, so this can silently set a limit that is different to what the
caller asked to have set?

How is the caller supposed to find this out if the smaller limit
that was set is not compatible with their configuration?

i.e. shouldn't this return an error if the requested size cannot
be set exactly as specified?

Same concern about the other limits that can be trimmed to be
smaller than requested.

-Dave.
-- 
Dave Chinner
[email protected]

  reply	other threads:[~2024-02-19 22:58 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-19 13:00 [PATCH v4 00/11] block atomic writes John Garry
2024-02-19 13:00 ` [PATCH v4 01/11] block: Pass blk_queue_get_max_sectors() a request pointer John Garry
2024-02-19 18:57   ` Keith Busch
2024-02-19 13:01 ` [PATCH v4 02/11] block: Call blkdev_dio_unaligned() from blkdev_direct_IO() John Garry
2024-02-19 18:57   ` Keith Busch
2024-02-20  8:31     ` John Garry
2024-02-20  6:54   ` Christoph Hellwig
2024-02-19 13:01 ` [PATCH v4 03/11] fs: Initial atomic write support John Garry
2024-02-19 19:16   ` David Sterba
2024-02-20  8:13     ` John Garry
2024-02-19 22:44   ` Dave Chinner
2024-02-20  9:52     ` John Garry
2024-02-24 18:16   ` Ritesh Harjani
2024-02-24 18:20     ` Ritesh Harjani
2024-02-26  8:58       ` John Garry
2024-02-26  9:13         ` Ritesh Harjani
2024-02-26  9:46           ` John Garry
2024-02-26  8:51     ` John Garry
2024-02-19 13:01 ` [PATCH v4 04/11] fs: Add initial atomic write support info to statx John Garry
2024-02-19 22:28   ` Dave Chinner
2024-02-20  9:40     ` John Garry
2024-02-20  8:20   ` Christoph Hellwig
2024-02-20  9:01     ` John Garry
2024-02-24 18:46   ` Ritesh Harjani
2024-02-26  9:07     ` John Garry
2024-02-19 13:01 ` [PATCH v4 05/11] block: Add core atomic write support John Garry
2024-02-19 22:58   ` Dave Chinner [this message]
2024-02-20  8:22     ` Christoph Hellwig
2024-02-20 10:01     ` John Garry
2024-02-25 12:09   ` Ritesh Harjani
2024-02-25 12:21     ` Ritesh Harjani
2024-02-26  9:23     ` John Garry
2024-02-19 13:01 ` [PATCH v4 06/11] block: Add atomic write support for statx John Garry
2024-02-20  8:29   ` Christoph Hellwig
2024-02-20  9:35     ` John Garry
2024-02-25 14:20   ` Ritesh Harjani
2024-02-26  9:36     ` John Garry
2024-02-19 13:01 ` [PATCH v4 07/11] block: Add fops atomic write support John Garry
2024-02-25 14:46   ` Ritesh Harjani
2024-02-26  9:46     ` John Garry
2024-02-19 13:01 ` [PATCH v4 08/11] scsi: sd: Atomic " John Garry
2024-02-19 13:01 ` [PATCH v4 09/11] scsi: scsi_debug: " John Garry
2024-02-20  7:12   ` Ojaswin Mujoo
2024-02-20  9:01     ` John Garry
2024-02-19 13:01 ` [PATCH v4 10/11] nvme: " John Garry
2024-02-19 19:21   ` Keith Busch
2024-02-20  6:55     ` Christoph Hellwig
2024-02-20  8:19       ` John Garry
2024-02-20  8:31   ` Christoph Hellwig
2024-02-20  8:50     ` John Garry
2024-02-19 13:01 ` [PATCH v4 11/11] nvme: Ensure atomic writes will be executed atomically John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox