Hi, Monty, On Feb 23, Michael Widenius wrote:
commit 367c87a34f8 Author: Michael Widenius <monty@mariadb.org> Date: Sun Feb 18 17:30:01 2024 +0200
diff --git a/include/my_bitmap.h b/include/my_bitmap.h --- a/include/my_bitmap.h +++ b/include/my_bitmap.h @@ -81,14 +79,15 @@ extern void bitmap_union(MY_BITMAP *map, const MY_BITMAP *map2); extern void bitmap_xor(MY_BITMAP *map, const MY_BITMAP *map2); extern void bitmap_invert(MY_BITMAP *map); extern void bitmap_copy(MY_BITMAP *map, const MY_BITMAP *map2); +extern void bitmap_copy_data(MY_BITMAP *map, const uchar *ptr, uint bits);
Why? In all cases where you use it, the source is a bitmap too. you can use a function that copies MY_BITMAP to MY_BITMAP.
extern uint bitmap_lock_set_next(MY_BITMAP *map); extern void bitmap_lock_clear_bit(MY_BITMAP *map, uint bitmap_bit); /* Fast, not thread safe, bitmap functions */ -#define bitmap_buffer_size(bits) (((bits)+31)/32)*4 +#define bitmap_buffer_size(bits) (((bits)+63)/64)*8
may be MY_ALIGN(bits, sizeof(my_bitmap_map)*8)*sizeof(my_bitmap_map) ? and the same below? less error-prone or, even better #define my_bitmap_map_bytes sizeof(my_bitmap_map) #define my_bitmap_map_bits (my_bitmap_map_bytes*8) #define bitmap_buffer_size(bits) MY_ALIGN(bits,my_bitmap_map_bits)*my_bitmap_map_bytes
#define no_bytes_in_map(map) (((map)->n_bits + 7)/8) -#define no_words_in_map(map) (((map)->n_bits + 31)/32) -#define bytes_word_aligned(bytes) (4*((bytes + 3)/4)) +#define no_words_in_map(map) (((map)->n_bits + 63)/64) +#define bytes_word_aligned(bytes) (8*((bytes + 7)/8)) /* The following functions must be compatible with create_last_word_mask()! */ static inline void bitmap_set_bit(MY_BITMAP *map,uint bit) diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -31,95 +42,82 @@ New version written and test program added and some changes to the interface was made by Mikael Ronstrom 2005, with assistance of Tomas Ulin and Mats Kindahl. + Updated to 64 bits and use my_find_first_bit() to speed up + bitmap_get_next_set() by Monty in 2024 */
#include "mysys_priv.h" #include <my_bitmap.h> #include <m_string.h> #include <my_bit.h> +#include <my_byteorder.h> + + +/* Defines to check bitmaps */ + +#define DBUG_ASSERT_BITMAP(M) \ + DBUG_ASSERT((M)->bitmap); \ + DBUG_ASSERT((M)->n_bits > 0); \ + DBUG_ASSERT((M)->last_word_ptr == (M)->bitmap + no_words_in_map(M)-1); \ + DBUG_ASSERT((*(M)->last_word_ptr & (M)->last_word_mask) == 0); + +#define DBUG_ASSERT_BITMAP_AND_BIT(M,B) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT((B) < (M)->n_bits); + +#define DBUG_ASSERT_DIFFERENT_BITMAPS(M,N) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT_BITMAP(N); + +#define DBUG_ASSERT_IDENTICAL_BITMAPS(M,N) \ + DBUG_ASSERT_BITMAP(M); \ + DBUG_ASSERT_BITMAP(N); \ + DBUG_ASSERT((M)->n_bits == (N)->n_bits);
/* - Create a mask with the upper 'unused' bits set and the lower 'used' - bits clear. The bits within each byte is stored in big-endian order. + Create a mask for the usable bits on the LAST my_bitmap_map position for + a bitmap with 'bits' number of bits. + + The lowest 'bits' bits are set to zero and the rest bits are set to 1. + For (bits & 63) == 0 , 0 is returned as in this case all bits in + the my_bitmap_position are significant + + This code assumes the underlying storage is a 64 bit ulonglong.
doesn't have to be if you'll use my_bitmap_map_bits
*/
-static inline uchar invers_last_byte_mask(uint bits) -{ - return last_byte_mask(bits) ^ 255; -} +static inline my_bitmap_map last_word_mask(uint bits) +{ + uint bits_in_last_map= (bits & 63); + return bits_in_last_map ? ~((1ULL << bits_in_last_map)-1) : 0ULL;
you sure a condition is faster than a linear code of few bit operations?
+}
+static inline my_bitmap_map first_word_mask(uint bits) +{ + uint bits_in_last_map= (bits & 63); + return ~((1ULL << bits_in_last_map)-1);
and no condition here. strange. and shouldn't one of these two functions be inverted? supposedly one masks all bits before the first and the other - all bits after the last.
+} -void create_last_word_mask(MY_BITMAP *map) -{ - unsigned char const mask= invers_last_byte_mask(map->n_bits); - - /* - The first bytes are to be set to zero since they represent real bits - in the bitvector. The last bytes are set to 0xFF since they represent - bytes not used by the bitvector. Finally the last byte contains bits - as set by the mask above. - */ - unsigned char *ptr= (unsigned char*)&map->last_word_mask; - - map->last_word_ptr= map->bitmap + no_words_in_map(map)-1; - switch (no_bytes_in_map(map) & 3) { - case 1: - map->last_word_mask= ~0U; - ptr[0]= mask; - return; - case 2: - map->last_word_mask= ~0U; - ptr[0]= 0; - ptr[1]= mask; - return; - case 3: - map->last_word_mask= 0U; - ptr[2]= mask; - ptr[3]= 0xFFU; - return; - case 0: - map->last_word_mask= 0U; - ptr[3]= mask; - return; - } -}
-static inline my_bitmap_map last_word_mask(uint bit) -{ - my_bitmap_map last_word_mask; - uint n_bits= bit + 1; - unsigned char const mask= invers_last_byte_mask(n_bits); - - /* - The first bytes are to be set to zero since they represent real bits - in the bitvector. The last bytes are set to 0xFF since they represent - bytes not used by the bitvector. Finally the last byte contains bits - as set by the mask above. - */ - unsigned char *ptr= (unsigned char*)&last_word_mask; - - switch ((n_bits + 7)/8 & 3) { - case 1: - last_word_mask= ~0U; - ptr[0]= mask; - break; - case 2: - last_word_mask= ~0U; - ptr[0]= 0; - ptr[1]= mask; - break; - case 3: - last_word_mask= 0U; - ptr[2]= mask; - ptr[3]= 0xFFU; - break; - case 0: - last_word_mask= 0U; - ptr[3]= mask; - break; - } - return last_word_mask; -} +/* + Update the bitmap's last_word_ptr and last_word_mask + Also ensure that the last world is all zero to make it + easy to find the next set bit. + + Note that if n_bits is 0, then last_word_ptr will point to + bitmap (safetly). The bitmap will not be usable for almost any operation.
ether "safety" or "safely"
+*/ + +void create_last_word_mask(MY_BITMAP *map) +{ + my_bitmap_map mask= last_word_mask(map->n_bits); + map->last_word_mask= mask; + map->last_word_ptr= map->bitmap + MY_MAX(no_words_in_map(map),1) -1; + if (map->n_bits > 0) + { + *map->last_word_ptr&= ~mask; /* Set not used bits to 0 */ + DBUG_ASSERT_BITMAP(map); + } +}
@@ -326,37 +312,45 @@ void bitmap_set_prefix(MY_BITMAP *map, uint prefix_size) } if ((d= no_bytes_in_map(map)-prefix_bytes)) memset(m, 0, d); + DBUG_ASSERT_BITMAP(map); }
+/** + Check if bitmap is a bitmap of prefix bits set in the beginning + + @param map bitmap + @param prefix_size number of bits that should be set. 0 is allowed. + + @return 1 Yes, prefix bits where set or prefix_size == 0. + @return 0 No +*/ + my_bool bitmap_is_prefix(const MY_BITMAP *map, uint prefix_size) { uint prefix_mask= last_byte_mask(prefix_size); uchar *m= (uchar*) map->bitmap; uchar *end_prefix= m+(prefix_size-1)/8; uchar *end; - DBUG_ASSERT(m); - DBUG_ASSERT(prefix_size <= map->n_bits);
/* Empty prefix is always true */ if (!prefix_size) return 1;
+ DBUG_ASSERT_BITMAP_AND_BIT(map, prefix_size-1); + while (m < end_prefix) if (*m++ != 0xff) return 0;
- end= ((uchar*) map->bitmap) + no_bytes_in_map(map) - 1; - if (m == end) - return ((*m & last_byte_mask(map->n_bits)) == prefix_mask); - + end= ((uchar*) map->last_word_ptr) + sizeof(*map->last_word_ptr)-1;
good, here you use sizeof(), not just 7.
if (*m != prefix_mask) return 0;
- while (++m < end) + while (++m <= end) if (*m != 0) return 0; - return ((*m & last_byte_mask(map->n_bits)) == 0); + return 1; }
@@ -447,50 +442,51 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
/* Check if there is some bit index between start_bit and end_bit, such that - this is bit is set for all bitmaps in bitmap_list. + this is atleast on bit that set for all bitmaps in bitmap_list.
"at least" and "one" and "that is set" ?
SYNOPSIS bitmap_exists_intersection() bitmpap_array [in] a set of MY_BITMAPs - bitmap_count [in] number of elements in bitmpap_array + bitmap_count [in] number of elements in bitmap_array start_bit [in] beginning (inclusive) of the range of bits to search end_bit [in] end (inclusive) of the range of bits to search, must be no bigger than the bits of the shortest bitmap.
- NOTES - This function assumes that for at least one of the bitmaps in bitmap_array all - bits outside the range [start_bit, end_bit] are 0. As a result is not - necessary to take care of the bits outside the range [start_bit, end_bit]. - RETURN TRUE if an intersecion exists FALSE no intersection */
-my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array, +my_bool bitmap_exists_intersection(MY_BITMAP **bitmap_array, uint bitmap_count, uint start_bit, uint end_bit) { uint i, j, start_idx, end_idx; - my_bitmap_map cur_res; + my_bitmap_map cur_res, first_map;
DBUG_ASSERT(bitmap_count); DBUG_ASSERT(end_bit >= start_bit); for (j= 0; j < bitmap_count; j++) - DBUG_ASSERT(end_bit < bitmap_array[j]->n_bits); + { + DBUG_ASSERT_BITMAP_AND_BIT(bitmap_array[j], end_bit); + }
start_idx= start_bit/8/sizeof(my_bitmap_map); end_idx= end_bit/8/sizeof(my_bitmap_map);
+ first_map= first_word_mask(start_bit); + cur_res= first_map; for (i= start_idx; i < end_idx; i++) { - cur_res= ~0; for (j= 0; cur_res && j < bitmap_count; j++) cur_res &= bitmap_array[j]->bitmap[i]; if (cur_res) return TRUE; + cur_res= ~(my_bitmap_map) 0; } - cur_res= ~last_word_mask(end_bit); + cur_res= ~last_word_mask(end_bit+1); + if (start_idx == end_idx) + cur_res&= first_map; for (j= 0; cur_res && j < bitmap_count; j++) cur_res &= bitmap_array[j]->bitmap[end_idx]; return cur_res != 0; @@ -501,21 +497,26 @@ my_bool bitmap_exists_intersection(const MY_BITMAP **bitmap_array,
my_bool bitmap_union_is_set_all(const MY_BITMAP *map1, const MY_BITMAP *map2) { - my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end; + my_bitmap_map *m1= map1->bitmap, *m2= map2->bitmap, *end= map1->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map1,map2);
- DBUG_ASSERT(map1->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map1->n_bits==map2->n_bits); - end= map1->last_word_ptr; while ( m1 < end) - if ((*m1++ | *m2++) != 0xFFFFFFFF) + if ((*m1++ | *m2++) != 0xFFFFFFFFFFFFFFFFULL)
better ~(my_bitmap_map)0
return FALSE; /* here both maps have the same number of bits - see assert above */ - return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFF); + return ((*m1 | *m2 | map1->last_word_mask) != 0xFFFFFFFFFFFFFFFFULL); }
+#ifdef NOT_USED + +/* + If we re-introduce this one one, we should change the interface to + use_from_bit and not from_byte. We should also add testing for this + in bitmap-t.c +*/
just remove it, it's easier to write it from scratch again when needed then to undertand and fix this old dead code
+ /* Set/clear all bits above a bit.
@@ -603,45 +601,51 @@ uint bitmap_bits_set(const MY_BITMAP *map) my_bitmap_map *data_ptr= map->bitmap; my_bitmap_map *end= map->last_word_ptr; uint res= 0; - DBUG_ASSERT(map->bitmap); + DBUG_ASSERT_BITMAP(map);
- for (; data_ptr < end; data_ptr++) - res+= my_count_bits_uint32(*data_ptr); + for (; data_ptr <= end; data_ptr++) + res+= my_count_bits(*data_ptr);
- /*Reset last bits to zero*/ - res+= my_count_bits_uint32(*map->last_word_ptr & ~map->last_word_mask); return res; }
void bitmap_copy(MY_BITMAP *map, const MY_BITMAP *map2) { - my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end; - - DBUG_ASSERT(map->bitmap); - DBUG_ASSERT(map2->bitmap); - DBUG_ASSERT(map->n_bits == map2->n_bits); - end= map->last_word_ptr; + my_bitmap_map *to= map->bitmap, *from= map2->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_IDENTICAL_BITMAPS(map,map2);
while (to <= end) *to++ = *from++; }
+/* + Copy data into the bitmap from a byte array +*/ + +void bitmap_copy_data(MY_BITMAP *map, const uchar *ptr, uint bits)
not needed, as far as I understand
+{ + my_bitmap_map *to= map->bitmap; + uint length= (bits + 7)/8; + uint map_length= no_bytes_in_map(map); + DBUG_ASSERT_BITMAP_AND_BIT(map, bits-1); + + memcpy(to, ptr, length); + if (map_length != length) + bzero(to + length, map_length - length); + *map->last_word_ptr&= ~map->last_word_mask; +} + + uint bitmap_get_first_set(const MY_BITMAP *map) { - uint i; my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map);
- DBUG_ASSERT(map->bitmap); - - for (i=0; data_ptr < end; data_ptr++, i++) + for (uint i=0; data_ptr <= end; data_ptr++, i++) if (*data_ptr) - goto found; - if (!(*data_ptr & ~map->last_word_mask)) - return MY_BIT_NONE; - -found: - return get_first_set(*data_ptr, i); + return my_find_first_bit(*data_ptr) + i * sizeof(my_bitmap_map)*8; + return MY_BIT_NONE; }
@@ -703,35 +696,18 @@ uint bitmap_get_next_set(const MY_BITMAP *map, uint bitmap_bit)
uint bitmap_get_first(const MY_BITMAP *map)
this should be named bitmap_get_first_clear(). because it's always is_bit_set/is_bit_clear, is_set_all/is_clear_all, test_and_set/test_and_clear, set_bit/clear_bit, etc. there is bitmap_get_first_set, so this one must be bitmap_get_first_clear
{ - uchar *byte_ptr; - uint i,j,k; - my_bitmap_map *data_ptr, *end= map->last_word_ptr; - - DBUG_ASSERT(map->bitmap); - data_ptr= map->bitmap; - *map->last_word_ptr|= map->last_word_mask; + uint i; + my_bitmap_map *data_ptr= map->bitmap, *end= map->last_word_ptr; + DBUG_ASSERT_BITMAP(map);
- for (i=0; data_ptr < end; data_ptr++, i++) - if (*data_ptr != 0xFFFFFFFF) + for (i= 0; data_ptr < end; data_ptr++, i++) + if (*data_ptr != 0xFFFFFFFFFFFFFFFFULL) goto found; - if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFF) + if ((*data_ptr | map->last_word_mask) == 0xFFFFFFFFFFFFFFFFULL) return MY_BIT_NONE; - found: - byte_ptr= (uchar*)data_ptr; - for (j=0; ; j++, byte_ptr++) - { - if (*byte_ptr != 0xFF) - { - for (k=0; ; k++) - { - if (!(*byte_ptr & (1 << k))) - return (i*32) + (j*8) + k; - } - } - } - DBUG_ASSERT(0); - return MY_BIT_NONE; /* Impossible */ + /* find first zero bit by reverting all bits and find first bit */ + return my_find_first_bit(~*data_ptr) + i * sizeof(my_bitmap_map)*8; }
Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org