libubox/uhtbl.h

376 lines
12 KiB
C
Raw Normal View History

2010-10-13 21:17:51 +02:00
/**
* uhtbl - Generic coalesced hash table implementation
* Copyright (C) 2010 Steven Barth <steven@midlink.org>
* Copyright (C) 2010 John Crispin <blogic@openwrt.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*
*/
/**
* uhtbl is a coalesced cellared generic hash table implementation with the aim
* to be both small in code size and heap memory requirements. This hash table
* uses a hybrid approach called coalesced addressing which means that pointers
* to other buckets will be used in the case of a collisions. In this case no
* linked lists have to be used and less allocation calls have to be done.
* To improve performance this hash table carries a cellar for collision
* handling which is an additional address area that lies behind the
* hash-addressable space.
*
* Overhead (on x86 32bit):
* Bookkeeping: 32 Bytes (per hash table)
* Metadata: 12 Bytes including pointer to key and keysize (per bucket)
*
*/
#ifndef UHTBL_H_
#define UHTBL_H_
/* compile-time configurables */
#ifndef UHTBL_PAYLOADFACTOR
#define UHTBL_PAYLOADFACTOR 0.86
#endif
#ifndef UHTBL_GROWFACTOR
#define UHTBL_GROWFACTOR 2
#endif
#ifndef UHTBL_MINIMUMSIZE
#define UHTBL_MINIMUMSIZE 16
#endif
#include <stdint.h>
/* Internal flags and values */
#define UHTBL_FLAG_OCCUPIED 0x01
#define UHTBL_FLAG_STRANGER 0x02
#define UHTBL_FLAG_WITHNEXT 0x04
#define UHTBL_FLAG_LOCALKEY 0x08
#define UHTBL_MAXIMUMSIZE 2147483648
/* Status codes */
#define UHTBL_OK 0
#define UHTBL_EINVAL -1
#define UHTBL_ENOMEM -2
#define UHTBL_ENOENT -3
/* API */
#if __GNUC__ >= 4
#ifndef UHTBL_API
#define UHTBL_API
#endif
#define UHTBL_INLINE static inline __attribute__((always_inline))
#else
#ifndef UHTBL_API
#define UHTBL_API
#endif
#define UHTBL_INLINE static inline
#endif
typedef union uhtbl_key uhtbl_key_t;
typedef struct uhtbl_head uhtbl_head_t;
typedef struct uhtbl_bucket uhtbl_bucket_t;
typedef struct uhtbl_config uhtbl_config_t;
typedef struct uhtbl uhtbl_t;
typedef uint32_t uhtbl_size_t;
typedef uhtbl_size_t(uhtbl_hash_t)(const void*, int len);
typedef void(uhtbl_gc_t)(void *bucket);
union uhtbl_key {
void *ptr;
long handle;
};
struct uhtbl_head {
uint8_t user;
uint8_t flags;
uint16_t keysize;
uhtbl_size_t next;
uhtbl_key_t key;
};
struct uhtbl_bucket {
uhtbl_head_t head;
};
struct uhtbl {
uint32_t bucketsize;
uhtbl_size_t size;
uhtbl_size_t used;
uhtbl_size_t payload;
uhtbl_size_t nextfree;
uhtbl_hash_t *fct_hash;
uhtbl_gc_t *fct_gc;
void *buckets;
};
/**
* uhtbl_init() - Initialize a hash table.
* @tbl: hash table
* @bucketsize: size of a bucket
* @sizehint: estimated maximum of needed buckets (optional)
* @fct_hash: hash function
* @fct_gc: bucket garbage collector (optional)
*
* Initializes a new hash table and preallocates memory.
*
* bucketsize is the size in Bytes each bucket will use but note the following:
* Each bucket needs to begin with a struct uhtbl_head_t that keeps its metadata
* in addition to the payload you want it to carry. You are advised to define a
* bucket struct with the first element being a uhtbl_head_t followed by your
* desired payload and pass the size of this struct to bucketsize.
*
* sizehint is a hint on how many distinct entries will be stored in the hash
* table. This will be used to preallocate space for the buckets and is useful
* if you know how many entries will be stored in the hash table as it avoids
* expensive rehashing cycles. sizehint should be a power of 2.
*
* fct_hash is the hash function used. It takes a constant void pointer and a
* integer as size parameter and returns an unsigned (32bit) int.
*
* fct_gc is the garbage collector for buckets. Every time a bucket gets unset
* or the hash table gets cleared or finalized the garbage collector function
* taking a pointer to a bucket will take care of doing any finalization for
* the buckets' payload and key data. You may use uhtbl_key() to get a reference
* to your key pointer or handle for deallocation or cleaning up any other
* references. There is an optionally selectable garbage collector that will
* take care of free()ing key pointers if your keys point to memory areas.
* You have to pass uhtbl_gc_key as fct_gc parameter to use it. You may also
* call this function in your custom garbage collector.
*
* WARNING: Your garbage collector function must otherwise not change the
* metadata in the uhtbl_head_t structure of the bucket else behaviour will be
* undefined for all subsequent actions.
*
*
* Example:
* struct mybucket {
* uhtbl_head_t head;
* int mypayload1;
* int mypayload2;
* }
*
* uhtbl_t table;
* uhtbl_init(&table, sizeof(struct mybucket), 32, MurmurHash2, NULL);
*
* Returns 0 on success or a negative error code.
*/
UHTBL_API int uhtbl_init(uhtbl_t *tbl, uint32_t bucketsize,
uhtbl_size_t sizehint, uhtbl_hash_t *fct_hash, uhtbl_gc_t *fct_gc);
/**
* uhtbl_get() - Get a bucket by its key.
* @tbl: hash table
* @key: key
* @len: length of key
*
* Finds and returns the bucket with a given key.
*
* Key can either be:
* 1. A pointer to a memory area then len is its length (must be < 64KB)
* 2. A NULL-pointer then len is a locally stored numerical key
*
*
* Example:
* struct mybucket *bucket;
* bucket = uhtbl_get(table, "foo", sizeof("foo"));
* printf("%i", bucket->mypayload1);
*
* bucket = uhtbl_get(table, NULL, 42);
* printf("%i", bucket->mypayload1);
*
* Returns the bucket or NULL if no bucket with given key was found.
*/
UHTBL_API void* uhtbl_get(uhtbl_t *tbl, const void *key, long len);
/**
* uhtbl_set() - Sets a bucket for given key.
* @tbl: hash table
* @key: key
* @len: length of key
*
* Sets a new bucket for the given key and returns a pointer to the bucket for
* you to assign your payload data. If there is already a bucket with that key
* it will be unset first.
*
* Key can either be:
* 1. A pointer to a memory area then len is its length (must be < 64KB)
* 2. A NULL-pointer then len is a locally stored numerical key
*
* NOTE: If key is a pointer memory management of it will be your business.
* You might want to use a garbage collection function (see uhtbl_init())
*
* NOTE: The payload area of your bucket is NOT initialized to zeroes.
*
* WARNING: Note the following side effects when setting previously unset keys:
* 1. A set may trigger several moving actions changing the order of buckets.
* 2. A set may trigger a rehashing cycle if all buckets are occupied.
* Therefore accessing any previously acquired pointers to any bucket results in
* undefined behaviour. In addition iterations which have started before may
* result in unwanted behaviour (e.g. buckets may be skipped or visited twice).
*
*
* Example:
* struct mybucket *bucket;
* bucket = uhtbl_set(table, "foo", sizeof("foo"));
* bucket->mypayload1 = 42:
*
* bucket = uhtbl_set(table, NULL, 42);
* bucket->mypayload1 = 1337;
*
*
* Returns the bucket or NULL if no bucket could be allocated (out of memory).
*/
UHTBL_API void* uhtbl_set(uhtbl_t *tbl, void *key, long len);
/**
* uhtbl_next() - Iterates over all entries of the hash table.
* @tbl: hash table
* @iter: Iteration counter
*
* Iterates over all entries of the hash table.
*
* iter is a pointer to a numeric variable that should be set to zero before
* the first call and will save the iteration state.
*
* NOTE: You may safely do several iterations in parallel. You may also safely
* unset any buckets of the hashtable or set keys that are currently in the
* hash table. However setting buckets with keys that don't have an assigned
* bucket yet results in undefined behaviour.
*
* Example:
* uint32_t iter = 0;
* struct mybucket *bucket;
* while ((bucket = uhtbl_next(table, &iter))) {
* printf("%i", bucket->mypayload1);
* }
*
* Return the next bucket or NULL if all buckets were already visited.
*/
UHTBL_API void* uhtbl_next(uhtbl_t *tbl, uhtbl_size_t *iter);
/**
* uhtbl_unset() - Unsets the bucket with given key.
* @tbl: hash table
* @key: key
* @len: length of key (optional)
*
* Unsets the bucket with given key and calls the garbage collector to free
* any payload resources - if any.
*
* Key can either be:
* 1. A pointer to a memory area then len is its length (must be < 64KB)
* 2. A NULL-pointer then len is a locally stored numerical key
*
* Example:
* uhtbl_unset(table, NULL, 42);
*
* Returns 0 on success or a negative error code if there was no matching bucket
*/
UHTBL_API int uhtbl_unset(uhtbl_t *tbl, const void *key, long len);
/**
* uhtbl_remove() - Unsets a bucket.
* @tbl: hash table
* @head: bucket head
*
* Unsets the bucket with given address and calls the garbage collector to free
* any payload resources - if any.
*
* Example:
* uhtbl_remove(table, &bucket->head);
*
* Returns 0 on success or a negative error code if the bucket was not found
*/
UHTBL_API int uhtbl_remove(uhtbl_t *tbl, uhtbl_head_t *head);
/**
* uhtbl_clear() - Clears the hashtable without freeing its memory.
* @tbl: hash table
*
* Clears all buckets of the hashtable invoking the garbage collector - if any
* but does not free the memory of the hash table. This is usually more
* efficient than iterating and using unset.
*
* Returns nothing.
*/
UHTBL_API void uhtbl_clear(uhtbl_t *tbl);
/**
* uhtbl_resize() - Resizes and rehashes the hash table.
* @tbl: hash table
* @payload: Buckets to reserve.
*
* Resizes the hash table and rehashes its entries.
*
* payload is the number of buckets the hashtable should allocate. It must be
* greater or at least equal to the number of buckets currently occupied.
*
* NOTE: Rehashing is an expensive process which should be avoided if possible.
* However resizing will be automatically done if you try to set a new bucket
* but all buckets are already occupied. In this case the payload bucket count
* is usually doubled. There is currently no automatic resizing done when the
* bucket usage count decreases. You have to take care of this by yourself.
*
* Returns 0 on success or a negative error code if out of memory.
*/
UHTBL_API int uhtbl_resize(uhtbl_t *tbl, uhtbl_size_t payload);
/**
* uhtbl_clear() - Clears the hashtable and frees the bucket memory.
* @tbl: hash table
*
* Clears all buckets of the hashtable invoking the garbage collector - if any
* and frees the memory occupied by the buckets.
*
* Returns nothing.
*/
UHTBL_API void uhtbl_finalize(uhtbl_t *tbl);
/**
* uhtbl_key() - Returns the key parameters as passed to set.
* @head: Bucket head
* @key: Pointer where key pointer should be stored (optional)
* @len: Pointer where key len should be stored (optional)
*
* This function might be useful to obtain the key parameters of a bucket
* when doing garbage collection.
*
* Returns nothing.
*/
UHTBL_API void uhtbl_key(uhtbl_head_t *head, void **key, long *len);
/**
* uhtbl_gc_key() - Basic garbage collector that frees key memory.
* @bucket: Bucket
*
* This function is a basic garbage collector that will free any key pointers.
* However it will not touch your payload data.
*
* WARNING: You must not call this function directly on any bucket otherwise
* behaviour will be unspecified. Instead you may pass this function to the
* uhtbl_init function. You may also call this function from inside a custom
* garbage collector.
*
* Returns nothing.
*/
UHTBL_API void uhtbl_gc_key(void *bucket);
#endif /* UHTBL_H_ */