use std::collections::BTreeSet;
use std::convert::TryInto;
use std::ops::DerefMut;
use std::sync::Arc;
use std::time::Duration;

use concread::arcache::{ARCache, ARCacheBuilder, ARCacheReadTxn, ARCacheWriteTxn};
use concread::cowcell::*;
use hashbrown::HashMap;
use idlset::v2::IDLBitRange;
use idlset::AndNot;
use kanidm_proto::v1::{ConsistencyError, OperationError};
use tracing::trace;
use uuid::Uuid;

use crate::be::idl_sqlite::{
    IdlSqlite, IdlSqliteReadTransaction, IdlSqliteTransaction, IdlSqliteWriteTransaction,
};
use crate::be::idxkey::{
    IdlCacheKey, IdlCacheKeyRef, IdlCacheKeyToRef, IdxKey, IdxKeyRef, IdxKeyToRef, IdxSlope,
};
use crate::be::{BackendConfig, IdList, IdRawEntry};
use crate::entry::{Entry, EntryCommitted, EntrySealed};
use crate::prelude::*;
use crate::value::{IndexType, Value};

// use std::borrow::Borrow;

// Appears to take about ~500MB on some stress tests
const DEFAULT_CACHE_TARGET: usize = 2048;
const DEFAULT_IDL_CACHE_RATIO: usize = 32;
const DEFAULT_NAME_CACHE_RATIO: usize = 8;
const DEFAULT_CACHE_RMISS: usize = 0;
const DEFAULT_CACHE_WMISS: usize = 4;
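
// For illustration: with DEFAULT_CACHE_TARGET = 2048, IdlArcSqlite::new (below)
// sizes the idl cache for 2048 * 32 = 65536 idl entries and the name cache for
// 2048 * 8 = 16384 name mappings, on top of 2048 entry slots.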

#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
enum NameCacheKey {
    Name2Uuid(String),
    Uuid2Rdn(Uuid),
    Uuid2Spn(Uuid),
}

#[derive(Debug, Clone)]
enum NameCacheValue {
    U(Uuid),
    R(String),
    S(Box<Value>),
}

pub struct IdlArcSqlite {
    db: IdlSqlite,
    entry_cache: ARCache<u64, Arc<EntrySealedCommitted>>,
    idl_cache: ARCache<IdlCacheKey, Box<IDLBitRange>>,
    name_cache: ARCache<NameCacheKey, NameCacheValue>,
    op_ts_max: CowCell<Option<Duration>>,
    allids: CowCell<IDLBitRange>,
    maxid: CowCell<u64>,
}

pub struct IdlArcSqliteReadTransaction<'a> {
    db: IdlSqliteReadTransaction,
    entry_cache: ARCacheReadTxn<'a, u64, Arc<EntrySealedCommitted>, ()>,
    idl_cache: ARCacheReadTxn<'a, IdlCacheKey, Box<IDLBitRange>, ()>,
    name_cache: ARCacheReadTxn<'a, NameCacheKey, NameCacheValue, ()>,
    allids: CowCellReadTxn<IDLBitRange>,
}

pub struct IdlArcSqliteWriteTransaction<'a> {
    db: IdlSqliteWriteTransaction,
    entry_cache: ARCacheWriteTxn<'a, u64, Arc<EntrySealedCommitted>, ()>,
    idl_cache: ARCacheWriteTxn<'a, IdlCacheKey, Box<IDLBitRange>, ()>,
    name_cache: ARCacheWriteTxn<'a, NameCacheKey, NameCacheValue, ()>,
    op_ts_max: CowCellWriteTxn<'a, Option<Duration>>,
    allids: CowCellWriteTxn<'a, IDLBitRange>,
    maxid: CowCellWriteTxn<'a, u64>,
}

macro_rules! get_identry {
    (
        $self:expr,
        $idl:expr,
        $is_read_op:expr
    ) => {{
        let mut result: Vec<Arc<EntrySealedCommitted>> = Vec::new();
        match $idl {
            IdList::Partial(idli) | IdList::PartialThreshold(idli) | IdList::Indexed(idli) => {
                let mut nidl = IDLBitRange::new();

                idli.into_iter().for_each(|i| {
                    // For each id in the idl, check if it is in the cache.
                    match $self.entry_cache.get(&i) {
                        Some(eref) => result.push(eref.clone()),
                        None => unsafe { nidl.push_id(i) },
                    }
                });

                if !nidl.is_empty() {
                    // Now, get anything from nidl that is needed.
                    let mut db_result = $self.db.get_identry(&IdList::Partial(nidl))?;
                    // Clone everything from db_result into the cache.
                    if $is_read_op {
                        db_result.iter().for_each(|e| {
                            $self.entry_cache.insert(e.get_id(), e.clone());
                        });
                    }
                    // Merge the two vecs
                    result.append(&mut db_result);
                }
            }
            IdList::AllIds => {
                // VERY similar to above, but we skip adding the entries to the cache
                // on miss to prevent scan/invalidation attacks.
                let idli = (*$self.allids).clone();
                let mut nidl = IDLBitRange::new();

                (&idli)
                    .into_iter()
                    .for_each(|i| match $self.entry_cache.get(&i) {
                        Some(eref) => result.push(eref.clone()),
                        None => unsafe { nidl.push_id(i) },
                    });

                if !nidl.is_empty() {
                    // Now, get anything from nidl that is needed.
                    let mut db_result = $self.db.get_identry(&IdList::Partial(nidl))?;
                    // Merge the two vecs
                    result.append(&mut db_result);
                }
            }
        };
        // Return
        Ok(result)
    }};
}

macro_rules! get_identry_raw {
    (
        $self:expr,
        $idl:expr
    ) => {{
        // As a cache we have no concept of this, so we just bypass to the db.
        $self.db.get_identry_raw($idl)
    }};
}

macro_rules! exists_idx {
    (
        $self:expr,
        $attr:expr,
        $itype:expr
    ) => {{
        // As a cache we have no concept of this, so we just bypass to the db.
        $self.db.exists_idx($attr, $itype)
    }};
}

macro_rules! get_idl {
    (
        $self:expr,
        $attr:expr,
        $itype:expr,
        $idx_key:expr
    ) => {{
        // SEE ALSO #259: Find a way to implement borrow for this properly.
        // I don't think this is possible. When we make this dyn, the arc
        // needs the dyn trait to be sized so that it *could* claim a clone
        // for hit tracking reasons. That also means that we need From and
        // some other traits that just seem incompatible. And in the end,
        // we clone a few times in arc, and if we miss we need to insert anyway.
        //
        // So the best path could be to replace IdlCacheKey with a compressed
        // or smaller type. Perhaps even a small cache of the IdlCacheKeys that
        // are allocated to reduce some allocs? Probably over thinking it at
        // this point.
        //
        // First attempt to get from this cache.
        let cache_key = IdlCacheKeyRef {
            a: $attr,
            i: $itype,
            k: $idx_key,
        };
        let cache_r = $self.idl_cache.get(&cache_key as &dyn IdlCacheKeyToRef);
        // If hit, continue.
        if let Some(ref data) = cache_r {
            trace!(
                cached_index = ?$itype,
                attr = ?$attr,
                idl = %data,
            );
            return Ok(Some(data.as_ref().clone()));
        }
        // If miss, get from db *and* insert to the cache.
        let db_r = $self.db.get_idl($attr, $itype, $idx_key)?;
        if let Some(ref idl) = db_r {
            let ncache_key = IdlCacheKey {
                a: $attr.into(),
                i: $itype.clone(),
                k: $idx_key.into(),
            };
            $self.idl_cache.insert(ncache_key, Box::new(idl.clone()))
        }
        Ok(db_r)
    }};
}
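
// Minimal illustrative sketch (an assumed example using plain std types, not
// the concread ARCache API) of the read-through pattern the macros above and
// below implement: consult the cache first, fall back to the backing store on
// a miss, and populate the cache with the result so later reads hit.
#[cfg(test)]
mod read_through_sketch {
    use std::collections::HashMap;

    // Hypothetical helper for illustration only.
    fn read_through(
        cache: &mut HashMap<String, Vec<u64>>,
        db: &HashMap<String, Vec<u64>>,
        key: &str,
    ) -> Option<Vec<u64>> {
        if let Some(hit) = cache.get(key) {
            return Some(hit.clone());
        }
        let miss = db.get(key).cloned();
        if let Some(ref v) = miss {
            cache.insert(key.to_string(), v.clone());
        }
        miss
    }

    #[test]
    fn cache_fills_on_miss() {
        let mut cache = HashMap::new();
        let mut db = HashMap::new();
        db.insert("name/eq/foo".to_string(), vec![1, 2, 3]);
        assert_eq!(read_through(&mut cache, &db, "name/eq/foo"), Some(vec![1, 2, 3]));
        // The second read is now served from the cache.
        assert!(cache.contains_key("name/eq/foo"));
    }
}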

macro_rules! name2uuid {
    (
        $self:expr,
        $name:expr
    ) => {{
        let cache_key = NameCacheKey::Name2Uuid($name.to_string());
        let cache_r = $self.name_cache.get(&cache_key);
        if let Some(NameCacheValue::U(uuid)) = cache_r {
            trace!(?uuid, "Got cached name2uuid");
            return Ok(Some(uuid.clone()));
        } else {
            trace!("Cache miss uuid for name2uuid");
        }

        let db_r = $self.db.name2uuid($name)?;
        if let Some(uuid) = db_r {
            $self
                .name_cache
                .insert(cache_key, NameCacheValue::U(uuid.clone()))
        }
        Ok(db_r)
    }};
}

macro_rules! uuid2spn {
    (
        $self:expr,
        $uuid:expr
    ) => {{
        let cache_key = NameCacheKey::Uuid2Spn($uuid);
        let cache_r = $self.name_cache.get(&cache_key);
        if let Some(NameCacheValue::S(ref spn)) = cache_r {
            trace!(?spn, "Got cached uuid2spn");
            return Ok(Some(spn.as_ref().clone()));
        } else {
            trace!("Cache miss spn for uuid2spn");
        }

        let db_r = $self.db.uuid2spn($uuid)?;
        if let Some(ref data) = db_r {
            $self
                .name_cache
                .insert(cache_key, NameCacheValue::S(Box::new(data.clone())))
        }
        Ok(db_r)
    }};
}

macro_rules! uuid2rdn {
    (
        $self:expr,
        $uuid:expr
    ) => {{
        let cache_key = NameCacheKey::Uuid2Rdn($uuid);
        let cache_r = $self.name_cache.get(&cache_key);
        if let Some(NameCacheValue::R(ref rdn)) = cache_r {
            return Ok(Some(rdn.clone()));
        } else {
            trace!("Cache miss rdn for uuid2rdn");
        }

        let db_r = $self.db.uuid2rdn($uuid)?;
        if let Some(ref data) = db_r {
            $self
                .name_cache
                .insert(cache_key, NameCacheValue::R(data.clone()))
        }
        Ok(db_r)
    }};
}

macro_rules! verify {
    (
        $self:expr
    ) => {{
        let mut r = $self.db.verify();
        if r.is_empty() && !$self.is_dirty() {
            // Check allids.
            match $self.db.get_allids() {
                Ok(db_allids) => {
                    if !db_allids.is_compressed() || !(*($self).allids).is_compressed() {
                        admin_warn!("Inconsistent ALLIDS compression state");
                        r.push(Err(ConsistencyError::BackendAllIdsSync))
                    }
                    if db_allids != (*($self).allids) {
                        // Note: large key-values may render poorly in this log line;
                        // the formatting may need rework given how big these sets can be.
                        admin_warn!(
                            db_allids = ?(&db_allids).andnot(&($self).allids),
                            arc_allids = ?(&(*($self).allids)).andnot(&db_allids),
                            "Inconsistent ALLIDS set"
                        );
                        r.push(Err(ConsistencyError::BackendAllIdsSync))
                    }
                }
                Err(_) => r.push(Err(ConsistencyError::Unknown)),
            };
        };
        r
    }};
}

pub trait IdlArcSqliteTransaction {
    fn get_identry(
        &mut self,
        idl: &IdList,
    ) -> Result<Vec<Arc<EntrySealedCommitted>>, OperationError>;

    fn get_identry_raw(&self, idl: &IdList) -> Result<Vec<IdRawEntry>, OperationError>;

    fn exists_idx(&mut self, attr: &str, itype: IndexType) -> Result<bool, OperationError>;

    fn get_idl(
        &mut self,
        attr: &str,
        itype: IndexType,
        idx_key: &str,
    ) -> Result<Option<IDLBitRange>, OperationError>;

    fn get_db_s_uuid(&self) -> Result<Option<Uuid>, OperationError>;

    fn get_db_d_uuid(&self) -> Result<Option<Uuid>, OperationError>;

    fn get_db_ts_max(&self) -> Result<Option<Duration>, OperationError>;

    fn verify(&self) -> Vec<Result<(), ConsistencyError>>;

    fn is_dirty(&self) -> bool;

    fn name2uuid(&mut self, name: &str) -> Result<Option<Uuid>, OperationError>;

    fn uuid2spn(&mut self, uuid: Uuid) -> Result<Option<Value>, OperationError>;

    fn uuid2rdn(&mut self, uuid: Uuid) -> Result<Option<String>, OperationError>;

    fn list_idxs(&self) -> Result<Vec<String>, OperationError>;

    fn list_id2entry(&self) -> Result<Vec<(u64, String)>, OperationError>;

    fn list_index_content(
        &self,
        index_name: &str,
    ) -> Result<Vec<(String, IDLBitRange)>, OperationError>;

    fn get_id2entry(&self, id: u64) -> Result<(u64, String), OperationError>;
}

impl<'a> IdlArcSqliteTransaction for IdlArcSqliteReadTransaction<'a> {
    fn get_identry(
        &mut self,
        idl: &IdList,
    ) -> Result<Vec<Arc<EntrySealedCommitted>>, OperationError> {
        get_identry!(self, idl, true)
    }

    fn get_identry_raw(&self, idl: &IdList) -> Result<Vec<IdRawEntry>, OperationError> {
        get_identry_raw!(self, idl)
    }

    fn exists_idx(&mut self, attr: &str, itype: IndexType) -> Result<bool, OperationError> {
        exists_idx!(self, attr, itype)
    }

    fn get_idl(
        &mut self,
        attr: &str,
        itype: IndexType,
        idx_key: &str,
    ) -> Result<Option<IDLBitRange>, OperationError> {
        get_idl!(self, attr, itype, idx_key)
    }

    fn get_db_s_uuid(&self) -> Result<Option<Uuid>, OperationError> {
        self.db.get_db_s_uuid()
    }

    fn get_db_d_uuid(&self) -> Result<Option<Uuid>, OperationError> {
        self.db.get_db_d_uuid()
    }

    fn get_db_ts_max(&self) -> Result<Option<Duration>, OperationError> {
        self.db.get_db_ts_max()
    }

    fn verify(&self) -> Vec<Result<(), ConsistencyError>> {
        verify!(self)
    }

    fn is_dirty(&self) -> bool {
        false
    }

    fn name2uuid(&mut self, name: &str) -> Result<Option<Uuid>, OperationError> {
        name2uuid!(self, name)
    }

    fn uuid2spn(&mut self, uuid: Uuid) -> Result<Option<Value>, OperationError> {
        uuid2spn!(self, uuid)
    }

    fn uuid2rdn(&mut self, uuid: Uuid) -> Result<Option<String>, OperationError> {
        uuid2rdn!(self, uuid)
    }

    fn list_idxs(&self) -> Result<Vec<String>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_idxs()
    }

    fn list_id2entry(&self) -> Result<Vec<(u64, String)>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_id2entry()
    }

    fn list_index_content(
        &self,
        index_name: &str,
    ) -> Result<Vec<(String, IDLBitRange)>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_index_content(index_name)
    }

    fn get_id2entry(&self, id: u64) -> Result<(u64, String), OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.get_id2entry(id)
    }
}

impl<'a> IdlArcSqliteTransaction for IdlArcSqliteWriteTransaction<'a> {
    fn get_identry(
        &mut self,
        idl: &IdList,
    ) -> Result<Vec<Arc<EntrySealedCommitted>>, OperationError> {
        get_identry!(self, idl, false)
    }

    fn get_identry_raw(&self, idl: &IdList) -> Result<Vec<IdRawEntry>, OperationError> {
        get_identry_raw!(self, idl)
    }

    fn exists_idx(&mut self, attr: &str, itype: IndexType) -> Result<bool, OperationError> {
        exists_idx!(self, attr, itype)
    }

    fn get_idl(
        &mut self,
        attr: &str,
        itype: IndexType,
        idx_key: &str,
    ) -> Result<Option<IDLBitRange>, OperationError> {
        get_idl!(self, attr, itype, idx_key)
    }

    fn get_db_s_uuid(&self) -> Result<Option<Uuid>, OperationError> {
        self.db.get_db_s_uuid()
    }

    fn get_db_d_uuid(&self) -> Result<Option<Uuid>, OperationError> {
        self.db.get_db_d_uuid()
    }

    fn get_db_ts_max(&self) -> Result<Option<Duration>, OperationError> {
        match *self.op_ts_max {
            Some(ts) => Ok(Some(ts)),
            None => self.db.get_db_ts_max(),
        }
    }

    fn verify(&self) -> Vec<Result<(), ConsistencyError>> {
        verify!(self)
    }

    fn is_dirty(&self) -> bool {
        self.entry_cache.is_dirty()
    }

    fn name2uuid(&mut self, name: &str) -> Result<Option<Uuid>, OperationError> {
        name2uuid!(self, name)
    }

    fn uuid2spn(&mut self, uuid: Uuid) -> Result<Option<Value>, OperationError> {
        uuid2spn!(self, uuid)
    }

    fn uuid2rdn(&mut self, uuid: Uuid) -> Result<Option<String>, OperationError> {
        uuid2rdn!(self, uuid)
    }

    fn list_idxs(&self) -> Result<Vec<String>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_idxs()
    }

    fn list_id2entry(&self) -> Result<Vec<(u64, String)>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_id2entry()
    }

    fn list_index_content(
        &self,
        index_name: &str,
    ) -> Result<Vec<(String, IDLBitRange)>, OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.list_index_content(index_name)
    }

    fn get_id2entry(&self, id: u64) -> Result<(u64, String), OperationError> {
        // This is only used in tests or debug tools, so bypass the cache.
        self.db.get_id2entry(id)
    }
}

impl<'a> IdlArcSqliteWriteTransaction<'a> {
    #[instrument(level = "debug", name = "idl_arc_sqlite::commit", skip_all)]
    pub fn commit(self) -> Result<(), OperationError> {
        let IdlArcSqliteWriteTransaction {
            db,
            mut entry_cache,
            mut idl_cache,
            mut name_cache,
            op_ts_max,
            allids,
            maxid,
        } = self;

        // Write any dirty items to the disk.
        entry_cache
            .iter_mut_mark_clean()
            .try_for_each(|(k, v)| match v {
                Some(e) => db.write_identry(e),
                None => db.delete_identry(*k),
            })
            .map_err(|e| {
                admin_error!(?e, "Failed to sync entry cache to sqlite");
                e
            })?;

        idl_cache
            .iter_mut_mark_clean()
            .try_for_each(|(k, v)| {
                match v {
                    Some(idl) => db.write_idl(k.a.as_str(), k.i, k.k.as_str(), idl),
                    #[allow(clippy::unreachable)]
                    None => {
                        // Due to how we remove items, we always write an empty idl
                        // to the cache rather than removing the key, so this should
                        // never be none. (`v` is only an `Option` because the cache
                        // iterator API is shared with caches, like entry_cache above,
                        // that do use `None` for removals.)
                        //
                        // If it is none, this means we have memory corruption so we MUST
                        // panic.
                        unreachable!();
                    }
                }
            })
            .map_err(|e| {
                admin_error!(?e, "Failed to sync idl cache to sqlite");
                e
            })?;

        name_cache
            .iter_mut_mark_clean()
            .try_for_each(|(k, v)| match (k, v) {
                (NameCacheKey::Name2Uuid(k), Some(NameCacheValue::U(v))) => {
                    db.write_name2uuid_add(k, *v)
                }
                (NameCacheKey::Name2Uuid(k), None) => db.write_name2uuid_rem(k),
                (NameCacheKey::Uuid2Spn(uuid), Some(NameCacheValue::S(v))) => {
                    db.write_uuid2spn(*uuid, Some(v))
                }
                (NameCacheKey::Uuid2Spn(uuid), None) => db.write_uuid2spn(*uuid, None),
                (NameCacheKey::Uuid2Rdn(uuid), Some(NameCacheValue::R(v))) => {
                    db.write_uuid2rdn(*uuid, Some(v))
                }
                (NameCacheKey::Uuid2Rdn(uuid), None) => db.write_uuid2rdn(*uuid, None),

                _ => Err(OperationError::InvalidCacheState),
            })
            .map_err(|e| {
                admin_error!(?e, "Failed to sync name cache to sqlite");
                e
            })?;

        // Commit the db first, then the caches in the reverse order to
        // how they were taken.
        db.commit().map(|()| {
            op_ts_max.commit();
            name_cache.commit();
            idl_cache.commit();
            entry_cache.commit();
            allids.commit();
            maxid.commit();
        })
    }

    pub fn get_id2entry_max_id(&self) -> Result<u64, OperationError> {
        Ok(*self.maxid)
    }

    pub fn set_id2entry_max_id(&mut self, mid: u64) {
        assert!(mid > *self.maxid);
        *self.maxid = mid;
    }

    pub fn write_identries<'b, I>(&'b mut self, mut entries: I) -> Result<(), OperationError>
    where
        I: Iterator<Item = &'b Entry<EntrySealed, EntryCommitted>>,
    {
        entries.try_for_each(|e| {
            trace!("Inserting {:?} to cache", e.get_id());
            if e.get_id() == 0 {
                Err(OperationError::InvalidEntryId)
            } else {
                (*self.allids).insert_id(e.get_id());
                self.entry_cache
                    .insert_dirty(e.get_id(), Arc::new(e.clone()));
                Ok(())
            }
        })
    }

    pub fn write_identries_raw<I>(&mut self, entries: I) -> Result<(), OperationError>
    where
        I: Iterator<Item = IdRawEntry>,
    {
        // Drop the entry cache.
        self.entry_cache.clear();
        // Write the raw ents
        self.db
            .write_identries_raw(entries)
            .and_then(|()| self.db.get_allids())
            .map(|mut ids| {
                // Update allids since we cleared them and need to reset it in the cache.
                std::mem::swap(self.allids.deref_mut(), &mut ids);
            })
    }

    pub fn delete_identry<I>(&mut self, mut idl: I) -> Result<(), OperationError>
    where
        I: Iterator<Item = u64>,
    {
        idl.try_for_each(|i| {
            trace!("Removing {:?} from cache", i);
            if i == 0 {
                Err(OperationError::InvalidEntryId)
            } else {
                (*self.allids).remove_id(i);
                self.entry_cache.remove_dirty(i);
                Ok(())
            }
        })
    }

    pub fn write_idl(
        &mut self,
        attr: &str,
        itype: IndexType,
        idx_key: &str,
        idl: &IDLBitRange,
    ) -> Result<(), OperationError> {
        let cache_key = IdlCacheKey {
            a: attr.into(),
            i: itype,
            k: idx_key.into(),
        };
        // When the idl is empty the db will remove the key, and synthesise an
        // empty IdList on a miss. We can instead cache a new empty IdList here,
        // so we avoid the db lookup for this idl entirely.
        if idl.is_empty() {
            self.idl_cache
                .insert_dirty(cache_key, Box::new(IDLBitRange::new()));
        } else {
            self.idl_cache
                .insert_dirty(cache_key, Box::new(idl.clone()));
        }
        // self.db.write_idl(audit, attr, itype, idx_key, idl)
        Ok(())
    }
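
    // For example: after the last entry with name=foo is deleted, write_idl is
    // called with an empty idl, and the "name / eq / foo" key is cached as an
    // empty IDLBitRange. A later search for name=foo then resolves to the empty
    // set from the cache without touching sqlite at all.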

    pub fn optimise_dirty_idls(&mut self) {
        self.idl_cache.iter_mut_dirty().for_each(|(k, maybe_idl)| {
            if let Some(idl) = maybe_idl {
                if idl.maybe_compress() {
                    trace!(?k, "Compressed idl");
                }
            }
        })
    }

    pub fn is_idx_slopeyness_generated(&self) -> Result<bool, OperationError> {
        self.db.is_idx_slopeyness_generated()
    }

    pub fn get_idx_slope(&self, ikey: &IdxKey) -> Result<Option<IdxSlope>, OperationError> {
        self.db.get_idx_slope(ikey)
    }

    /// Index Slope Analysis. For the purpose of external modules you can consider this as a
    /// module that generates "weights" for each index that we have. Smaller values are faster
    /// indexes - larger values are more costly ones. This is not intended to yield perfect
    /// weights. The intent is to separate the obviously more effective indexes rather than
    /// to min-max the fine tuning of these. Consider name=foo vs class=*. name=foo will always
    /// be better than class=*, but comparing name=foo to spn=foo is "much of a muchness" since
    /// both are really fast.
    pub fn analyse_idx_slopes(&mut self) -> Result<(), OperationError> {
        /*
         * Inside of this analysis there are two major factors we need to understand
         *
         * * What is the variation of idl lengths within an index?
         * * How many keys are stored in this index?
         *
         * Since we have the filter2idl threshold, we want to find "what is the smallest
         * and most unique index asap so we can exit faster". This allows us to avoid
         * loading larger, more costly indexes that either have large idls, high variation
         * or few keys, and are likely to miss and have to go out to disk.
         *
         * A few methods were proposed, but thanks to advice from Perri Boulton (psychology
         * researcher with a background in statistics), we were able to devise a reasonable
         * approach.
         *
         * These are commented in line to help understand the process.
         */

        /*
         * Step 1 - we have an index like "idx_eq_member". It has data that looks somewhat
         * like:
         *
         *  key    | idl
         *  -------+------------
         *  uuid_a | [1, 2, 3, ...]
         *  -------+------------
         *  uuid_b | [4, 5, 6, ...]
         *
         * We need to collect this into a single vec of "how long is each idl". Since we have
         * each idl in the vec, the length of the vec is also the number of keys in the set.
         * This yields for us:
         *
         *   idx_eq_member: [4.0, 5.0, ...]
         * where each f64 value is the float representation of the length of idl.
         *
         * We then assemble these to a map so we have each idxkey and its associated list
         * of idl lens.
         */

        let mut data: HashMap<IdxKey, Vec<f64>> = HashMap::new();
        self.idl_cache.iter_dirty().for_each(|(k, maybe_idl)| {
            if let Some(idl) = maybe_idl {
                let idl_len: u32 = idl.len().try_into().unwrap_or(u32::MAX);
                // Convert to something we can use.
                let idl_len = f64::from(idl_len);

                let kref = IdxKeyRef::new(&k.a, &k.i);
                if idl_len > 0.0 {
                    // It's worth looking at. Anything len 0 will be removed.
                    if let Some(lens) = data.get_mut(&kref as &dyn IdxKeyToRef) {
                        lens.push(idl_len)
                    } else {
                        data.insert(kref.as_key(), vec![idl_len]);
                    }
                }
            }
        });

        /*
         * So now for each of our sets:
         *
         *   idx_eq_member: [4.0, 5.0, ...]
         *   idx_eq_name  : [1.0, 1.0, 1.0, ...]
         *
         * To get the variability, we calculate the normal distribution of the set of values
         * and then using this variance we use the 1st deviation (~85%) value to assert that
         * 85% or more of the values in this set will be "equal or less" than this length.
         *
         * So given say:
         *   [1.0, 1.0, 1.0, 1.0]
         * We know that the sd_1 will be 1.0. Given:
         *   [1.0, 1.0, 2.0, 3.0]
         * We know that it will be ~2.57 (mean 1.75 + sd of 0.82).
         *
         * The other factor is number of keys. This is thankfully easy! We have that from
         * vec.len().
         *
         * We can now calculate the index slope. Why is it a slope you ask? Because we
         * plot the data out on a graph, with "variability" on the y axis, and number of
         * keys on the x.
         *
         * Lets plot the data we just added.
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +           * eq_member
         *    |
         *  1 +     * eq_name
         *    |
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * Now, if we were to connect a line from (0,0) to each point we get a line with an angle.
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +           * eq_member
         *    |
         *  1 +     * eq_name
         *    |/---------/
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         *    |
         *  4 +
         *    |
         *  3 +
         *    |
         *  2 +           * eq_member
         *    |        /--/
         *  1 +    /--/    * eq_name
         *    |/--/
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * (Look it's ascii art, don't judge.)
         *
         * Point is that eq_member is "steeper" and eq_name is "shallower". This is what we call
         * the "slopeyness" aka the jank of the line, or more precisely, the angle.
         *
         * Now we need a way to numerically compare these lines. Since the points could be
         * anywhere on our graph:
         *
         *    |
         *  4 +  *
         *    |
         *  3 +        *
         *    |
         *  2 +     *
         *    |
         *  1 +           *
         *    |
         *    +--+--+--+--+--
         *       1  2  3  4
         *
         * While we can see what's obvious or best here, a computer has to know it. So we now
         * assume that these points construct a triangle, going through (0,0), (x, 0) and (x, y).
         *
         *          Λ│
         *         ╱ │
         *        ╱  │
         *       ╱   │
         *      ╱    │
         *     ╱     │
         *    ╱      │ sd_1
         *   ╱       │
         *  ╱        │
         *  ─────────┼
         *    nkeys
         *
         * Since this is right angled we can use arctan to work out the degrees of the line. This
         * gives us a value from 1.0 to 90.0 (We clamp to a minimum of 1.0, because we use 0 as "None"
         * in the NonZeroU8 type in filter.rs, which allows ZST optimisation)
         *
         * The problem is that we have to go from float to u8 - this means we lose decimal precision
         * in the conversion. To lessen this, we multiply by 2.8 to give some extra weight to each
         * angle to minimise this loss before we convert.
         *
         * And there we have it! A slope factor of the index! A way to compare these sets quickly
         * at query optimisation time to minimise index access.
         */
        let slopes: HashMap<_, _> = data
            .into_iter()
            .filter_map(|(k, lens)| {
                let slope_factor = Self::calculate_sd_slope(&lens);
                if slope_factor == 0 || slope_factor == IdxSlope::MAX {
                    None
                } else {
                    Some((k, slope_factor))
                }
            })
            .collect();
        trace!(?slopes, "Generated slopes");
        // Write the data down
        self.db.store_idx_slope_analysis(&slopes)
    }

    fn calculate_sd_slope(data: &[f64]) -> IdxSlope {
        let (n_keys, sd_1) = if data.len() >= 2 {
            // We can only do SD on sets of 2 or more values.
            let l: u32 = data.len().try_into().unwrap_or(u32::MAX);
            let c = f64::from(l);
            let mean = data.iter().take(u32::MAX as usize).sum::<f64>() / c;
            let variance: f64 = data
                .iter()
                .take(u32::MAX as usize)
                .map(|len| {
                    let delta = mean - len;
                    delta * delta
                })
                .sum::<f64>()
                / (c - 1.0);

            let sd = variance.sqrt();

            // This is saying ~85% of values will be at least this len or less.
            let sd_1 = mean + sd;
            (c, sd_1)
        } else if data.len() == 1 {
            (1.0, data[0])
        } else {
            // Can't resolve.
            return IdxSlope::MAX;
        };

        // Now we know sd_1 and number of keys. We can use this as a triangle to work out
        // the angle along the hypotenuse. We use this angle - or slope - to show which
        // elements have the smallest sd_1 and most keys available. Then because this
        // is bound between 0.0 -> 90.0, we "unfurl" this by multiplying by 2.8. This
        // gives us a little more precision when we drop the decimal point.
        let sf = (sd_1 / n_keys).atan().to_degrees() * 2.8;

        // Now these are fractions, and we can't use those in u8, so we clamp the min/max values
        // that we expect to be yielded.
        let sf = sf.clamp(1.0, 254.0);
        if !sf.is_finite() {
            IdxSlope::MAX
        } else {
            // SAFETY
            // `sf` is clamped between 1.0 and 254.0 above, ensuring it is
            // always in range.
            unsafe { sf.to_int_unchecked::<IdxSlope>() }
        }
    }
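
    // Worked example (illustrative): for an eq_name style index with idl lens
    // [1.0, 1.0, 1.0, 1.0], sd_1 = 1.0 and n_keys = 4.0, so
    // sf = atan(1.0 / 4.0) in degrees * 2.8 ≈ 14.04 * 2.8 ≈ 39. For an
    // eq_member style index with lens [64.0, 128.0], sd_1 ≈ 141.3 and
    // n_keys = 2.0, so sf = atan(70.6) in degrees * 2.8 ≈ 89.2 * 2.8 ≈ 249.
    // The unique index receives the much smaller (cheaper) slope. A runnable
    // sketch of this check lives at the end of this file.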

    pub fn create_name2uuid(&self) -> Result<(), OperationError> {
        self.db.create_name2uuid()
    }

    pub fn write_name2uuid_add(
        &mut self,
        uuid: Uuid,
        add: BTreeSet<String>,
    ) -> Result<(), OperationError> {
        add.into_iter().for_each(|k| {
            let cache_key = NameCacheKey::Name2Uuid(k);
            let cache_value = NameCacheValue::U(uuid);
            self.name_cache.insert_dirty(cache_key, cache_value)
        });
        Ok(())
    }

    pub fn write_name2uuid_rem(&mut self, rem: BTreeSet<String>) -> Result<(), OperationError> {
        rem.into_iter().for_each(|k| {
            // Mark each removed name mapping dirty so commit() deletes it from sqlite.
            let cache_key = NameCacheKey::Name2Uuid(k);
            self.name_cache.remove_dirty(cache_key)
        });
        Ok(())
    }

    pub fn create_uuid2spn(&self) -> Result<(), OperationError> {
        self.db.create_uuid2spn()
    }

    pub fn write_uuid2spn(&mut self, uuid: Uuid, k: Option<Value>) -> Result<(), OperationError> {
        let cache_key = NameCacheKey::Uuid2Spn(uuid);
        match k {
            Some(v) => self
                .name_cache
                .insert_dirty(cache_key, NameCacheValue::S(Box::new(v))),
            None => self.name_cache.remove_dirty(cache_key),
        }
        Ok(())
    }

    pub fn create_uuid2rdn(&self) -> Result<(), OperationError> {
        self.db.create_uuid2rdn()
    }

    pub fn write_uuid2rdn(&mut self, uuid: Uuid, k: Option<String>) -> Result<(), OperationError> {
        let cache_key = NameCacheKey::Uuid2Rdn(uuid);
        match k {
            Some(s) => self
                .name_cache
                .insert_dirty(cache_key, NameCacheValue::R(s)),
            None => self.name_cache.remove_dirty(cache_key),
        }
        Ok(())
    }

    pub fn create_idx(&self, attr: &str, itype: IndexType) -> Result<(), OperationError> {
        // We don't need to affect this, so pass it down.
        self.db.create_idx(attr, itype)
    }

    pub unsafe fn purge_idxs(&mut self) -> Result<(), OperationError> {
        self.db.purge_idxs().map(|()| {
            self.idl_cache.clear();
        })
    }

    pub unsafe fn purge_id2entry(&mut self) -> Result<(), OperationError> {
        self.db.purge_id2entry().map(|()| {
            let mut ids = IDLBitRange::new();
            ids.compress();
            std::mem::swap(self.allids.deref_mut(), &mut ids);
            self.entry_cache.clear();
        })
    }

    pub fn write_db_s_uuid(&self, nsid: Uuid) -> Result<(), OperationError> {
        self.db.write_db_s_uuid(nsid)
    }

    pub fn write_db_d_uuid(&self, nsid: Uuid) -> Result<(), OperationError> {
        self.db.write_db_d_uuid(nsid)
    }

    pub fn set_db_ts_max(&mut self, ts: Duration) -> Result<(), OperationError> {
        *self.op_ts_max = Some(ts);
        self.db.set_db_ts_max(ts)
    }

    pub(crate) fn get_db_index_version(&self) -> i64 {
        self.db.get_db_index_version()
    }

    pub(crate) fn set_db_index_version(&self, v: i64) -> Result<(), OperationError> {
        self.db.set_db_index_version(v)
    }

    pub fn setup(&mut self) -> Result<(), OperationError> {
        self.db
            .setup()
            .and_then(|()| self.db.get_allids())
            .map(|mut ids| {
                std::mem::swap(self.allids.deref_mut(), &mut ids);
            })
            .and_then(|()| self.db.get_id2entry_max_id())
            .map(|mid| {
                *self.maxid = mid;
            })
    }
}

impl IdlArcSqlite {
    pub fn new(cfg: &BackendConfig, vacuum: bool) -> Result<Self, OperationError> {
        let db = IdlSqlite::new(cfg, vacuum)?;

        // Autotune heuristic.
        let mut cache_size = cfg.arcsize.unwrap_or_else(|| {
            // Due to changes in concread, we can now scale this up! We now aim for 120%
            // of entries.
            db.get_allids_count()
                .map(|c| {
                    let tmpsize = ((c / 5) as usize) * 6;
                    // if our calculation's too small anyway, just set it to the minimum target
                    std::cmp::max(tmpsize, DEFAULT_CACHE_TARGET)
                })
                .unwrap_or(DEFAULT_CACHE_TARGET)
        });
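
        // Worked example (illustrative): with 10_000 entries on disk the
        // heuristic above yields (10_000 / 5) * 6 = 12_000 cache slots,
        // i.e. 120% of the entry count.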

        if cache_size < DEFAULT_CACHE_TARGET {
            admin_warn!(
                old = cache_size,
                new = DEFAULT_CACHE_TARGET,
                "Configured Arc Cache size too low, increasing..."
            );
            // Note: this assignment must come after the log line above, or the
            // reported `old` value would be wrong (previously an uncaught bug).
            cache_size = DEFAULT_CACHE_TARGET;
        }

        let entry_cache = ARCacheBuilder::new()
            .set_expected_workload(
                cache_size,
                cfg.pool_size as usize,
                DEFAULT_CACHE_RMISS,
                DEFAULT_CACHE_WMISS,
                false,
            )
            .set_reader_quiesce(true)
            .build()
            .ok_or_else(|| {
                admin_error!("Failed to construct entry_cache");
                OperationError::InvalidState
            })?;
        // The idl cache should have smaller items, and is critical for fast searches
        // so we allow it to have a higher ratio of items relative to the entries.
        let idl_cache = ARCacheBuilder::new()
            .set_expected_workload(
                cache_size * DEFAULT_IDL_CACHE_RATIO,
                cfg.pool_size as usize,
                DEFAULT_CACHE_RMISS,
                DEFAULT_CACHE_WMISS,
                false,
            )
            .set_reader_quiesce(true)
            .build()
            .ok_or_else(|| {
                admin_error!("Failed to construct idl_cache");
                OperationError::InvalidState
            })?;

        let name_cache = ARCacheBuilder::new()
            .set_expected_workload(
                cache_size * DEFAULT_NAME_CACHE_RATIO,
                cfg.pool_size as usize,
                DEFAULT_CACHE_RMISS,
                DEFAULT_CACHE_WMISS,
                true,
            )
            .set_reader_quiesce(true)
            .build()
            .ok_or_else(|| {
                admin_error!("Failed to construct name_cache");
                OperationError::InvalidState
            })?;

        let allids = CowCell::new(IDLBitRange::new());

        let maxid = CowCell::new(0);

        let op_ts_max = CowCell::new(None);

        Ok(IdlArcSqlite {
            db,
            entry_cache,
            idl_cache,
            name_cache,
            op_ts_max,
            allids,
            maxid,
        })
    }

    pub fn try_quiesce(&self) {
        self.entry_cache.try_quiesce();
        self.idl_cache.try_quiesce();
        self.name_cache.try_quiesce();
    }

    pub fn read(&self) -> IdlArcSqliteReadTransaction {
        // IMPORTANT! Always take entrycache FIRST
        let entry_cache_read = self.entry_cache.read();
        let idl_cache_read = self.idl_cache.read();
        let name_cache_read = self.name_cache.read();
        let allids_read = self.allids.read();
        let db_read = self.db.read();

        IdlArcSqliteReadTransaction {
            db: db_read,
            entry_cache: entry_cache_read,
            idl_cache: idl_cache_read,
            name_cache: name_cache_read,
            allids: allids_read,
        }
    }

    pub fn write(&self) -> IdlArcSqliteWriteTransaction {
        // IMPORTANT! Always take entrycache FIRST
        let entry_cache_write = self.entry_cache.write();
        let idl_cache_write = self.idl_cache.write();
        let name_cache_write = self.name_cache.write();
        let op_ts_max_write = self.op_ts_max.write();
        let allids_write = self.allids.write();
        let maxid_write = self.maxid.write();
        let db_write = self.db.write();
        IdlArcSqliteWriteTransaction {
            db: db_write,
            entry_cache: entry_cache_write,
            idl_cache: idl_cache_write,
            name_cache: name_cache_write,
            op_ts_max: op_ts_max_write,
            allids: allids_write,
            maxid: maxid_write,
        }
    }
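
    // Illustrative usage sketch (assumed, not taken from the original file):
    //
    //     let be = IdlArcSqlite::new(&cfg, false)?;
    //     let mut write_txn = be.write();
    //     write_txn.setup()?;
    //     write_txn.commit()?;
    //
    // Readers obtained via read() proceed against their own cache and db
    // snapshots, while commit() flushes dirty cache state to sqlite before
    // publishing the new cache generations.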

    /*
    pub fn stats_audit(&self, audit: &mut AuditScope) {
        let entry_stats = self.entry_cache.view_stats();
        let idl_stats = self.idl_cache.view_stats();
        ladmin_info!(audit, "entry_cache stats -> {:?}", *entry_stats);
        ladmin_info!(audit, "idl_cache stats -> {:?}", *idl_stats);
    }
    */
}
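
// A minimal sketch test (an assumed example, not part of the original file)
// exercising the ordering described in analyse_idx_slopes: a unique index
// style distribution should score a smaller (cheaper) slope than a broad,
// variable one.
#[cfg(test)]
mod idx_slope_sketch {
    use super::*;

    #[test]
    fn unique_index_scores_lower_slope_than_broad_index() {
        // eq_name style: four keys, each mapping to a single id.
        let unique = IdlArcSqliteWriteTransaction::calculate_sd_slope(&[1.0, 1.0, 1.0, 1.0]);
        // eq_member style: two keys with long, variable idls.
        let broad = IdlArcSqliteWriteTransaction::calculate_sd_slope(&[64.0, 128.0]);
        assert!(unique < broad);
        // Empty data cannot be resolved and yields the maximum (worst) slope.
        assert_eq!(IdlArcSqliteWriteTransaction::calculate_sd_slope(&[]), IdxSlope::MAX);
    }
}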