Cachetools¶
Cachetools is a synchronous caching library whose cache classes are mutable mappings (built on `collections.abc.MutableMapping`), so a cache behaves like a dict with an eviction policy. It can only cache data synchronously, but it is more lightweight than Aiocache.
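For instance, because every cache class is a mutable mapping, you can use one directly like a dict. A minimal sketch with LRUCache (not part of the original example):

from cachetools import LRUCache

cache = LRUCache(maxsize=2)
cache['a'] = 1  # cache classes support plain dict-style assignment
cache['b'] = 2
cache['c'] = 3  # exceeds maxsize: the least recently used key 'a' is evicted
print('a' in cache)  # False
print(cache['c'])    # 3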
example¶
maxsize: maximum number of items in the cache
ttl: time-to-live of each entry, in seconds
import time

import numpy as np
import pandas as pd
from cachetools import cached, TTLCache


def d1(m, n):
    # build an m x n DataFrame of random values
    data = np.random.randn(m, n)
    cols = [f'col{j}' for j in range(1, n + 1)]
    df = pd.DataFrame(data, columns=cols)
    return df


# unbounded cache whose entries expire after 60 seconds
cache = TTLCache(maxsize=float('inf'), ttl=60)


@cached(cache)
def f1(m, n):
    df = d1(m, n)
    return df


t0 = time.time()
df1 = f1(100000, 10)  # cache miss: the DataFrame is computed
print(f'time: {time.time() - t0:.3f}')
t0 = time.time()
df2 = f1(100000, 10)  # cache hit: served from the cache
print(f'time: {time.time() - t0:.3f}')
t0 = time.time()
df3 = f1(100000, 10)  # cache hit
print(f'time: {time.time() - t0:.3f}')
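Since the cache is an ordinary mapping, it can also be inspected and cleared directly. A minimal sketch using the cache object from above:

print(cache.currsize)  # number of entries currently cached (here 1: all three calls share one key)
cache.clear()          # drop all entries; the next f1() call recomputes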
custom key¶
caveat: to be thread-safe, we must pass a Lock object to the cached decorator via its lock parameter.
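A minimal sketch of just the lock parameter, separate from the full example below (the function and cache sizes are placeholders):

import threading

from cachetools import cached, TTLCache


@cached(TTLCache(maxsize=1024, ttl=60), lock=threading.Lock())
def g(x):
    # concurrent callers now read and update the shared cache safely
    return x * x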
import json
import threading
from collections.abc import Callable

import pandas as pd
from adlfs import AzureBlobFileSystem  # assumed source of AzureBlobFileSystem
from cachetools import cached, TTLCache
from fastapi import Request  # assumed source of Request

CACHE_TIME_LIMIT = 1 * 60 * 60  # one hour
cachetools_cache = TTLCache(maxsize=float('inf'), ttl=CACHE_TIME_LIMIT)


def key_builder(f, namespace, exclude, *args, **kwargs):
    params = {}
    special_type = ''
    params['args'] = args
    if isinstance(exclude, str):
        exclude = [exclude]
    for k, v in kwargs.items():
        if exclude is not None and k in exclude:
            continue
        if k == 'special' and isinstance(v, Request):
            special_type = v.headers.get('Accept')
        else:
            if isinstance(v, AzureBlobFileSystem):
                # a filesystem object is not JSON-serializable;
                # identify it by its account instead
                v = {
                    'account_url': v.account_url,
                    'account_name': v.account_name,
                }
            params[k] = v
    return f'{f.__name__}:{namespace}{json.dumps(params)}`{special_type}'


def cachetools_cachedx(f):
    # simple variant with no namespace/exclude customization
    return cached(
        cachetools_cache,
        key=lambda *args, **kwargs: key_builder(f, '', None, *args, **kwargs),
    )(f)


def cachetools_cached(
    namespace: str = '',
    exclude: str | list[str] | None = None,
    key_builder: Callable = key_builder,
):
    def decorator(f):
        return cached(
            cachetools_cache,
            key=lambda *args, **kwargs:
                key_builder(f, namespace, exclude, *args, **kwargs),
            lock=threading.Lock(),  # ensure thread safety
        )(f)
    return decorator


@cachetools_cached(namespace='dev', exclude='fs', key_builder=key_builder)
def read_parquet_cache(
    *,
    fs: AzureBlobFileSystem,
    path: str,
    columns: list[str],
) -> pd.DataFrame:
    with fs.open(path) as f:
        df = pd.read_parquet(path=f, columns=columns)
    return df
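A hypothetical usage sketch: because fs is excluded from the key, repeated calls with the same path and columns hit the same cache entry even if the filesystem object differs. The account name and path below are placeholders:

fs = AzureBlobFileSystem(account_name='myaccount')  # placeholder account
df = read_parquet_cache(fs=fs, path='container/data.parquet', columns=['col1'])  # miss: reads from blob storage
df = read_parquet_cache(fs=fs, path='container/data.parquet', columns=['col1'])  # hit: same key_builder key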