Compare commits

...

3 commits

Author SHA1 Message Date
Martin Pépin 23c1de0fc6
Make dumpstuff more efficient 2020-02-06 20:03:40 +01:00
Martin Pépin 8baaa1d339
Add randomly generated account ids in kfet dump 2020-02-06 19:46:08 +01:00
Martin Pépin 16ec62fc4f
Dump script 2020-02-06 19:11:15 +01:00
2 changed files with 107 additions and 0 deletions

31
example.py Normal file
View file

@ -0,0 +1,31 @@
"""Exemple d'utilisation des dumps."""
import json
from collections import Counter
def parse(filename):
    """Load and return the JSON document stored in ``filename``.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    default encoding, so a dump is read the same way on every machine.
    """
    with open(filename, encoding="utf-8") as file:
        return json.load(file)
if __name__ == "__main__":
    # Index the articles by id so a sale can be mapped back to its name.
    articles_by_id = {a["id"]: a for a in parse("article.dump.json")}
    operations = parse("operation.dump.json")

    # Money spent at the K-Fêt through K-Psul. The summed amounts are negated
    # before being reported (presumably purchases are stored as negative
    # amounts -- confirm against the dump).
    total_expenses = -sum(float(op["amount"]) for op in operations)
    print(f"total expenses: {total_expenses:.2f}")

    # Average amount of a transaction. An empty dump has no meaningful
    # average, so report 0 instead of failing with ZeroDivisionError.
    nb_ops = len(operations)
    average_amount = total_expenses / nb_ops if nb_ops else 0.0
    print(f"Average transaction amount: {average_amount:.2f}")

    # Number of units sold per article.
    articles_sales = Counter()
    for operation in operations:
        articles_sales[operation["article"]] += operation["number"]

    print("Most sold articles:")
    for article_id, nb in articles_sales.most_common(3):
        article_name = articles_by_id[article_id]["name"]
        print(f" - {article_name} : {nb}")

View file

@ -0,0 +1,76 @@
import json
import random
import secrets
from typing import Dict

from django.core.management.base import BaseCommand

from kfet.models import Account, Article, Operation
def gen_anonymisation_table() -> Dict[int, str]:
    """Build a table mapping every account id to a fresh random pseudonym.

    Returns:
        A dict keyed by the id of each ``Account`` row. Each value is a
        32-character lowercase hexadecimal string (128 random bits). A new
        table is drawn on every call.
    """
    # The pseudonyms end up in a published dump, so they are drawn from the
    # OS CSPRNG through ``secrets``. The ``random`` module is documented as
    # unsuitable for security purposes: its generator is deterministic, so
    # its published outputs should not be relied on to stay unlinkable.
    return {
        account_id: secrets.token_hex(16)
        for account_id in Account.objects.values_list("id", flat=True)
    }
def dump_articles(filename: str) -> None:
    """Write every article to ``filename`` as an indented JSON list.

    Each entry has the keys ``id``, ``name``, ``price`` (rendered with
    ``str``), ``category`` (the name of the article's category), ``box_type``
    and ``box_capacity``.

    Args:
        filename: Path of the file to create or overwrite.
    """
    # Fetch the category in the same query: reading ``article.category``
    # lazily would otherwise trigger one extra query per article.
    articles = [
        {
            "id": article.id,
            "name": article.name,
            "price": str(article.price),
            "category": article.category.name,
            "box_type": article.box_type,
            "box_capacity": article.box_capacity,
        }
        for article in Article.objects.select_related("category")
    ]
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(articles, file, indent=4)
def dump_operations(filename: str, accounts_hashes: Dict[int, str]) -> None:
    """Write all non-cancelled purchases to ``filename`` as an indented JSON list.

    Each entry has the keys ``amount`` and ``date`` (both rendered with
    ``str``), ``article``, ``number``, ``is_cof`` and ``on_account``. The
    last one is the pseudonym of the account instead of its id.

    Args:
        filename: Path of the file to create or overwrite.
        accounts_hashes: Pseudonym for each account id. Looking up an account
            that is missing from this table raises ``KeyError``.
    """
    # Only the needed columns are fetched, as plain tuples.
    columns = (
        "amount",
        "article__id",
        "article_nb",
        "group__at",
        "group__is_cof",
        "group__on_acc__id",
    )
    purchases = Operation.objects.filter(
        type=Operation.PURCHASE, canceled_at__isnull=True
    )

    records = []
    for amount, article_id, quantity, when, is_cof, account_id in (
        purchases.values_list(*columns)
    ):
        records.append(
            {
                "amount": str(amount),
                "article": article_id,
                "number": quantity,
                "date": str(when),
                "is_cof": is_cof,
                "on_account": accounts_hashes[account_id],
            }
        )

    with open(filename, "w") as out:
        json.dump(records, out, indent=4)
class Command(BaseCommand):
    """Management command exporting the articles and the purchase history as JSON."""

    help = 'Dump un historique "anonymisé".'

    def _announce(self, what, path):
        """Report on stdout which dump is about to be written to ``path``."""
        self.stdout.write('Dumping {} to "{}"'.format(what, path))

    def handle(self, *args, **options):
        """Write ``article.dump.json`` and then ``operation.dump.json``.

        Both file names are relative paths, and no command-line option is read.
        """
        # XXX: weak anonymisation. Account ids are replaced by pseudonyms, but
        # each pseudonym is still exported alongside the purchases made on
        # that account.
        pseudonyms = gen_anonymisation_table()

        articles_path = "article.dump.json"
        self._announce("articles", articles_path)
        dump_articles(articles_path)

        operations_path = "operation.dump.json"
        self._announce("operations", operations_path)
        dump_operations(operations_path, pseudonyms)