# config.py

import json
import os

import yaml
from dotenv import load_dotenv

load_dotenv()


def load_yaml_config(file_path):
	"""Read a YAML file and return its parsed content.

	Args:
		file_path: Path of the YAML file to read (opened as UTF-8 text).

	Returns:
		The object produced by ``yaml.safe_load`` for the file content.

	Raises:
		FileNotFoundError: If ``file_path`` does not exist. The message
			names the missing path.
	"""
	# Open first instead of testing os.path.exists() beforehand: this avoids
	# the check-then-open race and lets other failures (e.g. permission
	# errors) surface as themselves rather than as a misleading "not found".
	try:
		stream = open(file_path, "r", encoding="utf-8")
	except FileNotFoundError as err:
		raise FileNotFoundError(f"Config file not found: {file_path}") from err
	with stream:
		return yaml.safe_load(stream)


#
# Name of the YAML file holding the model parameters (see /config/ folder)
#
MODEL_CONFIG_FILE = os.environ.get("MODEL_CONFIG_FILE", "gemma-2-9b-it.yaml")

#
# Access token for Hugging Face, used when models have to be downloaded
#
HF_ACCESS_TOKEN = os.environ.get("HF_ACCESS_TOKEN")

#
# Directory where downloaded models are cached
#
MODEL_CACHE_DIR = os.path.join(
	os.getcwd(),
	"storage",
	"models",
)

#
# Directory with the documents used for RAG, including an index.yaml
#
RAG_DOCUMENTS_FOLDER = os.path.join(
	os.getcwd(),
	os.environ.get("RAG_DOCUMENTS_FOLDER", "documents"),
)

#
# Directory where the Faiss index is stored and loaded from
#
RAG_INDEX_FOLDER = os.path.join(
	os.getcwd(),
	"storage",
	"faiss-index",
)

#
# Model config, parsed from the selected YAML file in the "config" folder
# below the current working directory
#
MODEL_CONFIG = load_yaml_config(
	os.path.join(os.getcwd(), "config", MODEL_CONFIG_FILE),
)


#
# URL of the chatbot avatar image
#
CHATBOT_AVATAR_URL = os.environ.get(
	"CHATBOT_AVATAR_URL",
	"https://www.wur.nl/favicon.ico",
)

#
# Sound mapping for audio generator, given as JSON in the WORD_SOUND_MAPPING
# environment variable
#
# An unset, empty or whitespace-only variable falls back to "{}" so that a
# blank `WORD_SOUND_MAPPING=` line in .env does not crash the import.
word_sound_mapping_str = os.getenv("WORD_SOUND_MAPPING", "").strip() or "{}"
try:
	WORD_SOUND_MAPPING = json.loads(word_sound_mapping_str)
except json.JSONDecodeError as err:
	# Same exception type as before, but the message now names the variable.
	raise json.JSONDecodeError(
		f"Invalid JSON in WORD_SOUND_MAPPING environment variable: {err.msg}",
		err.doc,
		err.pos,
	) from err