gisaf-backend/src/gisaf/registry.py

"""
Define the models for the ORM
"""
import logging
import importlib
import pkgutil
from collections import defaultdict
from importlib.metadata import entry_points
from typing import Any, ClassVar, Literal

from pydantic import create_model
from pydantic_core import PydanticUndefined
from sqlalchemy import text
from sqlalchemy.orm import selectinload
from sqlmodel import SQLModel, select, inspect
import pandas as pd

from gisaf.config import conf
from gisaf.models import (misc, category as category_module,
                     project, reconcile, map_bases, tags)
from gisaf.models.geo_models_base import (
    LiveGeoModel,
    PlottableModel,
    GeoModel,
    SurveyModel,
    RawSurveyBaseModel,
    LineWorkSurveyModel,
    GeoPointSurveyModel,
    GeoLineSurveyModel,
    GeoPolygonSurveyModel,
)
from gisaf.utils import ToMigrate
from gisaf.models.category import Category, CategoryGroup
from gisaf.database import db_session
from gisaf import models
from gisaf.models.metadata import survey, raw_survey

logger = logging.getLogger(__name__)

category_model_mapper = {
    'Point': GeoPointSurveyModel,
    'Line': GeoLineSurveyModel,
    'Polygon': GeoPolygonSurveyModel,
}

class NotInRegistry(Exception):
    pass

def import_submodules(package, recursive=True):
    """ Import all submodules of a module, recursively, including subpackages

    :param package: package (name or actual module)
    :type package: str | module
    :param recursive: scan package recursively
    :rtype: dict[str, types.ModuleType]
    """
    if isinstance(package, str):
        package = importlib.import_module(package)
    results = {}
    for loader, name, is_pkg in pkgutil.walk_packages(package.__path__):
        full_name = package.__name__ + '.' + name
        results[full_name] = importlib.import_module(full_name)
        if recursive and is_pkg:
            results.update(import_submodules(full_name))
    return results


class ModelRegistry:
    """
    Collect, categorize, and initialize the SQLAlchemy data models.
    Maintains registries for all kind of model types, eg. geom, data, values...
    Provides tools to get the models from their names, table names, etc.
    """
    stores: pd.DataFrame
    categories: pd.DataFrame
    values: dict[str, PlottableModel]
    geom: dict[str, GeoModel]
    geom_live: dict[str, LiveGeoModel]
    geom_live_defs: dict[str, dict[str, Any]]
    geom_custom: dict[str, GeoModel]
    geom_custom_store: dict[str, GeoModel]
    other: dict[str, SQLModel]
    misc: dict[str, SQLModel]
    raw_survey_models: dict[str, RawSurveyBaseModel]
    survey_models: dict[str, SurveyModel]
    ## TODO: implement multiple values for a model (search for values_for_model)
    values_for_model: dict[GeoModel, list[PlottableModel]]

    def __init__(self) -> None:
        """
        Get geo models
        :return: None
        """
        self.geom_custom = {}
        self.geom_custom_store = {}
        self.geom_live = {}
        self.geom_live_defs = {}
        self.values = {}
        self.other = {}
        self.misc = {}
        self.raw_survey_models = {}
        self.survey_models = {}

    async def make_registry(self) -> None:
        """
        Make (or refresh) the registry of models.
        :return:
        """
        logger.debug('make_registry')
        await self.make_category_models()
        self.scan()
        await self.build()
        ## If ogcapi is in app (i.e. not with scheduler):
        ## Now that the models are refreshed, tells the ogcapi to (re)build
        #await app.extra['ogcapi'].build()

    async def make_category_models(self) -> None:
        """
        Make geom models from the category model
        and update raw_survey_models and survey_models
        Important notes:
        - the db must be bound before running this function
        - the db must be rebound after running this function,
        so that the models created are actually bound to the db connection
        :return:
        """
        logger.debug('make_category_models')
        async with db_session() as session:
            query = select(Category).order_by(Category.long_name).options(selectinload(Category.category_group))
            data = await session.exec(query)
            categories: list[Category] = data.all()
        for category in categories:
            ## Several statuses can coexist for the same model, so
            ## consider only the ones with the 'E' (existing) status
            ## The other statuses are defined only for import (?)
            if getattr(category, 'status', 'E') != 'E':
                continue

            ## Use pydantic create_model, supported by SQLModel
            ## See https://github.com/tiangolo/sqlmodel/issues/377
            store_name = f'{survey.schema}.{category.table_name}'
            raw_store_name = f'{raw_survey.schema}.RAW_{category.table_name}'
            raw_survey_field_definitions = {
                ## FIXME: RawSurveyBaseModel.category should be a Category, not category.name
                'category_name': (ClassVar[str], category.name),
                ## FIXME: Same for RawSurveyBaseModel.group
                'group_name': (ClassVar[str], category.category_group.name),
                'viewable_role': (ClassVar[str], category.viewable_role),
                'store_name': (ClassVar[str], raw_store_name),
                # 'icon': (str, ''),
            }
            ## Raw survey points
            try:
                self.raw_survey_models[store_name] = create_model(
                    __base__=RawSurveyBaseModel,
                    __model_name=category.raw_survey_table_name,
                    __cls_kwargs__={
                        'table': True,
                        '__tablename__': category.raw_survey_table_name,
                    },
                    **raw_survey_field_definitions
                )
            except Exception as err:
                logger.exception(err)
                logger.warning(err)
            else:
                logger.debug('Discovered {:s}'.format(category.raw_survey_table_name))

            model_class = category_model_mapper.get(category.geom_type)
            ## Final geometries
            try:
                if model_class:
                    survey_field_definitions = {
                        'category_name': (ClassVar[str], category.name),
                        'group_name': (ClassVar[str], category.category_group.name),
                        'raw_store_name': (ClassVar[str], raw_store_name),
                        'viewable_role': (ClassVar[str], category.viewable_role),
                        'symbol': (ClassVar[str], category.symbol),
                        #'raw_model': (str, self.raw_survey_models.get(raw_store_name)),
                        # 'icon': (str, f'{survey.schema}-{category.table_name}'),
                    }
                    self.survey_models[store_name] = create_model(
                        __base__= model_class,
                        __model_name=category.table_name,
                        __cls_kwargs__={
                            'table': True,
                            '__tablename__': category.table_name,
                        },
                        **survey_field_definitions,
                        )
            except Exception as err:
                logger.warning(err)
            else:
                logger.debug('Discovered {:s}'.format(category.table_name))

        logger.info('Discovered {:d} models'.format(len(categories)))

    def scan(self) -> None:
        """
        Scan all models defined explicitely (not the survey ones,
        which are defined by categories), and store them for reference.
        """
        logger.debug('scan')

        ## Scan the models defined in modules
        for module_name, module in import_submodules(models).items():
            if module_name.rsplit('.', 1)[-1] in (
                'geo_models_base',
                'models_base',
            ):
                continue
            for name in dir(module):
                obj = getattr(module, name)
                if hasattr(obj, '__module__') and obj.__module__.startswith(module.__name__)\
                        and hasattr(obj, '__tablename__') and hasattr(obj, 'get_store_name'):
                    geom_type = self.add_model(obj)
                    logger.debug(f'Model {obj.get_store_name()} added in the registry from gisaf source tree as {geom_type}')

        ## Scan the models defined in plugins (setuptools' entry points)
        for module_name, model in self.scan_entry_points(name='gisaf_extras.models').items():
            geom_type = self.add_model(model)
            logger.debug(f'Model {model.get_store_name()} added in the registry from {module_name} entry point as {geom_type}')

        for module_name, store in self.scan_entry_points(name='gisaf_extras.stores').items():
            self.add_store(store)
            logger.debug(f'Store {store} added in the registry from {module_name} gisaf_extras.stores entry point')

        ## Add misc models
        for module in misc, category_module, project, reconcile, map_bases, tags:
            for name in dir(module):
                obj = getattr(module, name)
                if hasattr(obj, '__module__') and hasattr(obj, '__tablename__'):
                    self.misc[name] = obj

    async def build(self) -> None:
        """
        Build the registry: organize all models in a common reference point.
        This should be executed after the discovery of surey models (categories)
        and the scan of custom/module defined models.
        """
        logger.debug('build')
        ## Combine all geom models (auto and custom)
        self.geom = {**self.survey_models, **self.geom_custom}

        await self.make_stores()

        ## Some lists of table, by usage
        ## XXX: Gino: doesn't set __tablename__ and __table__ , or engine not started???
        ##      So, hack the table names of auto_geom
        #self.geom_tables = [model.__tablename__
        #self.geom_tables = [getattr(model, "__tablename__", None)
        #                    for model in sorted(list(self.geom.values()),
        #                                        key=lambda a: a.z_index)]

        values_tables = [model.__tablename__ for model in self.values.values()]
        other_tables = [model.__tablename__ for model in self.other.values()]

        self.data_tables = values_tables + other_tables
        self.populate_values_for_model()
        self.make_menu()

    def populate_values_for_model(self):
        '''
        Build a dict for quick access to the values from a model
        '''
        self.values_for_model = {}
        for model_value in self.values.values():
            for relationship in inspect(model_value).relationships:
                model = self.stores.loc[relationship.target.fullname, 'model']
                if model not in self.values_for_model:
                    self.values_for_model[model] = []
                self.values_for_model[model].append(model_value)

    def scan_entry_points(self, name):
        """
        Get the entry points in gisaf_extras.models, and return their models
        :return: dict of name: models
        """
        named_objects = {}
        for entry_point in entry_points().select(group=name):
            try:
                named_objects.update({entry_point.name: entry_point.load()})
            except ModuleNotFoundError as err:
                logger.warning(err)
        return named_objects

    def add_model(self, model) -> Literal['GeoModel', 'PlottableModel', 'Other model']:
        """
        Add the model to its proper dict for reference, return the type
        """
        # if not hasattr(model, 'get_store_name'):
        #     raise NotInRegistry()
        table_name = model.get_store_name()
        if issubclass(model, GeoModel) and not \
            issubclass(model, RawSurveyBaseModel) and not model.hidden:
            self.geom_custom[table_name] = model
            return 'GeoModel'
        elif issubclass(model, PlottableModel):
            self.values[table_name] = model
            return 'PlottableModel'
        else:
            self.other[table_name] = model
            return 'Other model'

    def add_store(self, store) -> None:
        self.geom_custom_store[store.name] = store

    def make_menu(self):
        """
        Build the Admin menu
        :return:
        """
        self.menu = defaultdict(list)
        for name, model in self.stores.model.items():
            if hasattr(model, 'Admin'):
                self.menu[model.Admin.menu].append(model)

    # def get_raw_survey_model_mapping(self):
    #     """
    #     Get a mapping of category_name -> model for categories
    #     :return: dict of name -> model (class)
    #     """
    #     ## TODO: add option to pass a single item
    #     ## Local imports, avoiding cyclic dependencies
    #     ## FIXME: Gino
    #     categories = db.session.query(Category)
    #     return {category.name: self.raw_survey_models[category.table_name]
    #             for category in categories
    #             if self.raw_survey_models.get(category.table_name)}

    async def get_model_id_params(self, model, id):
        """
        Return the parameters for this item (table name, id), displayed in info pane
        """
        if not model:
            return {}
        item = await model.load(**model.get_join_with()).query.where(model.id==id).gino.first()
        if not item:
            return {}
        resp = {}
        resp['itemName'] = item.caption
        resp['geoInfoItems'] = await item.get_geo_info()
        resp['surveyInfoItems'] = await item.get_survey_info()
        resp['infoItems'] = await item.get_info()
        resp['tags'] = await item.get_tags()
        if hasattr(item, 'get_categorized_info'):
            resp['categorized_info_items'] = await item.get_categorized_info()
        if hasattr(item, 'get_graph'):
            resp['graph'] = item.get_graph()
        if hasattr(item, 'Attachments'):
            if hasattr(item.Attachments, 'files'):
                resp['files'] = await item.Attachments.files(item)
            if hasattr(item.Attachments, 'images'):
                resp['images'] = await item.Attachments.images(item)
        if hasattr(item, 'get_external_record_url'):
            resp['externalRecordUrl'] = item.get_external_record_url()
        return resp

    async def make_stores(self):
        """
        Make registry for primary groups, categories and survey stores using Pandas dataframes.
        Used in GraphQl queries.
        """
        ## Utility functions used with apply method (dataframes)
        def fill_columns_from_custom_models(row) -> tuple[str, str, str]:
            return (
                row.model.__name__,
                row.model.description,
                row.model.metadata.schema
            )

        def fill_columns_from_custom_stores(row) -> tuple[str, str, None]:
            return (
                row.model.description,
                row.model.description,
                None  ## Schema
            )

        def get_store_name(category) -> str:
            fragments = ['V', category.group, category.minor_group_1]
            if category.minor_group_2 != '----':
                fragments.append(category.minor_group_2)
            return '.'.join([
                survey.schema,
                '_'.join(fragments)
            ])

        self.categories = await Category.get_df()
        self.categories['title'] = self.categories.long_name.fillna(self.categories.description)

        self.categories['store'] = self.categories.apply(get_store_name, axis=1)

        self.categories['count'] = pd.Series(dtype=pd.Int64Dtype())
        self.categories.set_index('name', inplace=True)

        df_models = pd.DataFrame(self.geom.items(),
                                 columns=['store', 'model']
                                 ).set_index('store')
        df_raw_models = pd.DataFrame(self.raw_survey_models.items(),
                                     columns=('store', 'raw_model')
                                     ).set_index('store')
        self.categories = self.categories.merge(df_models, left_on='store', right_index=True)
        self.categories = self.categories.merge(df_raw_models, left_on='store', right_index=True)
        self.categories['custom'] = False
        self.categories['is_db'] = True
        self.categories.sort_index(inplace=True)
        # self.categories['name_letter'] = self.categories.index.str.slice(0, 1)
        # self.categories['name_number'] = self.categories.index.str.slice(1).astype('int64')
        # self.categories.sort_values(['name_letter', 'name_number'], inplace=True)

        ## Set in the stores dataframe some useful properties, from the model class
        ## Maybe at some point it makes sense to get away from class-based definitions
        if len(self.categories) > 0:
            ## XXX: redundant self.categories['store_name'] with self.categories['store']
            #self.categories['store_name'] = self.categories.apply(
            #    lambda row: row.model.get_store_name(),
            #    axis=1
            #)
            #self.categories['raw_model_store_name'] = self.categories.apply(
            #    lambda row: row.raw_model.store_name,
            #    axis=1
            #)
            self.categories['is_line_work'] = self.categories.apply(
                lambda row: issubclass(row.model, LineWorkSurveyModel),
                axis=1
            )
        else:
            self.categories['store_name'] = None
            self.categories['raw_model_store_name'] = None
            self.categories['is_line_work'] = None
            self.categories['raw_survey_model'] = None

        ## Custom models (Misc)
        self.custom_models = pd.DataFrame(
            self.geom_custom.items(),
            columns=['store', 'model']
        ).set_index('store')
        self.custom_models['group'] = 'Misc'
        self.custom_models['custom'] = True
        self.custom_models['is_db'] = True
        self.custom_models['raw_model_store_name'] = ''
        self.custom_models['in_menu'] = self.custom_models.apply(
            lambda row: getattr(row.model, 'in_menu', True),
            axis=1
        )
        self.custom_models = self.custom_models.loc[self.custom_models.in_menu]
        self.custom_models['auto_import'] = False
        self.custom_models['is_line_work'] = False

        if len(self.custom_models) > 0:
            self.custom_models['long_name'],\
            self.custom_models['custom_description'],\
            self.custom_models['db_schema'],\
            = zip(*self.custom_models.apply(fill_columns_from_custom_models, axis=1))
            ## Try to give a meaningful description, eg. including the source (db_schema)
            self.custom_models['description'] = self.custom_models['custom_description'].fillna(self.custom_models['long_name'] + '-' + self.custom_models['db_schema'])
            self.custom_models['title'] = self.custom_models['long_name']

        ## Custom stores (Community)
        self.custom_stores = pd.DataFrame(
            self.geom_custom_store.items(),
            columns=['store', 'model']
        ).set_index('store')
        self.custom_stores['group'] = 'Community'
        self.custom_stores['custom'] = True
        self.custom_stores['is_db'] = False
        if len(self.custom_stores) == 0:
            self.custom_stores['in_menu'] = False
        else:
            self.custom_stores['in_menu'] = self.custom_stores.apply(
                lambda row: getattr(row.model, 'in_menu', True),
                axis=1
            )
        self.custom_stores = self.custom_stores.loc[self.custom_stores.in_menu]
        self.custom_stores['auto_import'] = False
        self.custom_stores['is_line_work'] = False

        if len(self.custom_stores) > 0:
            self.custom_stores['long_name'],\
            self.custom_stores['description'],\
            self.custom_stores['db_schema'],\
            = zip(*self.custom_stores.apply(fill_columns_from_custom_stores, axis=1))
            self.custom_stores['title'] = self.custom_stores['long_name']

        ## Combine Misc (custom) and survey (auto) stores
        ## Retain only one status per category (defaultStatus, 'E'/existing by default)
        self.stores = pd.concat([
            self.categories[self.categories.status==conf.map.defaultStatus[0]].reset_index().set_index('store').sort_values('title'),
            self.custom_models,
            self.custom_stores
        ])#.drop(columns=['store_name'])
        self.stores['in_menu'] = self.stores['in_menu'].astype(bool)

        ## Set in the stores dataframe some useful properties, from the model class
        ## Maybe at some point it makes sense to get away from class-based definitions
        def fill_columns_from_model(row):
            return (
                # row.model.icon,
                # row.model.symbol,
                row.model.mapbox_type, # or None,
                row.model.base_gis_type,
                row.model.z_index,
                row.model.attribution,
            )

        # self.stores['icon'],\
        # self.stores['symbol'],\
        self.stores['mapbox_type_default'], \
        self.stores['base_gis_type'], \
        self.stores['z_index'], \
        self.stores['attribution'] \
        = zip(*self.stores.apply(fill_columns_from_model, axis=1))

        #self.stores['mapbox_type_custom'] = self.stores['mapbox_type_custom'].replace('', np.nan).fillna(np.nan)
        self.stores['mapbox_type'] = self.stores['mapbox_type_custom'].fillna(
            self.stores['mapbox_type_default']
        )

        self.stores['viewable_role'] = self.stores.apply(
            lambda row: getattr(row.model, 'viewable_role', None),
            axis=1,
        )
        self.stores['viewable_role'].replace('', None, inplace=True)

        #self.stores['gql_object_type'] = self.stores.apply(make_model_gql_object_type, axis=1)
        self.stores['is_live'] = False
        self.stores['description'].fillna('', inplace=True)

        ## Layer groups: Misc, survey's primary groups, Live
        self.primary_groups = await CategoryGroup.get_df()
        self.primary_groups.sort_values('name', inplace=True)
        self.primary_groups['title'] = self.primary_groups['long_name']

        ## Add Misc and Live
        self.primary_groups.loc[-1] = (
            'Misc',
            False,
            'Misc and old layers (not coming from our survey; they will be organized, '
            'eventually as the surveys get more complete)',
            'Misc',
        )
        self.primary_groups.index = self.primary_groups.index + 1

        self.primary_groups.loc[len(self.primary_groups)] = (
            'Live',
            False,
            'Layers from data processing, sensors, etc, and are updated automatically',
            'Live',
        )

        self.primary_groups.loc[len(self.primary_groups)] = (
            'Community',
            False,
            'Layers from community',
            'Community',
        )

        self.primary_groups.sort_index(inplace=True)

        #def make_group(group):
        #    return GeomGroup(
        #        name=group['name'],
        #        title=group['title'],
        #        description=group['long_name']
        #    )
        #self.primary_groups['gql_object_type'] = self.primary_groups.apply(make_group, axis=1)
        await self.update_stores_counts()

    async def get_stores(self):
        """
        Get information about the available stores
        """
        raise DeprecationWarning('get_stores was for graphql')

    async def update_stores_counts(self):
        """
        Update the counts of the stores fro the DB
        """
        query = "SELECT schemaname, relname, n_live_tup FROM pg_stat_user_tables"
        # async with db.acquire(reuse=False) as connection:
        async with db_session() as session:
            rows = await session.exec(text(query))
        all_tables_count = pd.DataFrame(rows, columns=['schema', 'table', 'count'])
        all_tables_count['store'] = all_tables_count['schema'] + '.' + all_tables_count['table']
        all_tables_count.set_index(['store'], inplace=True)

        ## TODO: a DB VACUUM can be triggered if all counts are 0?

        ## Update the count in registry's stores
        self.stores.loc[:, 'count'] = all_tables_count['count']
        # ## FIXME: count for custom stores
        # store_df = self.stores.loc[(self.stores['count'] != 0) | (self.stores['is_live'])]
        # def set_count(row):
        #     row.gql_object_type.count = row['count']

        # store_df[store_df.is_db].apply(set_count, axis=1)

        # return store_df.gql_object_type.to_list()

    def update_live_layers(self):
        """
        Update the live layers, using the list of model definitions found in
        self.geom_live_defs, which is normally updated by the redis store
        """
        ## Remove existing live layers
        self.geom_live = {}
        self.stores.drop(self.stores[self.stores.is_live == True].index,  # noqa: E712
                         inplace=True)
        df_live = pd.DataFrame.from_dict(self.geom_live_defs.values(),
                                         orient='columns'
                                         )
        if len(df_live) == 0:
            return
        df_live.set_index('store', inplace=True)
        ## Adjust column names
        ## and add columns, to make sure pandas dtypes are not changed when the
        ## dataframes are concat
        ## TODO: standardize names across the whole workflow,
        ## then remove the rename below:
        df_live.rename(
            columns={
                'live': 'is_live',
                'zIndex': 'z_index',
                'gisType': 'geom_type',
                # 'type': 'mapbox_type',
                'viewableRole': 'viewable_role',
            }, inplace=True
        )
        ## Add columns
        df_live['auto_import'] = False
        df_live['base_gis_type'] = df_live['gis_type']
        df_live['custom'] = False
        df_live['group'] = ''
        df_live['in_menu'] = True
        df_live['is_db'] = False
        df_live['is_line_work'] = False
        df_live['long_name'] = df_live['name']
        df_live['mapbox_type_custom'] = df_live['mapbox_type']
        df_live['minor_group_1'] = ''
        df_live['minor_group_2'] = ''
        df_live['status'] = 'E'
        df_live['style'] = None
        df_live['title'] = df_live['name']
        registry.stores = pd.concat([registry.stores, df_live])
        for store, model_info in self.geom_live_defs.items():
            ## Add provided live layers in the stores df
            # Create the pydantic model
            # NOTE: Unused at this point, but might be usedful
            field_definitions = {
                k: (ClassVar[v.__class__], v)
                for k, v in model_info.items()
            }
            self.geom_live[store] = create_model(
                        __model_name=store,
                        __base__= LiveGeoModel,
                        **field_definitions
            )

# Accessible as global
registry: ModelRegistry = ModelRegistry()


## Below, some unused code, maybe to be used later for displaying layers in a tree structure

## Some magic for making a tree from enumarables,
## https://gist.github.com/hrldcpr/2012250
#Tree = lambda: defaultdict(Tree)
#
#
#def add(t, path):
#    for node in path:
#        t = t[node]
#
#
#dicts = lambda t: {k: dicts(t[k]) for k in t}
#
#
#def get_geom_models_tree():
#    tree = Tree()
#    for model in models.geom_custom:
#        full_name = model.__module__[len('gisaf.models')+1:]
#        add(tree, full_name.split('.'))
#        add(tree, full_name.split('.') + [model])
#    return dicts(tree)