## gisaf-backend/src/gisaf/registry.py (684 lines, 27 KiB, Python)
"""
Define the models for the ORM
"""
import logging
import importlib
import pkgutil
from collections import defaultdict
from importlib.metadata import entry_points
from typing import Any, ClassVar, Literal
from pydantic import create_model
from pydantic_core import PydanticUndefined
from sqlalchemy import text
from sqlalchemy.orm import selectinload
from sqlmodel import SQLModel, select, inspect
import pandas as pd
from gisaf.config import conf
from gisaf.models import (misc, category as category_module,
project, reconcile, map_bases, tags)
from gisaf.models.geo_models_base import (
LiveGeoModel,
PlottableModel,
GeoModel,
SurveyModel,
RawSurveyBaseModel,
LineWorkSurveyModel,
GeoPointSurveyModel,
GeoLineSurveyModel,
GeoPolygonSurveyModel,
)
from gisaf.utils import ToMigrate
from gisaf.models.category import Category, CategoryGroup
from gisaf.database import db_session
from gisaf import models
from gisaf.models.metadata import survey, raw_survey
logger = logging.getLogger(__name__)
## Map a Category.geom_type value to the survey model base class used
## when generating the per-category geometry models (make_category_models)
category_model_mapper = {
    'Point': GeoPointSurveyModel,
    'Line': GeoLineSurveyModel,
    'Polygon': GeoPolygonSurveyModel,
}
class NotInRegistry(Exception):
    """Raised when a requested model or store is not part of the registry."""
    pass
def import_submodules(package, recursive=True):
    """Import every submodule of a package, optionally recursing into subpackages.

    :param package: package (name or actual module)
    :type package: str | module
    :param recursive: also import the submodules of subpackages
    :rtype: dict[str, types.ModuleType]
    """
    if isinstance(package, str):
        package = importlib.import_module(package)
    modules = {}
    for _finder, submodule_name, is_pkg in pkgutil.walk_packages(package.__path__):
        qualified_name = f'{package.__name__}.{submodule_name}'
        modules[qualified_name] = importlib.import_module(qualified_name)
        ## Recurse into subpackages, merging their submodules into the result
        if recursive and is_pkg:
            modules.update(import_submodules(qualified_name))
    return modules
class ModelRegistry:
    """
    Collect, categorize, and initialize the SQLAlchemy data models.
    Maintains registries for all kind of model types, eg. geom, data, values...
    Provides tools to get the models from their names, table names, etc.
    """
    ## NOTE(review): the dict annotations below read as instances, but the
    ## registries appear to hold model *classes* (see add_model,
    ## make_category_models) -- TODO confirm and switch to type[...]
    ## Dataframe of all stores (one row per store), built by make_stores()
    stores: pd.DataFrame
    ## Dataframe of the survey categories, built by make_stores()
    categories: pd.DataFrame
    ## Plottable (values) models, keyed by store name
    values: dict[str, PlottableModel]
    ## All geometry models (survey generated + custom), keyed by store name
    geom: dict[str, GeoModel]
    ## Live layers, keyed by store name
    geom_live: dict[str, LiveGeoModel]
    ## Live layer definitions (normally updated by the redis store)
    geom_live_defs: dict[str, dict[str, Any]]
    ## Custom geometry models defined in python modules or plugins
    geom_custom: dict[str, GeoModel]
    ## Custom (non-DB) stores from plugins
    geom_custom_store: dict[str, GeoModel]
    ## Models that are neither geometry nor values
    other: dict[str, SQLModel]
    ## Misc models (category, project, reconcile, map_bases, tags...)
    misc: dict[str, SQLModel]
    ## Raw survey models generated from the categories
    raw_survey_models: dict[str, RawSurveyBaseModel]
    ## Final survey models generated from the categories
    survey_models: dict[str, SurveyModel]
    ## TODO: implement multiple values for a model (search for values_for_model)
    values_for_model: dict[GeoModel, list[PlottableModel]]
def __init__(self) -> None:
    """
    Initialize all the mutable registries as empty mappings.
    The dataframes (stores, categories...) are built later by make_stores().
    :return: None
    """
    for registry_name in (
            'geom_custom',
            'geom_custom_store',
            'geom_live',
            'geom_live_defs',
            'values',
            'other',
            'misc',
            'raw_survey_models',
            'survey_models',
            ):
        setattr(self, registry_name, {})
async def make_registry(self) -> None:
    """
    Make (or refresh) the registry of models:
    generate the category based models, scan the python modules for
    explicitly defined ones, then build the consolidated registry.
    :return:
    """
    logger.debug('make_registry')
    await self.make_category_models()
    self.scan()
    await self.build()
    ## If ogcapi is in app (i.e. not with scheduler):
    ## Now that the models are refreshed, tells the ogcapi to (re)build
    #await app.extra['ogcapi'].build()
async def make_category_models(self) -> None:
    """
    Make geom models from the category model
    and update raw_survey_models and survey_models.
    Important notes:
    - the db must be bound before running this function
    - the db must be rebound after running this function,
      so that the models created are actually bound to the db connection
    :return:
    """
    logger.debug('make_category_models')
    async with db_session() as session:
        ## Eager-load the category group to avoid lazy loads after the session closes
        query = select(Category).order_by(Category.long_name).options(selectinload(Category.category_group))
        data = await session.exec(query)
        categories: list[Category] = data.all()
        for category in categories:
            ## Several statuses can coexist for the same model, so
            ## consider only the ones with the 'E' (existing) status
            ## The other statuses are defined only for import (?)
            if getattr(category, 'status', 'E') != 'E':
                continue
            ## Use pydantic create_model, supported by SQLModel
            ## See https://github.com/tiangolo/sqlmodel/issues/377
            store_name = f'{survey.schema}.{category.table_name}'
            raw_store_name = f'{raw_survey.schema}.RAW_{category.table_name}'
            raw_survey_field_definitions = {
                ## FIXME: RawSurveyBaseModel.category should be a Category, not category.name
                'category_name': (ClassVar[str], category.name),
                ## FIXME: Same for RawSurveyBaseModel.group
                'group_name': (ClassVar[str], category.category_group.name),
                'viewable_role': (ClassVar[str], category.viewable_role),
                'store_name': (ClassVar[str], raw_store_name),
                # 'icon': (str, ''),
            }
            ## Raw survey points
            try:
                self.raw_survey_models[store_name] = create_model(
                    __base__=RawSurveyBaseModel,
                    __model_name=category.raw_survey_table_name,
                    __cls_kwargs__={
                        'table': True,
                        '__tablename__': category.raw_survey_table_name,
                    },
                    **raw_survey_field_definitions
                )
            except Exception as err:
                ## NOTE(review): logger.exception already logs the error with
                ## its traceback; the following warning duplicates the message
                logger.exception(err)
                logger.warning(err)
            else:
                logger.debug('Discovered {:s}'.format(category.raw_survey_table_name))
            ## Pick the survey base class from the category's geometry type;
            ## unknown geom_type => no final geometry model is created
            model_class = category_model_mapper.get(category.geom_type)
            ## Final geometries
            try:
                if model_class:
                    survey_field_definitions = {
                        'category_name': (ClassVar[str], category.name),
                        'group_name': (ClassVar[str], category.category_group.name),
                        'raw_store_name': (ClassVar[str], raw_store_name),
                        'viewable_role': (ClassVar[str], category.viewable_role),
                        'symbol': (ClassVar[str], category.symbol),
                        #'raw_model': (str, self.raw_survey_models.get(raw_store_name)),
                        # 'icon': (str, f'{survey.schema}-{category.table_name}'),
                    }
                    self.survey_models[store_name] = create_model(
                        __base__= model_class,
                        __model_name=category.table_name,
                        __cls_kwargs__={
                            'table': True,
                            '__tablename__': category.table_name,
                        },
                        **survey_field_definitions,
                    )
            except Exception as err:
                logger.warning(err)
            else:
                logger.debug('Discovered {:s}'.format(category.table_name))
    logger.info('Discovered {:d} models'.format(len(categories)))
def scan(self) -> None:
    """
    Scan all models defined explicitely (not the survey ones,
    which are defined by categories), and store them for reference.
    """
    logger.debug('scan')
    ## Scan the models defined in modules of the gisaf.models package
    for module_name, module in import_submodules(models).items():
        ## Skip the base-class modules: they only define abstract models
        if module_name.rsplit('.', 1)[-1] in (
                'geo_models_base',
                'models_base',
                ):
            continue
        for name in dir(module):
            obj = getattr(module, name)
            ## A registrable model is defined in this very module (not
            ## re-exported), is table-backed and knows its store name
            if hasattr(obj, '__module__') and obj.__module__.startswith(module.__name__)\
                    and hasattr(obj, '__tablename__') and hasattr(obj, 'get_store_name'):
                geom_type = self.add_model(obj)
                logger.debug(f'Model {obj.get_store_name()} added in the registry from gisaf source tree as {geom_type}')
    ## Scan the models defined in plugins (setuptools' entry points)
    for module_name, model in self.scan_entry_points(name='gisaf_extras.models').items():
        geom_type = self.add_model(model)
        logger.debug(f'Model {model.get_store_name()} added in the registry from {module_name} entry point as {geom_type}')
    for module_name, store in self.scan_entry_points(name='gisaf_extras.stores').items():
        self.add_store(store)
        logger.debug(f'Store {store} added in the registry from {module_name} gisaf_extras.stores entry point')
    ## Add misc models (no store name required for these)
    for module in misc, category_module, project, reconcile, map_bases, tags:
        for name in dir(module):
            obj = getattr(module, name)
            if hasattr(obj, '__module__') and hasattr(obj, '__tablename__'):
                self.misc[name] = obj
async def build(self) -> None:
    """
    Build the registry: organize all models in a common reference point.
    This should be executed after the discovery of survey models (categories)
    and the scan of custom/module defined models.
    """
    logger.debug('build')
    ## Combine all geom models (auto and custom)
    self.geom = {**self.survey_models, **self.geom_custom}
    await self.make_stores()
    ## Some lists of table, by usage
    ## XXX: Gino: doesn't set __tablename__ and __table__ , or engine not started???
    ## So, hack the table names of auto_geom
    #self.geom_tables = [model.__tablename__
    #self.geom_tables = [getattr(model, "__tablename__", None)
    #                    for model in sorted(list(self.geom.values()),
    #                                        key=lambda a: a.z_index)]
    ## Table names of the values (plottable) and other models
    values_tables = [model.__tablename__ for model in self.values.values()]
    other_tables = [model.__tablename__ for model in self.other.values()]
    self.data_tables = values_tables + other_tables
    self.populate_values_for_model()
    self.make_menu()
def populate_values_for_model(self):
    '''
    Build a dict for quick access to the values models attached to a
    (geometry) model: model class -> list of values model classes.
    '''
    self.values_for_model = {}
    for model_value in self.values.values():
        ## Each relationship of a values model points to the store (table)
        ## of the model its values are attached to
        for relationship in inspect(model_value).relationships:
            model = self.stores.loc[relationship.target.fullname, 'model']
            ## setdefault: a model may have several values models
            self.values_for_model.setdefault(model, []).append(model_value)
def scan_entry_points(self, name):
"""
Get the entry points in gisaf_extras.models, and return their models
:return: dict of name: models
"""
named_objects = {}
for entry_point in entry_points().select(group=name):
try:
named_objects.update({entry_point.name: entry_point.load()})
except ModuleNotFoundError as err:
logger.warning(err)
return named_objects
def add_model(self, model) -> Literal['GeoModel', 'PlottableModel', 'Other model']:
    """
    Register the given model class in the appropriate registry dict,
    keyed by its store name, and return the kind of model detected.
    """
    store_name = model.get_store_name()
    ## Visible geometry model: a GeoModel that is neither a raw survey
    ## model nor hidden
    is_visible_geom = (
        issubclass(model, GeoModel)
        and not issubclass(model, RawSurveyBaseModel)
        and not model.hidden
    )
    if is_visible_geom:
        self.geom_custom[store_name] = model
        return 'GeoModel'
    if issubclass(model, PlottableModel):
        self.values[store_name] = model
        return 'PlottableModel'
    self.other[store_name] = model
    return 'Other model'
def add_store(self, store) -> None:
    """Register a custom (non-DB) store, keyed by its name."""
    store_name = store.name
    self.geom_custom_store[store_name] = store
def make_menu(self):
    """
    Build the Admin menu: group the models of the stores dataframe
    by their Admin.menu attribute, skipping models without an Admin class.
    :return:
    """
    self.menu = defaultdict(list)
    for model in self.stores.model.to_list():
        if hasattr(model, 'Admin'):
            self.menu[model.Admin.menu].append(model)
# def get_raw_survey_model_mapping(self):
# """
# Get a mapping of category_name -> model for categories
# :return: dict of name -> model (class)
# """
# ## TODO: add option to pass a single item
# ## Local imports, avoiding cyclic dependencies
# ## FIXME: Gino
# categories = db.session.query(Category)
# return {category.name: self.raw_survey_models[category.table_name]
# for category in categories
# if self.raw_survey_models.get(category.table_name)}
async def get_model_id_params(self, model, id):
    """
    Return the parameters for this item (table name, id), displayed in info pane.
    :param model: model class of the store (may be falsy: empty dict returned)
    :param id: primary key value of the record
    :return: dict of display attributes for the info pane
    """
    if not model:
        return {}
    ## NOTE(review): legacy Gino query API (.load/.query/.gino.first);
    ## probably broken after the SQLModel migration -- see gisaf.utils.ToMigrate
    item = await model.load(**model.get_join_with()).query.where(model.id==id).gino.first()
    if not item:
        return {}
    resp = {}
    resp['itemName'] = item.caption
    resp['geoInfoItems'] = await item.get_geo_info()
    resp['surveyInfoItems'] = await item.get_survey_info()
    resp['infoItems'] = await item.get_info()
    resp['tags'] = await item.get_tags()
    ## The following sections are optional, depending on the model's features
    if hasattr(item, 'get_categorized_info'):
        resp['categorized_info_items'] = await item.get_categorized_info()
    if hasattr(item, 'get_graph'):
        resp['graph'] = item.get_graph()
    if hasattr(item, 'Attachments'):
        if hasattr(item.Attachments, 'files'):
            resp['files'] = await item.Attachments.files(item)
        if hasattr(item.Attachments, 'images'):
            resp['images'] = await item.Attachments.images(item)
    if hasattr(item, 'get_external_record_url'):
        resp['externalRecordUrl'] = item.get_external_record_url()
    return resp
async def make_stores(self):
    """
    Make registry for primary groups, categories and survey stores using Pandas dataframes.
    Used in GraphQl queries.
    """
    ## Utility functions used with apply method (dataframes)
    def fill_columns_from_custom_models(row) -> tuple[str, str, str]:
        ## (long_name, description, db_schema) for a custom model row
        return (
            row.model.__name__,
            row.model.description,
            row.model.metadata.schema
        )
    def fill_columns_from_custom_stores(row) -> tuple[str, str, None]:
        ## (long_name, description, db_schema) for a custom store row
        ## NOTE(review): description is returned twice (used for both
        ## long_name and description) -- looks intentional, TODO confirm
        return (
            row.model.description,
            row.model.description,
            None ## Schema
        )
    def get_store_name(category) -> str:
        ## Store name, eg. <schema>.V_<group>_<minor1>[_<minor2>]
        ## ('----' marks an unset minor_group_2)
        fragments = ['V', category.group, category.minor_group_1]
        if category.minor_group_2 != '----':
            fragments.append(category.minor_group_2)
        return '.'.join([
            survey.schema,
            '_'.join(fragments)
        ])
    self.categories = await Category.get_df()
    self.categories['title'] = self.categories.long_name.fillna(self.categories.description)
    self.categories['store'] = self.categories.apply(get_store_name, axis=1)
    ## Nullable count column, filled later by update_stores_counts()
    self.categories['count'] = pd.Series(dtype=pd.Int64Dtype())
    self.categories.set_index('name', inplace=True)
    ## Attach the generated geom models and raw survey models, by store name
    df_models = pd.DataFrame(self.geom.items(),
                             columns=['store', 'model']
                             ).set_index('store')
    df_raw_models = pd.DataFrame(self.raw_survey_models.items(),
                                 columns=('store', 'raw_model')
                                 ).set_index('store')
    self.categories = self.categories.merge(df_models, left_on='store', right_index=True)
    self.categories = self.categories.merge(df_raw_models, left_on='store', right_index=True)
    self.categories['custom'] = False
    self.categories['is_db'] = True
    self.categories.sort_index(inplace=True)
    # self.categories['name_letter'] = self.categories.index.str.slice(0, 1)
    # self.categories['name_number'] = self.categories.index.str.slice(1).astype('int64')
    # self.categories.sort_values(['name_letter', 'name_number'], inplace=True)
    ## Set in the stores dataframe some useful properties, from the model class
    ## Maybe at some point it makes sense to get away from class-based definitions
    if len(self.categories) > 0:
        ## XXX: redundant self.categories['store_name'] with self.categories['store']
        #self.categories['store_name'] = self.categories.apply(
        #    lambda row: row.model.get_store_name(),
        #    axis=1
        #)
        #self.categories['raw_model_store_name'] = self.categories.apply(
        #    lambda row: row.raw_model.store_name,
        #    axis=1
        #)
        self.categories['is_line_work'] = self.categories.apply(
            lambda row: issubclass(row.model, LineWorkSurveyModel),
            axis=1
        )
    else:
        ## Empty dataframe: apply would raise, set the columns directly
        self.categories['store_name'] = None
        self.categories['raw_model_store_name'] = None
        self.categories['is_line_work'] = None
        self.categories['raw_survey_model'] = None
    ## Custom models (Misc)
    self.custom_models = pd.DataFrame(
        self.geom_custom.items(),
        columns=['store', 'model']
    ).set_index('store')
    self.custom_models['group'] = 'Misc'
    self.custom_models['custom'] = True
    self.custom_models['is_db'] = True
    self.custom_models['raw_model_store_name'] = ''
    self.custom_models['in_menu'] = self.custom_models.apply(
        lambda row: getattr(row.model, 'in_menu', True),
        axis=1
    )
    ## Keep only the models that should appear in the menu
    self.custom_models = self.custom_models.loc[self.custom_models.in_menu]
    self.custom_models['auto_import'] = False
    self.custom_models['is_line_work'] = False
    if len(self.custom_models) > 0:
        self.custom_models['long_name'],\
        self.custom_models['custom_description'],\
        self.custom_models['db_schema'],\
            = zip(*self.custom_models.apply(fill_columns_from_custom_models, axis=1))
        ## Try to give a meaningful description, eg. including the source (db_schema)
        self.custom_models['description'] = self.custom_models['custom_description'].fillna(self.custom_models['long_name'] + '-' + self.custom_models['db_schema'])
        self.custom_models['title'] = self.custom_models['long_name']
    ## Custom stores (Community)
    self.custom_stores = pd.DataFrame(
        self.geom_custom_store.items(),
        columns=['store', 'model']
    ).set_index('store')
    self.custom_stores['group'] = 'Community'
    self.custom_stores['custom'] = True
    self.custom_stores['is_db'] = False
    if len(self.custom_stores) == 0:
        ## Empty dataframe: apply would raise, set the column directly
        self.custom_stores['in_menu'] = False
    else:
        self.custom_stores['in_menu'] = self.custom_stores.apply(
            lambda row: getattr(row.model, 'in_menu', True),
            axis=1
        )
    self.custom_stores = self.custom_stores.loc[self.custom_stores.in_menu]
    self.custom_stores['auto_import'] = False
    self.custom_stores['is_line_work'] = False
    if len(self.custom_stores) > 0:
        self.custom_stores['long_name'],\
        self.custom_stores['description'],\
        self.custom_stores['db_schema'],\
            = zip(*self.custom_stores.apply(fill_columns_from_custom_stores, axis=1))
        self.custom_stores['title'] = self.custom_stores['long_name']
    ## Combine Misc (custom) and survey (auto) stores
    ## Retain only one status per category (defaultStatus, 'E'/existing by default)
    self.stores = pd.concat([
        self.categories[self.categories.status==conf.map.defaultStatus[0]].reset_index().set_index('store').sort_values('title'),
        self.custom_models,
        self.custom_stores
    ])#.drop(columns=['store_name'])
    self.stores['in_menu'] = self.stores['in_menu'].astype(bool)
    ## Set in the stores dataframe some useful properties, from the model class
    ## Maybe at some point it makes sense to get away from class-based definitions
    def fill_columns_from_model(row):
        return (
            # row.model.icon,
            # row.model.symbol,
            row.model.mapbox_type, # or None,
            row.model.base_gis_type,
            row.model.z_index,
            row.model.attribution,
        )
    # self.stores['icon'],\
    # self.stores['symbol'],\
    self.stores['mapbox_type_default'], \
    self.stores['base_gis_type'], \
    self.stores['z_index'], \
    self.stores['attribution'] \
        = zip(*self.stores.apply(fill_columns_from_model, axis=1))
    #self.stores['mapbox_type_custom'] = self.stores['mapbox_type_custom'].replace('', np.nan).fillna(np.nan)
    ## The custom mapbox type (category defined) wins over the model default
    self.stores['mapbox_type'] = self.stores['mapbox_type_custom'].fillna(
        self.stores['mapbox_type_default']
    )
    self.stores['viewable_role'] = self.stores.apply(
        lambda row: getattr(row.model, 'viewable_role', None),
        axis=1,
    )
    self.stores['viewable_role'].replace('', None, inplace=True)
    #self.stores['gql_object_type'] = self.stores.apply(make_model_gql_object_type, axis=1)
    self.stores['is_live'] = False
    self.stores['description'].fillna('', inplace=True)
    ## Layer groups: Misc, survey's primary groups, Live
    self.primary_groups = await CategoryGroup.get_df()
    self.primary_groups.sort_values('name', inplace=True)
    self.primary_groups['title'] = self.primary_groups['long_name']
    ## Add Misc and Live
    ## NOTE(review): the tuples below assume the CategoryGroup dataframe has
    ## exactly these 4 columns in this order -- TODO confirm against get_df()
    self.primary_groups.loc[-1] = (
        'Misc',
        False,
        'Misc and old layers (not coming from our survey; they will be organized, '
        'eventually as the surveys get more complete)',
        'Misc',
    )
    ## Shift the index so the Misc row (inserted at -1) comes first
    self.primary_groups.index = self.primary_groups.index + 1
    self.primary_groups.loc[len(self.primary_groups)] = (
        'Live',
        False,
        'Layers from data processing, sensors, etc, and are updated automatically',
        'Live',
    )
    self.primary_groups.loc[len(self.primary_groups)] = (
        'Community',
        False,
        'Layers from community',
        'Community',
    )
    self.primary_groups.sort_index(inplace=True)
    #def make_group(group):
    #    return GeomGroup(
    #        name=group['name'],
    #        title=group['title'],
    #        description=group['long_name']
    #    )
    #self.primary_groups['gql_object_type'] = self.primary_groups.apply(make_group, axis=1)
    await self.update_stores_counts()
async def get_stores(self):
    """
    Get information about the available stores.
    Deprecated: this was used by the (removed) GraphQL API; it only raises.
    """
    raise DeprecationWarning('get_stores was for graphql')
async def update_stores_counts(self):
    """
    Update the counts of the stores from the DB.
    Uses pg_stat_user_tables (n_live_tup): fast, but approximate counts.
    """
    query = "SELECT schemaname, relname, n_live_tup FROM pg_stat_user_tables"
    # async with db.acquire(reuse=False) as connection:
    async with db_session() as session:
        rows = await session.exec(text(query))
        all_tables_count = pd.DataFrame(rows, columns=['schema', 'table', 'count'])
        ## Build the store name (schema.table) to align with self.stores' index
        all_tables_count['store'] = all_tables_count['schema'] + '.' + all_tables_count['table']
        all_tables_count.set_index(['store'], inplace=True)
        ## TODO: a DB VACUUM can be triggered if all counts are 0?
        ## Update the count in registry's stores
        self.stores.loc[:, 'count'] = all_tables_count['count']
    # ## FIXME: count for custom stores
    # store_df = self.stores.loc[(self.stores['count'] != 0) | (self.stores['is_live'])]
    # def set_count(row):
    #     row.gql_object_type.count = row['count']
    # store_df[store_df.is_db].apply(set_count, axis=1)
    # return store_df.gql_object_type.to_list()
def update_live_layers(self):
    """
    Update the live layers, using the list of model definitions found in
    self.geom_live_defs, which is normally updated by the redis store.
    Drops all existing live layers from the registry and self.stores,
    then re-adds them from the current definitions.
    """
    ## Remove existing live layers
    self.geom_live = {}
    self.stores.drop(self.stores[self.stores.is_live == True].index,  # noqa: E712
                     inplace=True)
    df_live = pd.DataFrame.from_dict(self.geom_live_defs.values(),
                                     orient='columns')
    if len(df_live) == 0:
        ## No live layer defined: nothing more to do
        return
    df_live.set_index('store', inplace=True)
    ## Adjust column names
    ## and add columns, to make sure pandas dtypes are not changed when the
    ## dataframes are concat
    ## TODO: standardize names across the whole workflow,
    ## then remove the rename below:
    df_live.rename(
        columns={
            'live': 'is_live',
            'zIndex': 'z_index',
            'gisType': 'geom_type',
            # 'type': 'mapbox_type',
            'viewableRole': 'viewable_role',
        }, inplace=True
    )
    ## Add columns expected by self.stores, so the concat keeps dtypes stable
    df_live['auto_import'] = False
    df_live['base_gis_type'] = df_live['gis_type']
    df_live['custom'] = False
    df_live['group'] = ''
    df_live['in_menu'] = True
    df_live['is_db'] = False
    df_live['is_line_work'] = False
    df_live['long_name'] = df_live['name']
    df_live['mapbox_type_custom'] = df_live['mapbox_type']
    df_live['minor_group_1'] = ''
    df_live['minor_group_2'] = ''
    df_live['status'] = 'E'
    df_live['style'] = None
    df_live['title'] = df_live['name']
    ## Fix: operate on this instance (self), not on the module level
    ## `registry` singleton, so any ModelRegistry instance behaves correctly
    self.stores = pd.concat([self.stores, df_live])
    for store, model_info in self.geom_live_defs.items():
        ## Add provided live layers in the stores df
        # Create the pydantic model
        # NOTE: Unused at this point, but might be useful
        field_definitions = {
            k: (ClassVar[v.__class__], v)
            for k, v in model_info.items()
        }
        self.geom_live[store] = create_model(
            __model_name=store,
            __base__= LiveGeoModel,
            **field_definitions
        )
## The application-wide registry singleton, accessible as a global
registry: ModelRegistry = ModelRegistry()
## Below, some unused code, maybe to be used later for displaying layers in a tree structure
## Some magic for making a tree from enumerables,
## https://gist.github.com/hrldcpr/2012250
#Tree = lambda: defaultdict(Tree)
#
#
#def add(t, path):
# for node in path:
# t = t[node]
#
#
#dicts = lambda t: {k: dicts(t[k]) for k in t}
#
#
#def get_geom_models_tree():
# tree = Tree()
# for model in models.geom_custom:
# full_name = model.__module__[len('gisaf.models')+1:]
# add(tree, full_name.split('.'))
# add(tree, full_name.split('.') + [model])
# return dicts(tree)