diff --git a/src/gisaf/database.py b/src/gisaf/database.py
index 9767af6..c0e7b36 100644
--- a/src/gisaf/database.py
+++ b/src/gisaf/database.py
@@ -1,11 +1,14 @@
 from contextlib import asynccontextmanager
-from typing import Annotated
+from typing import Annotated, Literal, Any
 from collections.abc import AsyncGenerator
 
 from sqlalchemy.ext.asyncio import create_async_engine
+from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute
+from sqlmodel import SQLModel, select
 from sqlmodel.ext.asyncio.session import AsyncSession
 from fastapi import Depends
 import pandas as pd
+import geopandas as gpd
 
 from gisaf.config import conf
@@ -28,4 +31,52 @@ async def db_session() -> AsyncGenerator[AsyncSession]:
 def pandas_query(session, query):
     return pd.read_sql_query(query, session.connection())
 
+def geopandas_query(session, query, *, crs=None, cast=True):
+    return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs)
+
+
+class BaseModel(SQLModel):
+    @classmethod
+    def selectinload(cls) -> list[Literal['*'] | QueryableAttribute[Any]]:
+        return []
+
+    @classmethod
+    async def get_df(cls, where=None, with_related=True, **kwargs) -> pd.DataFrame:
+        return await cls._get_df(pandas_query, where=where, with_related=with_related, **kwargs)
+
+    @classmethod
+    async def get_gdf(cls, *, where=None, with_related=True, **kwargs) -> gpd.GeoDataFrame:
+        return await cls._get_df(geopandas_query, where=where, with_related=with_related, **kwargs)
+
+    @classmethod
+    async def _get_df(cls, method, *, where=None, with_related=True, **kwargs) -> pd.DataFrame | gpd.GeoDataFrame:
+        async with db_session() as session:
+            query = select(cls)
+            if where is not None:
+                query = query.where(where)
+            ## Get the joined tables
+            joined_tables = cls.selectinload()
+            if with_related and len(joined_tables) > 0:
+                query = query.options(*(joinedload(jt) for jt in joined_tables))
+            df = await session.run_sync(method, query, **kwargs)
+            ## Change column names to reflect the joined tables.
+            ## Leave the first columns unchanged, as their names come straight
+            ## from the model's fields
+            joined_columns = list(df.columns[len(cls.model_fields):])
+            renames: dict[str, str] = {}
+            ## Match column names with the joined tables.
+            ## Important: this assumes that the order of the joined tables
+            ## and their columns is preserved by pandas' read_sql
+            for joined_table in joined_tables:
+                target = joined_table.property.target  # type: ignore
+                target_name = target.name
+                for col in target.columns:
+                    ## Pop the column from the column list and make a new name
+                    renames[joined_columns.pop(0)] = f'{target.schema}_{target_name}_{col.name}'
+            df.rename(columns=renames, inplace=True)
+            ## Finally, set the index of the df as the index of cls
+            df.set_index([c.name for c in cls.__table__.primary_key.columns],  # type: ignore
+                         inplace=True)
+            return df
+
 fastapi_db_session = Annotated[AsyncSession, Depends(get_db_session)]
\ No newline at end of file
diff --git a/src/gisaf/geoapi.py b/src/gisaf/geoapi.py
index 12927e4..73ca32f 100644
--- a/src/gisaf/geoapi.py
+++ b/src/gisaf/geoapi.py
@@ -120,15 +120,16 @@ async def get_geojson(store_name,
         raise err
     except Exception as err:
         logger.exception(err)
-        raise status.HTTP_500_INTERNAL_SERVER_ERROR
+        raise err #status.HTTP_500_INTERNAL_SERVER_ERROR
     ## The query of category defined models gets the status (not sure how and this could be skipped)
     ## Other models do not have: just add it manually from the model itself
     if 'status' not in gdf.columns:
         gdf['status'] = model.status
     if 'popup' not in gdf.columns:
         gdf['popup'] = await model.get_popup(gdf)
+    # Add properties
     properties = await model.get_properties(gdf)
-    columns = ['geometry', 'status', 'popup']
+    columns = ['geom', 'status', 'popup']
     for property, values in properties.items():
         columns.append(property)
         gdf[property] = values
diff --git a/src/gisaf/ipynb_tools.py b/src/gisaf/ipynb_tools.py
index c456aee..8efd0d6 100644
--- a/src/gisaf/ipynb_tools.py
+++ b/src/gisaf/ipynb_tools.py
@@ -285,7 +285,7 @@ class Gisaf:
 
         ## Drop existing
         if replace_all:
-            engine.execute('DELETE FROM "{}"."{}"'.format(model.metadata.schema, model.__tablename__))
+            engine.execute('DELETE FROM "{}"."{}"'.format(model.__table__.schema, model.__table__.name))
         else:
             raise NotImplementedError('ipynb_tools.Gisaf.to_layer does not support updates yet')
diff --git a/src/gisaf/models/category.py b/src/gisaf/models/category.py
index 8045af6..4d1b092 100644
--- a/src/gisaf/models/category.py
+++ b/src/gisaf/models/category.py
@@ -2,10 +2,10 @@ from typing import Any, ClassVar
 
 from sqlalchemy import String
 from pydantic import computed_field, ConfigDict
-from sqlmodel import Field, Relationship, SQLModel, JSON, TEXT, select
+from sqlmodel import Field, Relationship, JSON, TEXT
 
 from gisaf.models.metadata import gisaf_survey
-from gisaf.database import db_session, pandas_query
+from gisaf.database import BaseModel
 
 mapbox_type_mapping = {
     'Point': 'symbol',
@@ -13,13 +13,6 @@ mapbox_type_mapping = {
     'Polygon': 'fill',
 }
 
-class BaseModel(SQLModel):
-    @classmethod
-    async def get_df(cls):
-        async with db_session() as session:
-            query = select(cls)
-            return await session.run_sync(pandas_query, query)
-
 class CategoryGroup(BaseModel, table=True):
     metadata = gisaf_survey
@@ -45,7 +38,7 @@ class CategoryModelType(BaseModel, table=True):
 
 
 class CategoryBase(BaseModel):
-    model_config = ConfigDict(protected_namespaces=())  # type: ignore
+    # model_config = ConfigDict(protected_namespaces=())  # type: ignore
     class Admin:
         menu = 'Other'
         flask_admin_model_view = 'CategoryModelView'
@@ -53,20 +46,20 @@ class CategoryBase(BaseModel):
     name: str | None = Field(default=None, primary_key=True)
     domain: ClassVar[str] = 'V'
     description: str | None
-    group: str = Field(min_length=4, max_length=4,
+    group: str = Field(sa_type=String(4),
                        foreign_key="category_group.name", index=True)
-    minor_group_1: str = Field(min_length=4, max_length=4, default='----')
-    minor_group_2: str = Field(min_length=4, max_length=4, default='----')
-    status: str = Field(min_length=1, max_length=1)
+    minor_group_1: str = Field(sa_type=String(4), default='----')
+    minor_group_2: str = Field(sa_type=String(4), default='----')
+    status: str = Field(sa_type=String(1))
     custom: bool | None
     auto_import: bool = True
-    gis_type: str = Field(max_length=50,
+    gis_type: str = Field(sa_type=String(50),
                           foreign_key='category_model_type.name', default='Point')
-    long_name: str | None = Field(max_length=50)
+    long_name: str | None = Field(sa_type=String(50))
     style: str | None = Field(sa_type=TEXT)
-    symbol: str | None = Field(max_length=1)
-    mapbox_type_custom: str | None = Field(max_length=32)
+    symbol: str | None = Field(sa_type=String(1))
+    mapbox_type_custom: str | None = Field(sa_type=String(12))
     mapbox_paint: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
     mapbox_layout: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
     viewable_role: str | None
diff --git a/src/gisaf/models/geo_models_base.py b/src/gisaf/models/geo_models_base.py
index 8533086..9182414 100644
--- a/src/gisaf/models/geo_models_base.py
+++ b/src/gisaf/models/geo_models_base.py
@@ -12,7 +12,7 @@ import geopandas as gpd  # type: ignore
 import shapely  # type: ignore
 import pyproj
 
-from sqlmodel import SQLModel, Field
+from sqlmodel import SQLModel, Field, Relationship
 from sqlmodel.ext.asyncio.session import AsyncSession
 from pydantic import BaseModel
 
@@ -83,9 +83,13 @@ class BaseSurveyModel(BaseModel):
     """
     id: int | None = Field(sa_type=BigInteger, primary_key=True, default=None)
     equip_id: int = Field(foreign_key='equipment.id')
+    # equipment: Equipment = Relationship()
     srvyr_id: int = Field('surveyor.id')
+    # surveyor: Surveyor = Relationship()
     accur_id: int = Field('accuracy.id')
+    # accuracy: Accuracy = Relationship()
     project_id: int = Field('project.id')
+    # project: Project = Relationship()
     orig_id: str
     date: date
 
@@ -140,7 +144,7 @@ class SurveyModel(BaseSurveyModel):
     Base mixin class for defining final (reprojected) survey data, with a status
     """
     metadata: ClassVar[MetaData] = survey
-    status: ClassVar[str] = Field(sa_type=String(1))
+    # status: str = Field(sa_type=String(1))
 
     get_gdf_with_related: ClassVar[bool] = False
 
@@ -251,7 +255,7 @@ class SurveyModel(BaseSurveyModel):
         query = sql_query_for_geojson.format(
             model=cls,
             category=category,
-            schema=cls.metadata.schema,
+            schema=cls.__table__.schema,
             #table=cls.__tablename__,
             # FIXME: should be __tablename__, but see SQLModel.__tablename__ which use lower(__name__)
             table=cls.__name__,
@@ -282,12 +286,10 @@ class SurveyModel(BaseSurveyModel):
     ]
 
 
-class GeoModel(Model):
+class GeoModelNoStatus(Model):
     """
     Base class for all geo models
     """
-    #__abstract__ = True
-
     id: int | None = Field(default=None, primary_key=True)
 
     description: ClassVar[str] = ''
 
@@ -337,11 +339,6 @@ class GeoModel(Model):
         Style for the model, used in the map, etc
         """
 
-    # status: ClassVar[str] = 'E'
-    # """
-    # Status (ISO layers definition) of the layer. E -> Existing.
-    # """
-
     _join_with: ClassVar[dict[str, Any]] = {
     }
     """
@@ -474,7 +471,7 @@ class GeoModel(Model):
         shapely_geom = self.shapely_geom
 
         if simplify_tolerance:
-            shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
+            shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
                                                  preserve_topology=False)
         if shapely_geom.is_empty:
             raise NoPoint
 
@@ -682,7 +679,7 @@ class GeoModel(Model):
         if not gpd.options.use_pygeos:
             logger.warn(f'Using get_geos_df for {cls} but gpd.options.use_pygeos not set')
         if not crs:
-            crs = conf.crs['geojson']
+            crs = conf.crs.geojson
         df = await cls.get_df(where=where, **kwargs)
         df.set_index('id', inplace=True)
         df.rename(columns={'geom': 'wkb'}, inplace=True)
@@ -694,78 +691,84 @@ class GeoModel(Model):
 
     @classmethod
     async def get_geo_df(cls, where=None, crs=None, reproject=False,
-                         filter_columns=False, with_popup=False, **kwargs):
+                         filter_columns=False, with_popup=False, **kwargs) -> gpd.GeoDataFrame:
         """
-        Return a Pandas dataframe of all records
+        Return a GeoPandas GeoDataFrame of all records
         :param where: where clause for the query (eg. Model.attr=='foo')
         :param crs: coordinate system (eg. 'epsg:4326') (priority over the reproject parameter)
         :param reproject: should reproject to conf.srid_for_proj
         :return:
         """
-        df = await cls.get_df(where=where, **kwargs)
-        df.dropna(subset=['geom'], inplace=True)
-        df.set_index('id', inplace=True)
-        df.sort_index(inplace=True)
-        df_clean = df[df.geom != None]
-        ## Drop coordinates
-        df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
-                      inplace=True)
-        if not crs:
-            crs = conf.crs['geojson']
+        return await cls.get_gdf(where=where, **kwargs)
 
-        ## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
-        if getattr(gpd.options, 'use_pygeos', False):
-            geometry = shapely.from_wkb(df_clean.geom)
-        else:
-            geometry = [wkb.loads(geom) for geom in df_clean.geom]
+        # df = await cls.get_df(where=where, **kwargs)
+        # df.dropna(subset=['geom'], inplace=True)
+        # df.set_index('id', inplace=True)
+        # df.sort_index(inplace=True)
+        # df_clean = df[~df.geom.isna()]
+        # ## Drop coordinates
+        # df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
+        #               inplace=True)
+        # if not crs:
+        #     crs = conf.crs.geojson
 
-        gdf = gpd.GeoDataFrame(
-            df_clean.drop('geom', axis=1),
-            crs=crs,
-            geometry=geometry
-        )
+        # ## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
+        # # if getattr(gpd.options, 'use_pygeos', False):
+        # #     geometry = shapely.from_wkb(df_clean.geom)
+        # # else:
+        # #     geometry = [wkb.loads(geom) for geom in df_clean.geom]
 
-        if hasattr(cls, 'simplify') and cls.simplify:
-            #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
-            #                                     preserve_topology=False)
-            gdf['geometry'] = gdf['geometry'].simplify(
-                float(cls.simplify) / conf.geo['simplify_geom_factor'],
-                preserve_topology=False)
+        # gdf = gpd.GeoDataFrame(
+        #     df_clean.drop('geom', axis=1),
+        #     crs=crs,
+        #     geometry=geometry
+        # )
 
-        if reproject:
-            gdf.to_crs(crs=conf.crs['for_proj'], inplace=True)
+        # if hasattr(cls, 'simplify') and cls.simplify:
+        #     #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
+        #     #                                     preserve_topology=False)
+        #     gdf['geometry'] = gdf['geometry'].simplify(
+        #         float(cls.simplify) / conf.geo.simplify_geom_factor,
+        #         preserve_topology=False)
 
-        ## Filter out columns
-        if filter_columns:
-            gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
-                     inplace=True)
+        # if reproject:
+        #     gdf.to_crs(crs=conf.crs.for_proj, inplace=True)
 
-        if with_popup:
-            gdf['popup'] = await cls.get_popup(gdf)
+        # ## Filter out columns
+        # if filter_columns:
+        #     gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
+        #              inplace=True)
 
-        return gdf
+        # if with_popup:
+        #     gdf['popup'] = await cls.get_popup(gdf)
+
+        # return gdf
 
     @classmethod
     def get_attachment_dir(cls):
-        return f'{cls.__table__.schema}.{cls.__table__.name}'
+        return cls.__table__.fullname
 
     @classmethod
     def get_attachment_base_dir(cls):
-        return Path(conf.attachments['base_dir'])/cls.get_attachment_dir()
+        return Path(conf.attachments.base_dir) / cls.get_attachment_dir()
+
+
+class GeoModel(GeoModelNoStatus):
+    status: ClassVar[str] = 'E'
+    """
+    Status (ISO layers definition) of the layer. E -> Existing.
+ """ class LiveGeoModel(GeoModel): - status: ClassVar[str] = 'E' store: ClassVar[str] group: ClassVar[str] ='Live' custom: ClassVar[bool] = True is_live: ClassVar[bool] = True is_db: ClassVar[bool] = False -class Geom(str): - pass +# class Geom(str): +# pass -class GeoPointModel(GeoModel): - #__abstract__ = True +class GeoPointModelNoStatus(GeoModelNoStatus): shapefile_model: ClassVar[int] = POINT ## geometry typing, see https://stackoverflow.com/questions/77333100/geoalchemy2-geometry-schema-for-pydantic-fastapi geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINT', srid=conf.geo.srid)) @@ -827,9 +830,11 @@ class GeoPointModel(GeoModel): info['latitude'] = '{:.6f}'.format(self.shapely_geom.y) return info +class GeoPointModel(GeoPointModelNoStatus, GeoModel): + ... + class GeoPointZModel(GeoPointModel): - #__abstract__ = True geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid)) shapefile_model: ClassVar[int] = POINTZ @@ -843,13 +848,11 @@ class GeoPointZModel(GeoPointModel): class GeoPointMModel(GeoPointZModel): - #__abstract__ = True shapefile_model: ClassVar[int] = POINTZ geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid)) class GeoLineModel(GeoModel): - #__abstract__ = True shapefile_model: ClassVar[int] = POLYLINE geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRING', srid=conf.geo.srid)) mapbox_type: ClassVar[str] = 'line' @@ -913,7 +916,6 @@ class GeoLineModel(GeoModel): class GeoLineModelZ(GeoLineModel): - #__abstract__ = True shapefile_model: ClassVar[int] = POLYLINEZ geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRINGZ', dimension=3, srid=conf.geo.srid)) @@ -930,7 +932,6 @@ class GeoLineModelZ(GeoLineModel): class GeoPolygonModel(GeoModel): - #__abstract__ = True shapefile_model: ClassVar[int] = POLYGON geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGON', srid=conf.geo.srid)) mapbox_type: ClassVar[str] = 'fill' @@ -1002,7 +1003,6 @@ class GeoPolygonModel(GeoModel): class GeoPolygonModelZ(GeoPolygonModel): - #__abstract__ = True shapefile_model: ClassVar[int] = POLYGONZ geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGONZ', dimension=3, srid=conf.geo.srid)) @@ -1023,17 +1023,13 @@ class GeoPolygonModelZ(GeoPolygonModel): class GeoPointSurveyModel(SurveyModel, GeoPointMModel): - #__abstract__ = True - ## raw_model is set in category_models_maker.make_category_models - raw_model: ClassVar['RawSurveyBaseModel'] = None + raw_model: ClassVar['RawSurveyBaseModel'] class LineWorkSurveyModel(SurveyModel): - #__abstract__ = True - ## raw_model is set in category_models_maker.make_category_models - raw_model: ClassVar['RawSurveyBaseModel'] = None + raw_model: ClassVar['RawSurveyBaseModel'] def match_raw_points(self): reprojected_geom = transform(reproject_func, self.shapely_geom) @@ -1046,20 +1042,17 @@ class LineWorkSurveyModel(SurveyModel): class GeoLineSurveyModel(LineWorkSurveyModel, GeoLineModelZ): - #__abstract__ = True pass class GeoPolygonSurveyModel(LineWorkSurveyModel, GeoPolygonModelZ): - #__abstract__ = True pass -class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel): +class RawSurveyBaseModel(BaseSurveyModel, GeoPointModelNoStatus): """ Abstract base class for category based raw survey point models """ - #__abstract__ = True metadata: ClassVar[MetaData] = raw_survey geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.raw_survey.srid)) @@ -1070,7 +1063,7 @@ 
class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel): @classmethod async def get_geo_df(cls, *args, **kwargs): - return await super().get_geo_df(crs=conf.raw_survey['spatial_sys_ref'], + return await super().get_geo_df(crs=conf.raw_survey.spatial_sys_ref, *args, **kwargs) @@ -1086,8 +1079,6 @@ class PlottableModel(Model): to be used (the first one being the default) * OR an ordereed dict of value => resampling method """ - #__abstract__ = True - float_format: ClassVar[str] = '%.1f' values: ClassVar[list[dict[str, str]]] = [] @@ -1117,8 +1108,6 @@ class PlottableModel(Model): class TimePlottableModel(PlottableModel): - #__abstract__ = True - time: datetime @classmethod diff --git a/src/gisaf/models/models_base.py b/src/gisaf/models/models_base.py index b3b70fc..f438f30 100644 --- a/src/gisaf/models/models_base.py +++ b/src/gisaf/models/models_base.py @@ -1,7 +1,7 @@ -from typing import Any +from typing import Any, Dict, Tuple, Type, ClassVar import logging -from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column +from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column, select from pydantic import computed_field import numpy as np import pandas as pd @@ -10,6 +10,8 @@ import shapely from sqlalchemy.sql import sqltypes from geoalchemy2.types import Geometry +from gisaf.database import BaseModel + pandas_cast_map = { sqltypes.Integer: 'Int64', sqltypes.Float: 'float64', @@ -17,102 +19,118 @@ pandas_cast_map = { logger = logging.getLogger('model_base_base') -class Model(SQLModel): +class Model(BaseModel): """ Base mixin class for models that can be converted to a Pandas dataframe with get_df """ + # status: ClassVar[str] = 'E' + + def __new__(cls, *args, **kwargs): + if not hasattr(cls, 'query'): + cls.query = select(cls) + return super().__new__(cls, *args, **kwargs) class Meta: filtered_columns_on_map: list[str] = [] @classmethod def get_store_name(cls): - return "{}.{}".format(cls.metadata.schema, cls.__tablename__) + if hasattr(cls, '__table__'): + return cls.__table__.fullname + elif hasattr(cls, '__table_args__') and 'schema' in cls.__table_args__: + return f"{cls.__table_args__.schema}.{cls.__tablename__}" + else: + return f'{cls.metadata.schema}.{cls.__tablename__}' @classmethod def get_table_name_prefix(cls): return "{}_{}".format(cls.metadata.schema, cls.__tablename__) - @classmethod - async def get_df(cls, where=None, - with_related=None, recursive=True, - cast=True, - with_only_columns=None, - geom_as_ewkt=False, - **kwargs): - """ - Return a Pandas dataframe of all records - Optional arguments: - * an SQLAlchemy where clause - * with_related: automatically get data from related columns, following the foreign keys in the model definitions - * cast: automatically transform various data in their best python types (eg. with date, time...) - * with_only_columns: fetch only these columns (list of column names) - * geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df) - :return: - """ - query = cls.query + # @classmethod + # async def get_df(cls, where=None, + # with_related=None, recursive=True, + # cast=True, + # with_only_columns=None, + # geom_as_ewkt=False, + # **kwargs): + # """ + # Return a Pandas dataframe of all records + # Optional arguments: + # * an SQLAlchemy where clause + # * with_related: automatically get data from related columns, following the foreign keys in the model definitions + # * cast: automatically transform various data in their best python types (eg. with date, time...) 
+    #     * with_only_columns: fetch only these columns (list of column names)
+    #     * geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df)
+    #     :return:
+    #     """
+    #     breakpoint()
+    #     if hasattr(cls, 'query'):
+    #         query = cls.query
+    #     else:
+    #         query = select(cls)
 
-        if with_related is not False:
-            if with_related or getattr(cls, 'get_gdf_with_related', False):
-                joins = get_join_with(cls, recursive)
-                model_loader = cls.load(**joins)
-                query = _get_query_with_table_names(model_loader)
+    #     # if with_related is not False:
+    #     #     if with_related or getattr(cls, 'get_gdf_with_related', False):
+    #     #         joins = get_join_with(cls, recursive)
+    #     #         model_loader = cls.load(**joins)
+    #     #         query = _get_query_with_table_names(model_loader)
 
-        if where is not None:
-            query.append_whereclause(where)
+    #     if where is not None:
+    #         query.append_whereclause(where)
 
-        if with_only_columns:
-            query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])
+    #     if with_only_columns:
+    #         query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])
 
-        ## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
-        async with query.bind.raw_pool.acquire() as conn:
-            ## Convert hstore fields to dict
-            await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')
+    #     ## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
+    #     breakpoint()
+    #     async with query.bind.raw_pool.acquire() as conn:
+    #         ## Convert hstore fields to dict
+    #         await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')
 
-            compiled = query.compile()
-            stmt = await conn.prepare(compiled.string)
-            columns = [a.name for a in stmt.get_attributes()]
-            data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
-            df = pd.DataFrame(data, columns=columns)
+    #         compiled = query.compile()
+    #         stmt = await conn.prepare(compiled.string)
+    #         columns = [a.name for a in stmt.get_attributes()]
+    #         data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
+    #         df = pd.DataFrame(data, columns=columns)
 
-        ## Convert primary key columns to Int64:
-        ## allows NaN, fixing type convertion to float with merge
-        for pk in [c.name for c in cls.__table__.primary_key.columns]:
-            if pk in df.columns and df[pk].dtype=='int64':
-                df[pk] = df[pk].astype('Int64')
+    #     ## Convert primary key columns to Int64:
+    #     ## allows NaN, fixing type conversion to float with merge
+    #     for pk in [c.name for c in cls.__table__.primary_key.columns]:
+    #         if pk in df.columns and df[pk].dtype=='int64':
+    #             df[pk] = df[pk].astype('Int64')
 
-        if cast:
-            ## Cast the type for known types (datetime, ...)
-            for column_name in df.columns:
-                col = getattr(query.columns, column_name, None)
-                if col is None:
-                    logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
-                    continue
-                column_type = getattr(query.columns, column_name).type
-                ## XXX: Needs refinment, eg. nullable -> Int64 ...
-                if column_type.__class__ in pandas_cast_map:
-                    df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
-                elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
-                    ## Dates, times
-                    df[column_name] = pd.to_datetime(df[column_name])
-                #elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
-                #    ## Numeric
-                #    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
-                ## XXX: keeping this note about that is about "char" SQL type, but the fix of #9694 makes it unnessary
-                #elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
-                #    ## Workaround for bytes being used for string of length 1 (not sure - why???)
-                #    df[column_name] = df[column_name].str.decode('utf-8')
+    #     if cast:
+    #         ## Cast the type for known types (datetime, ...)
+    #         for column_name in df.columns:
+    #             col = getattr(query.columns, column_name, None)
+    #             if col is None:
+    #                 logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
+    #                 continue
+    #             column_type = getattr(query.columns, column_name).type
+    #             ## XXX: Needs refinement, eg. nullable -> Int64 ...
+    #             if column_type.__class__ in pandas_cast_map:
+    #                 df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
+    #             elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
+    #                 ## Dates, times
+    #                 df[column_name] = pd.to_datetime(df[column_name])
+    #             #elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
+    #             #    ## Numeric
+    #             #    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
+    #             ## XXX: keeping this note about the "char" SQL type, but the fix of #9694 makes it unnecessary
+    #             #elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
+    #             #    ## Workaround for bytes being used for string of length 1 (not sure - why???)
+    #             #    df[column_name] = df[column_name].str.decode('utf-8')
 
-        ## Rename the columns, removing the schema_table prefix for the columns in that model
-        prefix = cls.get_table_name_prefix()
-        prefix_length = len(prefix) + 1
-        rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
-        df.rename(columns=rename_map, inplace=True)
+    #     ## Rename the columns, removing the schema_table prefix for the columns in that model
+    #     prefix = cls.get_table_name_prefix()
+    #     prefix_length = len(prefix) + 1
+    #     rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
+    #     df.rename(columns=rename_map, inplace=True)
 
-        ## Eventually convert geometry columns to EWKB
-        if geom_as_ewkt:
-            geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
-            for column in geometry_columns:
-                df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)
+    #     ## Eventually convert geometry columns to EWKB
+    #     if geom_as_ewkt:
+    #         geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
+    #         for column in geometry_columns:
+    #             df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)
 
-        return df
+    #     return df
diff --git a/src/gisaf/redis_tools.py b/src/gisaf/redis_tools.py
index 7d8e274..204e835 100644
--- a/src/gisaf/redis_tools.py
+++ b/src/gisaf/redis_tools.py
@@ -395,12 +395,11 @@ class Store:
         for store_name in missing_triger_tables:
             ## XXX: TODO: See https://stackoverflow.com/questions/7888846/trigger-in-sqlachemy
             model = registry.geom[store_name]
-
             try:
                 await session.exec(text(
                     ttag_create_trigger.format(
-                        schema=model.metadata.schema,
-                        table=model.__tablename__)
+                        schema=model.__table__.schema,
+                        table=model.__table__.name)
                 ))
             except UndefinedTableError:
                 logger.warning(f'table {store_name} does not exist in '
diff --git a/src/gisaf/registry.py b/src/gisaf/registry.py
index cb5638d..83e9ce4 100644
--- a/src/gisaf/registry.py
+++ b/src/gisaf/registry.py
@@ -108,8 +108,8 @@ class ModelRegistry:
         :return:
         """
         logger.debug('make_registry')
-        await self.make_category_models()
         self.scan()
+        await self.make_category_models()
         await self.build()
         ## If ogcapi is in app (i.e. not with scheduler):
         ## Now that the models are refreshed, tells the ogcapi to (re)build
@@ -368,7 +368,7 @@ class ModelRegistry:
             row.model.__name__,
             row.model.__name__,
             row.model.description,
-            row.model.metadata.schema,
+            row.model.__table__.schema,
             row.model.base_gis_type,
             row.model.style,
             row.model.symbol,
@@ -396,7 +396,6 @@ class ModelRegistry:
         self.categories['store'] = self.categories.apply(get_store_name, axis=1)
         self.categories['count'] = pd.Series(dtype=pd.Int64Dtype())
-        self.categories.set_index('name', inplace=True)
 
         df_models = pd.DataFrame(self.geom.items(),
                                  columns=['store', 'model']
@@ -552,24 +551,20 @@ class ModelRegistry:
         self.primary_groups['title'] = self.primary_groups['long_name']
 
         ## Add Misc and Live
-        self.primary_groups.loc[-1] = (
-            'Misc',
+        self.primary_groups.loc['Misc'] = (
             False,
             'Misc and old layers (not coming from our survey; they will be organized, '
             'eventually as the surveys get more complete)',
             'Misc',
         )
-        self.primary_groups.index = self.primary_groups.index + 1
-        self.primary_groups.loc[len(self.primary_groups)] = (
-            'Live',
+        self.primary_groups.loc['Live'] = (
             False,
             'Layers from data processing, sensors, etc, and are updated automatically',
             'Live',
         )
-        self.primary_groups.loc[len(self.primary_groups)] = (
-            'Community',
+        self.primary_groups.loc['Community'] = (
             False,
             'Layers from community',
             'Community',
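
Review notes — the sketches below are illustrative and not part of the patch.

The new `BaseModel` in `src/gisaf/database.py` centralizes dataframe loading: `pandas_query`/`geopandas_query` run inside `session.run_sync`, which hands them a synchronous Session whose `connection()` feeds `pd.read_sql_query` / `GeoDataFrame.from_postgis`. A model opts in to related-table loading by overriding `selectinload()`; the model and field names here (`Equipment`, `SurveyPoint`) are hypothetical:

```python
# Hypothetical models showing how a subclass would use the new helpers.
from sqlmodel import Field, Relationship
from gisaf.database import BaseModel

class Equipment(BaseModel, table=True):
    id: int | None = Field(default=None, primary_key=True)
    name: str

class SurveyPoint(BaseModel, table=True):
    id: int | None = Field(default=None, primary_key=True)
    equip_id: int = Field(foreign_key='equipment.id')
    equipment: Equipment = Relationship()

    @classmethod
    def selectinload(cls):
        ## Relationships returned here are wrapped in joinedload()
        ## by _get_df when with_related=True
        return [cls.equipment]

## In an async context:
##     df = await SurveyPoint.get_df()    # pandas, via pandas_query
##     gdf = await SurveyPoint.get_gdf()  # geopandas, via geopandas_query
```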
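The positional rename in `_get_df` relies on pandas' `read_sql` deduplicating repeated column names in table order (`id` → `id_1`, ...), as the comment in the patch states. A minimal pandas-only sketch of that renaming step, with assumed schema/table/column names:

```python
import pandas as pd

## Columns as they might come back from a joined query: the model's own
## fields first, then the joined table's columns (names are assumptions).
df = pd.DataFrame(columns=['id', 'equip_id', 'id_1', 'name'])
model_field_count = 2  # e.g. the model declares id and equip_id
joined_columns = list(df.columns[model_field_count:])
renames = {}
for col_name in ('id', 'name'):  # columns of the joined table, in order
    ## Pop positionally, exactly as _get_df does
    renames[joined_columns.pop(0)] = f'gisaf_survey_equipment_{col_name}'
df.rename(columns=renames, inplace=True)
print(list(df.columns))
## ['id', 'equip_id', 'gisaf_survey_equipment_id', 'gisaf_survey_equipment_name']
```

If a query ever reorders joined columns, this positional mapping silently mislabels them, which is why the assumption is called out as important.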
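In `registry.py`, switching `primary_groups` to label-based `.loc` assignments removes the index bookkeeping (`loc[-1]`, shifting the index by one). A small sketch of the pandas behaviour this relies on — assigning to `.loc` with a new label enlarges the frame; the column names here are assumptions:

```python
import pandas as pd

primary_groups = pd.DataFrame(
    {'exportable': [True], 'long_name': ['Example group'], 'title': ['Example group']},
    index=['EXAM'])
## .loc with an unknown label appends a row, so no index shifting is needed
primary_groups.loc['Misc'] = (False, 'Misc and old layers', 'Misc')
primary_groups.loc['Live'] = (False, 'Live layers', 'Live')
primary_groups.loc['Community'] = (False, 'Layers from community', 'Community')
print(primary_groups.index.tolist())  # ['EXAM', 'Misc', 'Live', 'Community']
```

This pairs with dropping `self.categories.set_index('name', inplace=True)` earlier in the same file: the frames are now keyed by name from the start.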