Migrate joins to sqlalchemy's query options

Use native pandas read_sql_query and geopandas from_postgis
Fix definition of status in models
Fix table names
Fix category fields
commit 75bedb3e91
parent 956147aea8
8 changed files with 236 additions and 190 deletions
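For orientation before the per-file hunks: the core pattern this commit adopts is to run the synchronous pandas/geopandas readers inside the async session via run_sync. A minimal sketch of that pattern (the DSN and the Tree model are assumptions for illustration, not code from this repository):

import asyncio
import pandas as pd
from sqlalchemy.ext.asyncio import create_async_engine
from sqlmodel import SQLModel, Field, select
from sqlmodel.ext.asyncio.session import AsyncSession

class Tree(SQLModel, table=True):  # hypothetical model, for illustration
    id: int | None = Field(default=None, primary_key=True)
    height: float

def pandas_query(session, query):
    ## run_sync hands this function a sync Session; its connection()
    ## is what pd.read_sql_query needs
    return pd.read_sql_query(query, session.connection())

async def main():
    engine = create_async_engine('postgresql+asyncpg://localhost/gisaf')  # assumed DSN
    async with AsyncSession(engine) as session:
        ## assumes the table exists; geopandas works the same way with
        ## gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=...)
        df = await session.run_sync(pandas_query, select(Tree))
        print(df.head())

asyncio.run(main())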
@@ -1,11 +1,14 @@
 from contextlib import asynccontextmanager
-from typing import Annotated
+from typing import Annotated, Literal, Any
 from collections.abc import AsyncGenerator

 from sqlalchemy.ext.asyncio import create_async_engine
+from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute
 from sqlmodel import SQLModel, select
 from sqlmodel.ext.asyncio.session import AsyncSession
 from fastapi import Depends
+import pandas as pd
+import geopandas as gpd

 from gisaf.config import conf
@@ -28,4 +31,52 @@ async def db_session() -> AsyncGenerator[AsyncSession]:

+def pandas_query(session, query):
+    return pd.read_sql_query(query, session.connection())
+
+
+def geopandas_query(session, query, *, crs=None, cast=True):
+    return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs)
+
+
+class BaseModel(SQLModel):
+    @classmethod
+    def selectinload(cls) -> list[Literal['*'] | QueryableAttribute[Any]]:
+        return []
+
+    @classmethod
+    async def get_df(cls, where=None, with_related=True, **kwargs) -> pd.DataFrame:
+        return await cls._get_df(pandas_query, where=where, with_related=with_related, **kwargs)
+
+    @classmethod
+    async def get_gdf(cls, *, where=None, with_related=True, **kwargs) -> gpd.GeoDataFrame:
+        return await cls._get_df(geopandas_query, where=where, with_related=with_related, **kwargs)
+
+    @classmethod
+    async def _get_df(cls, method, *, where=None, with_related=True, **kwargs) -> pd.DataFrame | gpd.GeoDataFrame:
+        async with db_session() as session:
+            query = select(cls)
+            if where is not None:
+                query = query.where(where)
+            ## Get the joined tables
+            joined_tables = cls.selectinload()
+            if with_related and len(joined_tables) > 0:
+                query = query.options(*(joinedload(jt) for jt in joined_tables))
+            df = await session.run_sync(method, query, **kwargs)
+            ## Change column names to reflect the joined tables.
+            ## Leave the first columns unchanged, as their names come straight
+            ## from the model's fields
+            joined_columns = list(df.columns[len(cls.model_fields):])
+            renames: dict[str, str] = {}
+            ## Match column names with the joined tables.
+            ## Important: this assumes that the order of the joined tables
+            ## and their columns is preserved by pandas' read_sql
+            for joined_table in joined_tables:
+                target = joined_table.property.target  # type: ignore
+                target_name = target.name
+                for col in target.columns:
+                    ## Pop the column from the column list and make a new name
+                    renames[joined_columns.pop(0)] = f'{target.schema}_{target_name}_{col.name}'
+            df.rename(columns=renames, inplace=True)
+            ## Finally, set the index of the df as the index of cls
+            df.set_index([c.name for c in cls.__table__.primary_key.columns],  # type: ignore
+                         inplace=True)
+            return df
+

 fastapi_db_session = Annotated[AsyncSession, Depends(get_db_session)]
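A usage sketch of the new BaseModel accessors defined above (the Equipment and Tree models are hypothetical; only the get_df/get_gdf/selectinload API comes from this commit):

from sqlmodel import Field, Relationship
from gisaf.database import BaseModel

class Equipment(BaseModel, table=True):  # hypothetical
    id: int | None = Field(default=None, primary_key=True)
    name: str

class Tree(BaseModel, table=True):  # hypothetical
    id: int | None = Field(default=None, primary_key=True)
    equip_id: int = Field(foreign_key='equipment.id')
    equipment: Equipment = Relationship()

    @classmethod
    def selectinload(cls):
        ## relationships listed here get joinedload'ed by _get_df
        return [cls.equipment]

async def example():
    ## joined columns come back renamed to <schema>_<table>_<column>
    df = await Tree.get_df(where=Tree.equip_id == 1)
    return df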
@@ -120,15 +120,16 @@ async def get_geojson(store_name,
             raise err
     except Exception as err:
         logger.exception(err)
-        raise status.HTTP_500_INTERNAL_SERVER_ERROR
+        raise err #status.HTTP_500_INTERNAL_SERVER_ERROR
+    ## The query of category defined models gets the status (not sure how, and this could be skipped)
+    ## Other models do not have it: just add it manually from the model itself
+    if 'status' not in gdf.columns:
+        gdf['status'] = model.status
     if 'popup' not in gdf.columns:
         gdf['popup'] = await model.get_popup(gdf)
     # Add properties
     properties = await model.get_properties(gdf)
-    columns = ['geometry', 'status', 'popup']
+    columns = ['geom', 'status', 'popup']
     for property, values in properties.items():
         columns.append(property)
         gdf[property] = values
@@ -285,7 +285,7 @@ class Gisaf:

         ## Drop existing
         if replace_all:
-            engine.execute('DELETE FROM "{}"."{}"'.format(model.metadata.schema, model.__tablename__))
+            engine.execute('DELETE FROM "{}"'.format(model.__table__.fullname))
         else:
             raise NotImplementedError('ipynb_tools.Gisaf.to_layer does not support updates yet')
@@ -2,10 +2,10 @@ from typing import Any, ClassVar

 from sqlalchemy import String
 from pydantic import computed_field, ConfigDict
-from sqlmodel import Field, Relationship, SQLModel, JSON, TEXT, select
+from sqlmodel import Field, Relationship, JSON, TEXT

 from gisaf.models.metadata import gisaf_survey
-from gisaf.database import db_session, pandas_query
+from gisaf.database import BaseModel

 mapbox_type_mapping = {
     'Point': 'symbol',
@@ -13,13 +13,6 @@ mapbox_type_mapping = {
     'Polygon': 'fill',
 }

-class BaseModel(SQLModel):
-    @classmethod
-    async def get_df(cls):
-        async with db_session() as session:
-            query = select(cls)
-            return await session.run_sync(pandas_query, query)
-

 class CategoryGroup(BaseModel, table=True):
     metadata = gisaf_survey
@@ -45,7 +38,7 @@ class CategoryModelType(BaseModel, table=True):


 class CategoryBase(BaseModel):
-    model_config = ConfigDict(protected_namespaces=())  # type: ignore
+    # model_config = ConfigDict(protected_namespaces=())  # type: ignore
     class Admin:
         menu = 'Other'
         flask_admin_model_view = 'CategoryModelView'
@@ -53,20 +46,20 @@ class CategoryBase(BaseModel):
     name: str | None = Field(default=None, primary_key=True)
     domain: ClassVar[str] = 'V'
     description: str | None
-    group: str = Field(min_length=4, max_length=4,
+    group: str = Field(sa_type=String(4),
                        foreign_key="category_group.name", index=True)
-    minor_group_1: str = Field(min_length=4, max_length=4, default='----')
-    minor_group_2: str = Field(min_length=4, max_length=4, default='----')
-    status: str = Field(min_length=1, max_length=1)
+    minor_group_1: str = Field(sa_type=String(4), default='----')
+    minor_group_2: str = Field(sa_type=String(4), default='----')
+    status: str = Field(sa_type=String(1))
     custom: bool | None
     auto_import: bool = True
-    gis_type: str = Field(max_length=50,
+    gis_type: str = Field(sa_type=String(50),
                           foreign_key='category_model_type.name',
                           default='Point')
-    long_name: str | None = Field(max_length=50)
+    long_name: str | None = Field(sa_type=String(50))
     style: str | None = Field(sa_type=TEXT)
-    symbol: str | None = Field(max_length=1)
-    mapbox_type_custom: str | None = Field(max_length=32)
+    symbol: str | None = Field(sa_type=String(1))
+    mapbox_type_custom: str | None = Field(sa_type=String(12))
     mapbox_paint: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
     mapbox_layout: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
     viewable_role: str | None
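The motivation for the sa_type switch above: min_length/max_length are Pydantic validation constraints and do not set the SQL column type, whereas sa_type=String(n) maps to VARCHAR(n) in the emitted DDL. A minimal sketch (the Example model is hypothetical):

from sqlalchemy import String
from sqlmodel import SQLModel, Field

class Example(SQLModel, table=True):  # hypothetical
    id: int | None = Field(default=None, primary_key=True)
    code: str = Field(sa_type=String(4))  # emitted as VARCHAR(4)

print(Example.__table__.c.code.type)  # VARCHAR(4)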
@@ -12,7 +12,7 @@ import geopandas as gpd  # type: ignore
 import shapely  # type: ignore
 import pyproj

-from sqlmodel import SQLModel, Field
+from sqlmodel import SQLModel, Field, Relationship
 from sqlmodel.ext.asyncio.session import AsyncSession
 from pydantic import BaseModel
@@ -83,9 +83,13 @@ class BaseSurveyModel(BaseModel):
     """
     id: int | None = Field(sa_type=BigInteger, primary_key=True, default=None)
     equip_id: int = Field(foreign_key='equipment.id')
+    # equipment: Equipment = Relationship()
     srvyr_id: int = Field('surveyor.id')
+    # surveyor: Surveyor = Relationship()
     accur_id: int = Field('accuracy.id')
+    # accuracy: Accuracy = Relationship()
     project_id: int = Field('project.id')
+    # project: Project = Relationship()

     orig_id: str
     date: date
@@ -140,7 +144,7 @@ class SurveyModel(BaseSurveyModel):
     Base mixin class for defining final (reprojected) survey data, with a status
     """
     metadata: ClassVar[MetaData] = survey
-    status: ClassVar[str] = Field(sa_type=String(1))
+    # status: str = Field(sa_type=String(1))

     get_gdf_with_related: ClassVar[bool] = False
@@ -251,7 +255,7 @@ class SurveyModel(BaseSurveyModel):
         query = sql_query_for_geojson.format(
             model=cls,
             category=category,
-            schema=cls.metadata.schema,
+            schema=cls.__table__.schema,
             #table=cls.__tablename__,
             # FIXME: should be __tablename__, but see SQLModel.__tablename__ which uses lower(__name__)
             table=cls.__name__,
@@ -282,12 +286,10 @@ class SurveyModel(BaseSurveyModel):
     ]


-class GeoModel(Model):
+class GeoModelNoStatus(Model):
     """
     Base class for all geo models
     """
-    #__abstract__ = True
-
     id: int | None = Field(default=None, primary_key=True)

     description: ClassVar[str] = ''
@@ -337,11 +339,6 @@ class GeoModel(Model):
     Style for the model, used in the map, etc
     """

-    # status: ClassVar[str] = 'E'
-    # """
-    # Status (ISO layers definition) of the layer. E -> Existing.
-    # """
-
     _join_with: ClassVar[dict[str, Any]] = {
     }
     """
@@ -474,7 +471,7 @@ class GeoModel(Model):
         shapely_geom = self.shapely_geom

         if simplify_tolerance:
-            shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
+            shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
                                                  preserve_topology=False)
         if shapely_geom.is_empty:
             raise NoPoint
@@ -682,7 +679,7 @@ class GeoModel(Model):
         if not gpd.options.use_pygeos:
             logger.warn(f'Using get_geos_df for {cls} but gpd.options.use_pygeos not set')
         if not crs:
-            crs = conf.crs['geojson']
+            crs = conf.crs.geojson
         df = await cls.get_df(where=where, **kwargs)
         df.set_index('id', inplace=True)
         df.rename(columns={'geom': 'wkb'}, inplace=True)
@@ -694,78 +691,84 @@ class GeoModel(Model):

     @classmethod
     async def get_geo_df(cls, where=None, crs=None, reproject=False,
-                         filter_columns=False, with_popup=False, **kwargs):
+                         filter_columns=False, with_popup=False, **kwargs) -> gpd.GeoDataFrame:
         """
-        Return a Pandas dataframe of all records
+        Return a GeoPandas GeoDataFrame of all records
         :param where: where clause for the query (eg. Model.attr=='foo')
         :param crs: coordinate system (eg. 'epsg:4326') (priority over the reproject parameter)
         :param reproject: should reproject to conf.srid_for_proj
         :return:
         """
-        df = await cls.get_df(where=where, **kwargs)
-        df.dropna(subset=['geom'], inplace=True)
-        df.set_index('id', inplace=True)
-        df.sort_index(inplace=True)
-        df_clean = df[df.geom != None]
-        ## Drop coordinates
-        df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
-                      inplace=True)
-        if not crs:
-            crs = conf.crs['geojson']
+        return await cls.get_gdf(where=where, **kwargs)

-        ## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
-        if getattr(gpd.options, 'use_pygeos', False):
-            geometry = shapely.from_wkb(df_clean.geom)
-        else:
-            geometry = [wkb.loads(geom) for geom in df_clean.geom]
+        # df = await cls.get_df(where=where, **kwargs)
+        # df.dropna(subset=['geom'], inplace=True)
+        # df.set_index('id', inplace=True)
+        # df.sort_index(inplace=True)
+        # df_clean = df[~df.geom.isna()]
+        # ## Drop coordinates
+        # df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
+        #               inplace=True)
+        # if not crs:
+        #     crs = conf.crs.geojson

-        gdf = gpd.GeoDataFrame(
-            df_clean.drop('geom', axis=1),
-            crs=crs,
-            geometry=geometry
-        )
+        # ## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
+        # # if getattr(gpd.options, 'use_pygeos', False):
+        # #     geometry = shapely.from_wkb(df_clean.geom)
+        # # else:
+        # #     geometry = [wkb.loads(geom) for geom in df_clean.geom]

-        if hasattr(cls, 'simplify') and cls.simplify:
-            #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
-            #preserve_topology=False)
-            gdf['geometry'] = gdf['geometry'].simplify(
-                float(cls.simplify) / conf.geo['simplify_geom_factor'],
-                preserve_topology=False)
+        # gdf = gpd.GeoDataFrame(
+        #     df_clean.drop('geom', axis=1),
+        #     crs=crs,
+        #     geometry=geometry
+        # )

-        if reproject:
-            gdf.to_crs(crs=conf.crs['for_proj'], inplace=True)
+        # if hasattr(cls, 'simplify') and cls.simplify:
+        #     #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
+        #     #preserve_topology=False)
+        #     gdf['geometry'] = gdf['geometry'].simplify(
+        #         float(cls.simplify) / conf.geo.simplify_geom_factor,
+        #         preserve_topology=False)

-        ## Filter out columns
-        if filter_columns:
-            gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
-                     inplace=True)
+        # if reproject:
+        #     gdf.to_crs(crs=conf.crs.for_proj, inplace=True)

-        if with_popup:
-            gdf['popup'] = await cls.get_popup(gdf)
+        # ## Filter out columns
+        # if filter_columns:
+        #     gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
+        #              inplace=True)

-        return gdf
+        # if with_popup:
+        #     gdf['popup'] = await cls.get_popup(gdf)
+
+        # return gdf

     @classmethod
     def get_attachment_dir(cls):
-        return f'{cls.__table__.schema}.{cls.__table__.name}'
+        return cls.__table__.fullname

     @classmethod
     def get_attachment_base_dir(cls):
-        return Path(conf.attachments['base_dir'])/cls.get_attachment_dir()
+        return Path(conf.attachments.base_dir) / cls.get_attachment_dir()

+class GeoModel(GeoModelNoStatus):
+    status: ClassVar[str] = 'E'
+    """
+    Status (ISO layers definition) of the layer. E -> Existing.
+    """

 class LiveGeoModel(GeoModel):
     status: ClassVar[str] = 'E'
     store: ClassVar[str]
     group: ClassVar[str] = 'Live'
     custom: ClassVar[bool] = True
     is_live: ClassVar[bool] = True
     is_db: ClassVar[bool] = False

-class Geom(str):
-    pass
+# class Geom(str):
+#     pass

-class GeoPointModel(GeoModel):
-    #__abstract__ = True
+class GeoPointModelNoStatus(GeoModelNoStatus):
     shapefile_model: ClassVar[int] = POINT
     ## geometry typing, see https://stackoverflow.com/questions/77333100/geoalchemy2-geometry-schema-for-pydantic-fastapi
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINT', srid=conf.geo.srid))
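The net effect of the class split above, reduced to a skeleton for reading the hunk (a sketch with Field and geometry details omitted): status now comes from GeoModel, so models built on the *NoStatus classes no longer carry one.

from typing import ClassVar

class GeoModelNoStatus: ...                    # base without a status

class GeoModel(GeoModelNoStatus):
    status: ClassVar[str] = 'E'                # ISO layers definition: E -> Existing

class GeoPointModelNoStatus(GeoModelNoStatus): ...

class GeoPointModel(GeoPointModelNoStatus, GeoModel): ...

class RawSurveyBaseModel(GeoPointModelNoStatus): ...  # raw points carry no status

assert hasattr(GeoPointModel, 'status')
assert not hasattr(RawSurveyBaseModel, 'status')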
@@ -827,9 +830,11 @@ class GeoPointModel(GeoModel):
         info['latitude'] = '{:.6f}'.format(self.shapely_geom.y)
         return info

+class GeoPointModel(GeoPointModelNoStatus, GeoModel):
+    ...
+

 class GeoPointZModel(GeoPointModel):
-    #__abstract__ = True
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid))
     shapefile_model: ClassVar[int] = POINTZ
@@ -843,13 +848,11 @@ class GeoPointZModel(GeoPointModel):


 class GeoPointMModel(GeoPointZModel):
-    #__abstract__ = True
     shapefile_model: ClassVar[int] = POINTZ
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid))


 class GeoLineModel(GeoModel):
-    #__abstract__ = True
     shapefile_model: ClassVar[int] = POLYLINE
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRING', srid=conf.geo.srid))
     mapbox_type: ClassVar[str] = 'line'
@@ -913,7 +916,6 @@ class GeoLineModel(GeoModel):


 class GeoLineModelZ(GeoLineModel):
-    #__abstract__ = True
     shapefile_model: ClassVar[int] = POLYLINEZ
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRINGZ', dimension=3, srid=conf.geo.srid))
@@ -930,7 +932,6 @@ class GeoLineModelZ(GeoLineModel):


 class GeoPolygonModel(GeoModel):
-    #__abstract__ = True
     shapefile_model: ClassVar[int] = POLYGON
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGON', srid=conf.geo.srid))
     mapbox_type: ClassVar[str] = 'fill'
@@ -1002,7 +1003,6 @@ class GeoPolygonModel(GeoModel):


 class GeoPolygonModelZ(GeoPolygonModel):
-    #__abstract__ = True
     shapefile_model: ClassVar[int] = POLYGONZ
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGONZ', dimension=3, srid=conf.geo.srid))
@@ -1023,17 +1023,13 @@ class GeoPolygonModelZ(GeoPolygonModel):


 class GeoPointSurveyModel(SurveyModel, GeoPointMModel):
-    #__abstract__ = True
-
     ## raw_model is set in category_models_maker.make_category_models
-    raw_model: ClassVar['RawSurveyBaseModel'] = None
+    raw_model: ClassVar['RawSurveyBaseModel']


 class LineWorkSurveyModel(SurveyModel):
-    #__abstract__ = True
-
     ## raw_model is set in category_models_maker.make_category_models
-    raw_model: ClassVar['RawSurveyBaseModel'] = None
+    raw_model: ClassVar['RawSurveyBaseModel']

     def match_raw_points(self):
         reprojected_geom = transform(reproject_func, self.shapely_geom)
@@ -1046,20 +1042,17 @@ class LineWorkSurveyModel(SurveyModel):


 class GeoLineSurveyModel(LineWorkSurveyModel, GeoLineModelZ):
-    #__abstract__ = True
     pass


 class GeoPolygonSurveyModel(LineWorkSurveyModel, GeoPolygonModelZ):
-    #__abstract__ = True
     pass


-class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel):
+class RawSurveyBaseModel(BaseSurveyModel, GeoPointModelNoStatus):
     """
     Abstract base class for category based raw survey point models
     """
-    #__abstract__ = True
     metadata: ClassVar[MetaData] = raw_survey
     geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3,
                                                               srid=conf.geo.raw_survey.srid))
@@ -1070,7 +1063,7 @@ class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel):

     @classmethod
     async def get_geo_df(cls, *args, **kwargs):
-        return await super().get_geo_df(crs=conf.raw_survey['spatial_sys_ref'],
+        return await super().get_geo_df(crs=conf.raw_survey.spatial_sys_ref,
                                         *args, **kwargs)
@@ -1086,8 +1079,6 @@ class PlottableModel(Model):
     to be used (the first one being the default)
     * OR an ordered dict of value => resampling method
     """
-    #__abstract__ = True
-
     float_format: ClassVar[str] = '%.1f'
     values: ClassVar[list[dict[str, str]]] = []
@@ -1117,8 +1108,6 @@ class PlottableModel(Model):


 class TimePlottableModel(PlottableModel):
-    #__abstract__ = True
-
     time: datetime

     @classmethod
@@ -1,7 +1,7 @@
-from typing import Any
+from typing import Any, Dict, Tuple, Type, ClassVar
 import logging

-from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column
+from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column, select
 from pydantic import computed_field
 import numpy as np
 import pandas as pd
@@ -10,6 +10,8 @@ import shapely
 from sqlalchemy.sql import sqltypes
 from geoalchemy2.types import Geometry

+from gisaf.database import BaseModel
+
 pandas_cast_map = {
     sqltypes.Integer: 'Int64',
     sqltypes.Float: 'float64',
@@ -17,102 +19,118 @@ pandas_cast_map = {

 logger = logging.getLogger('model_base_base')

-class Model(SQLModel):
+class Model(BaseModel):
     """
     Base mixin class for models that can be converted to a Pandas dataframe with get_df
     """
+    # status: ClassVar[str] = 'E'
+
+    def __new__(cls, *args, **kwargs):
+        if not hasattr(cls, 'query'):
+            cls.query = select(cls)
+        return super().__new__(cls, *args, **kwargs)

     class Meta:
         filtered_columns_on_map: list[str] = []

     @classmethod
     def get_store_name(cls):
-        return "{}.{}".format(cls.metadata.schema, cls.__tablename__)
+        if hasattr(cls, '__table__'):
+            return cls.__table__.fullname
+        elif hasattr(cls, '__table_args__') and 'schema' in cls.__table_args__:
+            return f"{cls.__table_args__.schema}.{cls.__tablename__}"
+        else:
+            return f'{cls.metadata.schema}.{cls.__tablename__}'

     @classmethod
     def get_table_name_prefix(cls):
         return "{}_{}".format(cls.metadata.schema, cls.__tablename__)

-    @classmethod
-    async def get_df(cls, where=None,
-                     with_related=None, recursive=True,
-                     cast=True,
-                     with_only_columns=None,
-                     geom_as_ewkt=False,
-                     **kwargs):
-        """
-        Return a Pandas dataframe of all records
-        Optional arguments:
-        * an SQLAlchemy where clause
-        * with_related: automatically get data from related columns, following the foreign keys in the model definitions
-        * cast: automatically transform various data in their best python types (eg. with date, time...)
-        * with_only_columns: fetch only these columns (list of column names)
-        * geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df)
-        :return:
-        """
-        query = cls.query
+    # @classmethod
+    # async def get_df(cls, where=None,
+    #                  with_related=None, recursive=True,
+    #                  cast=True,
+    #                  with_only_columns=None,
+    #                  geom_as_ewkt=False,
+    #                  **kwargs):
+    #     """
+    #     Return a Pandas dataframe of all records
+    #     Optional arguments:
+    #     * an SQLAlchemy where clause
+    #     * with_related: automatically get data from related columns, following the foreign keys in the model definitions
+    #     * cast: automatically transform various data in their best python types (eg. with date, time...)
+    #     * with_only_columns: fetch only these columns (list of column names)
+    #     * geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df)
+    #     :return:
+    #     """
+    #     breakpoint()
+    #     if hasattr(cls, 'query'):
+    #         query = cls.query
+    #     else:
+    #         query = select(cls)

-        if with_related is not False:
-            if with_related or getattr(cls, 'get_gdf_with_related', False):
-                joins = get_join_with(cls, recursive)
-                model_loader = cls.load(**joins)
-                query = _get_query_with_table_names(model_loader)
+    # # if with_related is not False:
+    # #     if with_related or getattr(cls, 'get_gdf_with_related', False):
+    # #         joins = get_join_with(cls, recursive)
+    # #         model_loader = cls.load(**joins)
+    # #         query = _get_query_with_table_names(model_loader)

-        if where is not None:
-            query.append_whereclause(where)
+    # if where is not None:
+    #     query.append_whereclause(where)

-        if with_only_columns:
-            query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])
+    # if with_only_columns:
+    #     query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])

-        ## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
-        async with query.bind.raw_pool.acquire() as conn:
-            ## Convert hstore fields to dict
-            await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')
+    # ## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
+    # breakpoint()
+    # async with query.bind.raw_pool.acquire() as conn:
+    #     ## Convert hstore fields to dict
+    #     await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')

-            compiled = query.compile()
-            stmt = await conn.prepare(compiled.string)
-            columns = [a.name for a in stmt.get_attributes()]
-            data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
-            df = pd.DataFrame(data, columns=columns)
+    #     compiled = query.compile()
+    #     stmt = await conn.prepare(compiled.string)
+    #     columns = [a.name for a in stmt.get_attributes()]
+    #     data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
+    #     df = pd.DataFrame(data, columns=columns)

-        ## Convert primary key columns to Int64:
-        ## allows NaN, fixing type conversion to float with merge
-        for pk in [c.name for c in cls.__table__.primary_key.columns]:
-            if pk in df.columns and df[pk].dtype=='int64':
-                df[pk] = df[pk].astype('Int64')
+    # ## Convert primary key columns to Int64:
+    # ## allows NaN, fixing type conversion to float with merge
+    # for pk in [c.name for c in cls.__table__.primary_key.columns]:
+    #     if pk in df.columns and df[pk].dtype=='int64':
+    #         df[pk] = df[pk].astype('Int64')

-        if cast:
-            ## Cast the type for known types (datetime, ...)
-            for column_name in df.columns:
-                col = getattr(query.columns, column_name, None)
-                if col is None:
-                    logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
-                    continue
-                column_type = getattr(query.columns, column_name).type
-                ## XXX: Needs refinement, eg. nullable -> Int64 ...
-                if column_type.__class__ in pandas_cast_map:
-                    df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
-                elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
-                    ## Dates, times
-                    df[column_name] = pd.to_datetime(df[column_name])
-                #elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
-                #    ## Numeric
-                #    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
-                ## XXX: keeping this note about the "char" SQL type, but the fix of #9694 makes it unnecessary
-                #elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
-                #    ## Workaround for bytes being used for string of length 1 (not sure - why???)
-                #    df[column_name] = df[column_name].str.decode('utf-8')
+    # if cast:
+    #     ## Cast the type for known types (datetime, ...)
+    #     for column_name in df.columns:
+    #         col = getattr(query.columns, column_name, None)
+    #         if col is None:
+    #             logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
+    #             continue
+    #         column_type = getattr(query.columns, column_name).type
+    #         ## XXX: Needs refinement, eg. nullable -> Int64 ...
+    #         if column_type.__class__ in pandas_cast_map:
+    #             df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
+    #         elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
+    #             ## Dates, times
+    #             df[column_name] = pd.to_datetime(df[column_name])
+    #         #elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
+    #         #    ## Numeric
+    #         #    df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
+    #         ## XXX: keeping this note about the "char" SQL type, but the fix of #9694 makes it unnecessary
+    #         #elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
+    #         #    ## Workaround for bytes being used for string of length 1 (not sure - why???)
+    #         #    df[column_name] = df[column_name].str.decode('utf-8')

-        ## Rename the columns, removing the schema_table prefix for the columns in that model
-        prefix = cls.get_table_name_prefix()
-        prefix_length = len(prefix) + 1
-        rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
-        df.rename(columns=rename_map, inplace=True)
+    # ## Rename the columns, removing the schema_table prefix for the columns in that model
+    # prefix = cls.get_table_name_prefix()
+    # prefix_length = len(prefix) + 1
+    # rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
+    # df.rename(columns=rename_map, inplace=True)

-        ## Eventually convert geometry columns to EWKB
-        if geom_as_ewkt:
-            geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
-            for column in geometry_columns:
-                df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)
+    # ## Eventually convert geometry columns to EWKB
+    # if geom_as_ewkt:
+    #     geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
+    #     for column in geometry_columns:
+    #         df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)

-        return df
+    # return df
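On the get_store_name change above: Table.fullname already combines schema and table name, which is why the hand-built format string can go. A quick sketch:

from sqlalchemy import MetaData, Table, Column, Integer

md = MetaData(schema='gisaf_survey')
t = Table('category', md, Column('id', Integer, primary_key=True))
print(t.fullname)  # gisaf_survey.category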
@@ -395,12 +395,11 @@ class Store:
         for store_name in missing_triger_tables:
             ## XXX: TODO: See https://stackoverflow.com/questions/7888846/trigger-in-sqlachemy
             model = registry.geom[store_name]
-
             try:
                 await session.exec(text(
                     ttag_create_trigger.format(
-                        schema=model.metadata.schema,
-                        table=model.__tablename__)
+                        schema=model.__table__.schema,
+                        table=model.__table__.name)
                 ))
             except UndefinedTableError:
                 logger.warning(f'table {store_name} does not exist in '
@@ -108,8 +108,8 @@ class ModelRegistry:
         :return:
         """
         logger.debug('make_registry')
-        await self.make_category_models()
         self.scan()
+        await self.make_category_models()
         await self.build()
         ## If ogcapi is in app (i.e. not with scheduler):
         ## Now that the models are refreshed, tell the ogcapi to (re)build
@@ -368,7 +368,7 @@ class ModelRegistry:
             row.model.__name__,
             row.model.__name__,
             row.model.description,
-            row.model.metadata.schema,
+            row.model.__table__.schema,
             row.model.base_gis_type,
             row.model.style,
             row.model.symbol,
@@ -396,7 +396,6 @@ class ModelRegistry:
         self.categories['store'] = self.categories.apply(get_store_name, axis=1)

-        self.categories['count'] = pd.Series(dtype=pd.Int64Dtype())
         self.categories.set_index('name', inplace=True)

         df_models = pd.DataFrame(self.geom.items(),
                                  columns=['store', 'model']
@@ -552,24 +551,20 @@ class ModelRegistry:
         self.primary_groups['title'] = self.primary_groups['long_name']

         ## Add Misc and Live
-        self.primary_groups.loc[-1] = (
-            'Misc',
+        self.primary_groups.loc['Misc'] = (
             False,
             'Misc and old layers (not coming from our survey; they will be organized, '
             'eventually as the surveys get more complete)',
             'Misc',
         )
-        self.primary_groups.index = self.primary_groups.index + 1

-        self.primary_groups.loc[len(self.primary_groups)] = (
-            'Live',
+        self.primary_groups.loc['Live'] = (
             False,
             'Layers from data processing, sensors, etc, and are updated automatically',
             'Live',
         )

-        self.primary_groups.loc[len(self.primary_groups)] = (
-            'Community',
+        self.primary_groups.loc['Community'] = (
             False,
             'Layers from community',
             'Community',
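The indexing change above appends the extra groups by label instead of by positional index, so the group name itself becomes the index and the loc[-1]/index-shift dance is no longer needed. A small sketch (the column names here are assumptions, not the real schema):

import pandas as pd

groups = pd.DataFrame(columns=['exist_required', 'long_name', 'title'])  # assumed columns
groups.loc['Misc'] = (False, 'Misc and old layers', 'Misc')
groups.loc['Live'] = (False, 'Live layers', 'Live')
print(groups.index.tolist())  # ['Misc', 'Live']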