Migrate joins to sqlalchemy's query options

Use native pandas read_sql_query and geopandas from_postgis
Fix definition of status in models
Fix table names
Fix category fields
phil 2024-01-02 00:09:08 +05:30
parent 956147aea8
commit 75bedb3e91
8 changed files with 236 additions and 190 deletions

View file

@ -1,11 +1,14 @@
from contextlib import asynccontextmanager
from typing import Annotated
from typing import Annotated, Literal, Any
from collections.abc import AsyncGenerator
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute
from sqlmodel import SQLModel, select
from sqlmodel.ext.asyncio.session import AsyncSession
from fastapi import Depends
import pandas as pd
import geopandas as gpd
from gisaf.config import conf
@ -28,4 +31,52 @@ async def db_session() -> AsyncGenerator[AsyncSession]:
def pandas_query(session, query):
return pd.read_sql_query(query, session.connection())
def geopandas_query(session, query, *, crs=None, cast=True):
return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs)
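Both helpers are written for AsyncSession.run_sync, which calls them with a synchronous Session as the first argument; a minimal usage sketch (Tree is an assumed model, not defined in this commit):
async def example() -> None:
    async with db_session() as session:
        df = await session.run_sync(pandas_query, select(Tree))
        gdf = await session.run_sync(geopandas_query, select(Tree), crs='epsg:4326')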
class BaseModel(SQLModel):
@classmethod
def selectinload(cls) -> list[Literal['*'] | QueryableAttribute[Any]]:
return []
@classmethod
async def get_df(cls, where=None, with_related=True, **kwargs) -> pd.DataFrame:
return await cls._get_df(pandas_query, where=where, with_related=with_related, **kwargs)
@classmethod
async def get_gdf(cls, *, where=None, with_related=True, **kwargs) -> gpd.GeoDataFrame:
return await cls._get_df(geopandas_query, where=where, with_related=with_related, **kwargs)
@classmethod
async def _get_df(cls, method, *, where=None, with_related=True, **kwargs) -> pd.DataFrame | gpd.GeoDataFrame:
async with db_session() as session:
query = select(cls)
if where is not None:
query = query.where(where)
## Get the joined tables
joined_tables = cls.selectinload()
if with_related and len(joined_tables) > 0:
query = query.options(*(joinedload(jt) for jt in joined_tables))
df = await session.run_sync(method, query, **kwargs)
## Change column names to reflect the joined tables
## Leave the first columns unchanged, as their names come straight
## from the model's fields
joined_columns = list(df.columns[len(cls.model_fields):])
renames: dict[str, str] = {}
## Match column names with the joined tables
## Important: this assumes that the order of the joined tables
## and their columns is preserved by pandas' read_sql
for joined_table in joined_tables:
target = joined_table.property.target # type: ignore
target_name = target.name
for col in target.columns:
## Pop the column from the column list and make a new name
renames[joined_columns.pop(0)] = f'{target.schema}_{target_name}_{col.name}'
df.rename(columns=renames, inplace=True)
## Finally, set the index of the df as the index of cls
df.set_index([c.name for c in cls.__table__.primary_key.columns], # type: ignore
inplace=True)
return df
fastapi_db_session = Annotated[AsyncSession, Depends(get_db_session)]
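For illustration, a minimal sketch of a model opting into related loading through these helpers (the Tree model and its category relationship are assumptions, not part of this commit):
from sqlmodel import Field, Relationship

class Tree(BaseModel, table=True):  # hypothetical model
    id: int | None = Field(default=None, primary_key=True)
    category_name: str | None = Field(default=None, foreign_key='category.name')
    category: 'Category' = Relationship()  # Category assumed defined elsewhere

    @classmethod
    def selectinload(cls):
        ## Relationships listed here get joined by _get_df via joinedload
        return [cls.category]

## Columns coming from Category are renamed to <schema>_category_<column>:
# df = await Tree.get_df(where=Tree.category_name == 'TREE')
# gdf = await Tree.get_gdf()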

View file

@ -120,15 +120,16 @@ async def get_geojson(store_name,
raise err
except Exception as err:
logger.exception(err)
raise status.HTTP_500_INTERNAL_SERVER_ERROR
raise err #status.HTTP_500_INTERNAL_SERVER_ERROR
## The query for category-defined models gets the status (not sure how; this could perhaps be skipped)
## Other models do not have it: just add it manually from the model itself
if 'status' not in gdf.columns:
gdf['status'] = model.status
if 'popup' not in gdf.columns:
gdf['popup'] = await model.get_popup(gdf)
# Add properties
properties = await model.get_properties(gdf)
columns = ['geometry', 'status', 'popup']
columns = ['geom', 'status', 'popup']
for property, values in properties.items():
columns.append(property)
gdf[property] = values

View file

@ -285,7 +285,7 @@ class Gisaf:
## Drop existing
if replace_all:
engine.execute('DELETE FROM "{}"."{}"'.format(model.metadata.schema, model.__tablename__))
engine.execute('DELETE FROM "{}"'.format(model.__table__.fullname))
else:
raise NotImplementedError('ipynb_tools.Gisaf.to_layer does not support updates yet')
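For reference, SQLAlchemy's Table.fullname is the schema-qualified name whenever a schema is set, which is what the manual two-part formatting reproduced; a quick sketch with assumed metadata:
from sqlalchemy import Column, Integer, MetaData, Table

md = MetaData(schema='survey')
tree = Table('tree', md, Column('id', Integer, primary_key=True))
assert tree.fullname == 'survey.tree'  # schema-qualified, unquoted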

View file

@ -2,10 +2,10 @@ from typing import Any, ClassVar
from sqlalchemy import String
from pydantic import computed_field, ConfigDict
from sqlmodel import Field, Relationship, SQLModel, JSON, TEXT, select
from sqlmodel import Field, Relationship, JSON, TEXT
from gisaf.models.metadata import gisaf_survey
from gisaf.database import db_session, pandas_query
from gisaf.database import BaseModel
mapbox_type_mapping = {
'Point': 'symbol',
@ -13,13 +13,6 @@ mapbox_type_mapping = {
'Polygon': 'fill',
}
class BaseModel(SQLModel):
@classmethod
async def get_df(cls):
async with db_session() as session:
query = select(cls)
return await session.run_sync(pandas_query, query)
class CategoryGroup(BaseModel, table=True):
metadata = gisaf_survey
@ -45,7 +38,7 @@ class CategoryModelType(BaseModel, table=True):
class CategoryBase(BaseModel):
model_config = ConfigDict(protected_namespaces=()) # type: ignore
# model_config = ConfigDict(protected_namespaces=()) # type: ignore
class Admin:
menu = 'Other'
flask_admin_model_view = 'CategoryModelView'
@ -53,20 +46,20 @@ class CategoryBase(BaseModel):
name: str | None = Field(default=None, primary_key=True)
domain: ClassVar[str] = 'V'
description: str | None
group: str = Field(min_length=4, max_length=4,
group: str = Field(sa_type=String(4),
foreign_key="category_group.name", index=True)
minor_group_1: str = Field(min_length=4, max_length=4, default='----')
minor_group_2: str = Field(min_length=4, max_length=4, default='----')
status: str = Field(min_length=1, max_length=1)
minor_group_1: str = Field(sa_type=String(4), default='----')
minor_group_2: str = Field(sa_type=String(4), default='----')
status: str = Field(sa_type=String(1))
custom: bool | None
auto_import: bool = True
gis_type: str = Field(max_length=50,
gis_type: str = Field(sa_type=String(50),
foreign_key='category_model_type.name',
default='Point')
long_name: str | None = Field(max_length=50)
long_name: str | None = Field(sa_type=String(50))
style: str | None = Field(sa_type=TEXT)
symbol: str | None = Field(max_length=1)
mapbox_type_custom: str | None = Field(max_length=32)
symbol: str | None = Field(sa_type=String(1))
mapbox_type_custom: str | None = Field(sa_type=String(12))
mapbox_paint: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
mapbox_layout: dict[str, Any] | None = Field(sa_type=JSON(none_as_null=True))
viewable_role: str | None
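The sa_type=String(n) pattern sets the SQL column type explicitly instead of deriving it from validation constraints; a minimal sketch (model name illustrative):
from sqlalchemy import String
from sqlmodel import Field, SQLModel

class Code(SQLModel, table=True):  # illustrative only
    id: int | None = Field(default=None, primary_key=True)
    ## The column is created as VARCHAR(4); add min_length/max_length
    ## separately if Pydantic-side validation is still wanted
    group: str = Field(sa_type=String(4), default='----')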

View file

@ -12,7 +12,7 @@ import geopandas as gpd # type: ignore
import shapely # type: ignore
import pyproj
from sqlmodel import SQLModel, Field
from sqlmodel import SQLModel, Field, Relationship
from sqlmodel.ext.asyncio.session import AsyncSession
from pydantic import BaseModel
@ -83,9 +83,13 @@ class BaseSurveyModel(BaseModel):
"""
id: int | None = Field(sa_type=BigInteger, primary_key=True, default=None)
equip_id: int = Field(foreign_key='equipment.id')
# equipment: Equipment = Relationship()
srvyr_id: int = Field(foreign_key='surveyor.id')
# surveyor: Surveyor = Relationship()
accur_id: int = Field(foreign_key='accuracy.id')
# accuracy: Accuracy = Relationship()
project_id: int = Field(foreign_key='project.id')
# project: Project = Relationship()
orig_id: str
date: date
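SQLModel's Field takes the default value as its first positional argument, so foreign keys must be passed by keyword, as above; a minimal sketch (model name illustrative):
from sqlmodel import Field, SQLModel

class SurveyRecord(SQLModel, table=True):  # illustrative only
    id: int | None = Field(default=None, primary_key=True)
    ## Keyword form creates the FOREIGN KEY constraint
    srvyr_id: int = Field(foreign_key='surveyor.id')
    ## A bare positional string would be taken as the column default
    # srvyr_id: int = Field('surveyor.id')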
@ -140,7 +144,7 @@ class SurveyModel(BaseSurveyModel):
Base mixin class for defining final (reprojected) survey data, with a status
"""
metadata: ClassVar[MetaData] = survey
status: ClassVar[str] = Field(sa_type=String(1))
# status: str = Field(sa_type=String(1))
get_gdf_with_related: ClassVar[bool] = False
@ -251,7 +255,7 @@ class SurveyModel(BaseSurveyModel):
query = sql_query_for_geojson.format(
model=cls,
category=category,
schema=cls.metadata.schema,
schema=cls.__table__.schema,
#table=cls.__tablename__,
# FIXME: should be __tablename__, but see SQLModel.__tablename__, which uses lower(__name__)
table=cls.__name__,
@ -282,12 +286,10 @@ class SurveyModel(BaseSurveyModel):
]
class GeoModel(Model):
class GeoModelNoStatus(Model):
"""
Base class for all geo models
"""
#__abstract__ = True
id: int | None = Field(default=None, primary_key=True)
description: ClassVar[str] = ''
@ -337,11 +339,6 @@ class GeoModel(Model):
Style for the model, used in the map, etc
"""
# status: ClassVar[str] = 'E'
# """
# Status (ISO layers definition) of the layer. E -> Existing.
# """
_join_with: ClassVar[dict[str, Any]] = {
}
"""
@ -474,7 +471,7 @@ class GeoModel(Model):
shapely_geom = self.shapely_geom
if simplify_tolerance:
shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=False)
if shapely_geom.is_empty:
raise NoPoint
@ -682,7 +679,7 @@ class GeoModel(Model):
if not gpd.options.use_pygeos:
logger.warning(f'Using get_geos_df for {cls} but gpd.options.use_pygeos not set')
if not crs:
crs = conf.crs['geojson']
crs = conf.crs.geojson
df = await cls.get_df(where=where, **kwargs)
df.set_index('id', inplace=True)
df.rename(columns={'geom': 'wkb'}, inplace=True)
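Several call sites in this file switch from dict-style to attribute-style config access, consistent with a Pydantic-style settings object; a sketch of the assumed shape (names and defaults are assumptions):
from pydantic import BaseModel

class CRSConfig(BaseModel):
    geojson: str = 'epsg:4326'    # assumed default
    for_proj: str = 'epsg:32644'  # assumed default

class GeoConfig(BaseModel):
    srid: int = 4326                        # assumed default
    simplify_geom_factor: int = 10_000_000  # assumed default

## conf.crs['geojson'] (dict-style)  ->  conf.crs.geojson (attribute-style)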
@ -694,78 +691,84 @@ class GeoModel(Model):
@classmethod
async def get_geo_df(cls, where=None, crs=None, reproject=False,
filter_columns=False, with_popup=False, **kwargs):
filter_columns=False, with_popup=False, **kwargs) -> gpd.GeoDataFrame:
"""
Return a Pandas dataframe of all records
Return a GeoPandas GeoDataFrame of all records
:param where: where clause for the query (eg. Model.attr=='foo')
:param crs: coordinate system (eg. 'epsg:4326') (priority over the reproject parameter)
:param reproject: should reproject to conf.srid_for_proj
:return:
"""
df = await cls.get_df(where=where, **kwargs)
df.dropna(subset=['geom'], inplace=True)
df.set_index('id', inplace=True)
df.sort_index(inplace=True)
df_clean = df[df.geom != None]
## Drop coordinates
df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
inplace=True)
if not crs:
crs = conf.crs['geojson']
return await cls.get_gdf(where=where, **kwargs)
## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
if getattr(gpd.options, 'use_pygeos', False):
geometry = shapely.from_wkb(df_clean.geom)
else:
geometry = [wkb.loads(geom) for geom in df_clean.geom]
# df = await cls.get_df(where=where, **kwargs)
# df.dropna(subset=['geom'], inplace=True)
# df.set_index('id', inplace=True)
# df.sort_index(inplace=True)
# df_clean = df[~df.geom.isna()]
# ## Drop coordinates
# df_clean.drop(columns=set(df_clean.columns).intersection(['ST_X_1', 'ST_Y_1', 'ST_Z_1']),
# inplace=True)
# if not crs:
# crs = conf.crs.geojson
gdf = gpd.GeoDataFrame(
df_clean.drop('geom', axis=1),
crs=crs,
geometry=geometry
)
# ## XXX: is it right? Waiting for application.py to remove pygeos support to know more.
# # if getattr(gpd.options, 'use_pygeos', False):
# # geometry = shapely.from_wkb(df_clean.geom)
# # else:
# # geometry = [wkb.loads(geom) for geom in df_clean.geom]
if hasattr(cls, 'simplify') and cls.simplify:
#shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo['simplify_geom_factor'],
# gdf = gpd.GeoDataFrame(
# df_clean.drop('geom', axis=1),
# crs=crs,
# geometry=geometry
# )
# if hasattr(cls, 'simplify') and cls.simplify:
# #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
# #preserve_topology=False)
# gdf['geometry'] = gdf['geometry'].simplify(
# float(cls.simplify) / conf.geo.simplify_geom_factor,
# preserve_topology=False)
gdf['geometry'] = gdf['geometry'].simplify(
float(cls.simplify) / conf.geo['simplify_geom_factor'],
preserve_topology=False)
if reproject:
gdf.to_crs(crs=conf.crs['for_proj'], inplace=True)
# if reproject:
# gdf.to_crs(crs=conf.crs.for_proj, inplace=True)
## Filter out columns
if filter_columns:
gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
inplace=True)
# ## Filter out columns
# if filter_columns:
# gdf.drop(columns=set(gdf.columns).intersection(cls.filtered_columns_on_map),
# inplace=True)
if with_popup:
gdf['popup'] = await cls.get_popup(gdf)
# if with_popup:
# gdf['popup'] = await cls.get_popup(gdf)
return gdf
# return gdf
@classmethod
def get_attachment_dir(cls):
return f'{cls.__table__.schema}.{cls.__table__.name}'
return cls.__table__.fullname
@classmethod
def get_attachment_base_dir(cls):
return Path(conf.attachments['base_dir'])/cls.get_attachment_dir()
return Path(conf.attachments.base_dir) / cls.get_attachment_dir()
class GeoModel(GeoModelNoStatus):
status: ClassVar[str] = 'E'
"""
Status (ISO layers definition) of the layer. E -> Existing.
"""
class LiveGeoModel(GeoModel):
status: ClassVar[str] = 'E'
store: ClassVar[str]
group: ClassVar[str] ='Live'
custom: ClassVar[bool] = True
is_live: ClassVar[bool] = True
is_db: ClassVar[bool] = False
class Geom(str):
pass
# class Geom(str):
# pass
class GeoPointModel(GeoModel):
#__abstract__ = True
class GeoPointModelNoStatus(GeoModelNoStatus):
shapefile_model: ClassVar[int] = POINT
## geometry typing, see https://stackoverflow.com/questions/77333100/geoalchemy2-geometry-schema-for-pydantic-fastapi
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINT', srid=conf.geo.srid))
@ -827,9 +830,11 @@ class GeoPointModel(GeoModel):
info['latitude'] = '{:.6f}'.format(self.shapely_geom.y)
return info
class GeoPointModel(GeoPointModelNoStatus, GeoModel):
...
class GeoPointZModel(GeoPointModel):
#__abstract__ = True
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid))
shapefile_model: ClassVar[int] = POINTZ
@ -843,13 +848,11 @@ class GeoPointZModel(GeoPointModel):
class GeoPointMModel(GeoPointZModel):
#__abstract__ = True
shapefile_model: ClassVar[int] = POINTZ
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3, srid=conf.geo.srid))
class GeoLineModel(GeoModel):
#__abstract__ = True
shapefile_model: ClassVar[int] = POLYLINE
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRING', srid=conf.geo.srid))
mapbox_type: ClassVar[str] = 'line'
@ -913,7 +916,6 @@ class GeoLineModel(GeoModel):
class GeoLineModelZ(GeoLineModel):
#__abstract__ = True
shapefile_model: ClassVar[int] = POLYLINEZ
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('LINESTRINGZ', dimension=3, srid=conf.geo.srid))
@ -930,7 +932,6 @@ class GeoLineModelZ(GeoLineModel):
class GeoPolygonModel(GeoModel):
#__abstract__ = True
shapefile_model: ClassVar[int] = POLYGON
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGON', srid=conf.geo.srid))
mapbox_type: ClassVar[str] = 'fill'
@ -1002,7 +1003,6 @@ class GeoPolygonModel(GeoModel):
class GeoPolygonModelZ(GeoPolygonModel):
#__abstract__ = True
shapefile_model: ClassVar[int] = POLYGONZ
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POLYGONZ', dimension=3, srid=conf.geo.srid))
@ -1023,17 +1023,13 @@ class GeoPolygonModelZ(GeoPolygonModel):
class GeoPointSurveyModel(SurveyModel, GeoPointMModel):
#__abstract__ = True
## raw_model is set in category_models_maker.make_category_models
raw_model: ClassVar['RawSurveyBaseModel'] = None
raw_model: ClassVar['RawSurveyBaseModel']
class LineWorkSurveyModel(SurveyModel):
#__abstract__ = True
## raw_model is set in category_models_maker.make_category_models
raw_model: ClassVar['RawSurveyBaseModel'] = None
raw_model: ClassVar['RawSurveyBaseModel']
def match_raw_points(self):
reprojected_geom = transform(reproject_func, self.shapely_geom)
@ -1046,20 +1042,17 @@ class LineWorkSurveyModel(SurveyModel):
class GeoLineSurveyModel(LineWorkSurveyModel, GeoLineModelZ):
#__abstract__ = True
pass
class GeoPolygonSurveyModel(LineWorkSurveyModel, GeoPolygonModelZ):
#__abstract__ = True
pass
class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel):
class RawSurveyBaseModel(BaseSurveyModel, GeoPointModelNoStatus):
"""
Abstract base class for category based raw survey point models
"""
#__abstract__ = True
metadata: ClassVar[MetaData] = raw_survey
geom: Annotated[str, WKBElement] = Field(sa_type=Geometry('POINTZ', dimension=3,
srid=conf.geo.raw_survey.srid))
@ -1070,7 +1063,7 @@ class RawSurveyBaseModel(BaseSurveyModel, GeoPointMModel):
@classmethod
async def get_geo_df(cls, *args, **kwargs):
return await super().get_geo_df(crs=conf.raw_survey['spatial_sys_ref'],
return await super().get_geo_df(crs=conf.raw_survey.spatial_sys_ref,
*args, **kwargs)
@ -1086,8 +1079,6 @@ class PlottableModel(Model):
to be used (the first one being the default)
* OR an ordered dict of value => resampling method
"""
#__abstract__ = True
float_format: ClassVar[str] = '%.1f'
values: ClassVar[list[dict[str, str]]] = []
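A hedged sketch of a values declaration as described in the docstring above (the subclass and the dict keys are assumptions):
from typing import ClassVar

class RainGauge(PlottableModel):  # illustrative, not table-backed
    ## First entry is the default plotted value; the dict keys are assumed
    values: ClassVar[list[dict[str, str]]] = [
        {'name': 'rainfall', 'unit': 'mm', 'resampling': 'sum'},
    ]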
@ -1117,8 +1108,6 @@ class PlottableModel(Model):
class TimePlottableModel(PlottableModel):
#__abstract__ = True
time: datetime
@classmethod

View file

@ -1,7 +1,7 @@
from typing import Any
from typing import Any, Dict, Tuple, Type, ClassVar
import logging
from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column
from sqlmodel import Field, SQLModel, MetaData, JSON, TEXT, Relationship, Column, select
from pydantic import computed_field
import numpy as np
import pandas as pd
@ -10,6 +10,8 @@ import shapely
from sqlalchemy.sql import sqltypes
from geoalchemy2.types import Geometry
from gisaf.database import BaseModel
pandas_cast_map = {
sqltypes.Integer: 'Int64',
sqltypes.Float: 'float64',
@ -17,102 +19,118 @@ pandas_cast_map = {
logger = logging.getLogger('model_base_base')
class Model(SQLModel):
class Model(BaseModel):
"""
Base mixin class for models that can be converted to a Pandas dataframe with get_df
"""
# status: ClassVar[str] = 'E'
def __new__(cls, *args, **kwargs):
if not hasattr(cls, 'query'):
cls.query = select(cls)
return super().__new__(cls, *args, **kwargs)
class Meta:
filtered_columns_on_map: list[str] = []
@classmethod
def get_store_name(cls):
return "{}.{}".format(cls.metadata.schema, cls.__tablename__)
if hasattr(cls, '__table__'):
return cls.__table__.fullname
elif hasattr(cls, '__table_args__') and 'schema' in cls.__table_args__:
return f"{cls.__table_args__.schema}.{cls.__tablename__}"
else:
return f'{cls.metadata.schema}.{cls.__tablename__}'
@classmethod
def get_table_name_prefix(cls):
return "{}_{}".format(cls.metadata.schema, cls.__tablename__)
@classmethod
async def get_df(cls, where=None,
with_related=None, recursive=True,
cast=True,
with_only_columns=None,
geom_as_ewkt=False,
**kwargs):
"""
Return a Pandas dataframe of all records
Optional arguments:
* an SQLAlchemy where clause
* with_related: automatically get data from related columns, following the foreign keys in the model definitions
* cast: automatically transform various data in their best python types (eg. with date, time...)
* with_only_columns: fetch only these columns (list of column names)
* geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df)
:return:
"""
query = cls.query
# @classmethod
# async def get_df(cls, where=None,
# with_related=None, recursive=True,
# cast=True,
# with_only_columns=None,
# geom_as_ewkt=False,
# **kwargs):
# """
# Return a Pandas dataframe of all records
# Optional arguments:
# * an SQLAlchemy where clause
# * with_related: automatically get data from related columns, following the foreign keys in the model definitions
# * cast: automatically transform various data in their best python types (eg. with date, time...)
# * with_only_columns: fetch only these columns (list of column names)
# * geom_as_ewkt: convert geometry columns to EWKB (handy for eg. using upsert_df)
# :return:
# """
# breakpoint()
# if hasattr(cls, 'query'):
# query = cls.query
# else:
# query = select(cls)
if with_related is not False:
if with_related or getattr(cls, 'get_gdf_with_related', False):
joins = get_join_with(cls, recursive)
model_loader = cls.load(**joins)
query = _get_query_with_table_names(model_loader)
# # if with_related is not False:
# # if with_related or getattr(cls, 'get_gdf_with_related', False):
# # joins = get_join_with(cls, recursive)
# # model_loader = cls.load(**joins)
# # query = _get_query_with_table_names(model_loader)
if where is not None:
query.append_whereclause(where)
# if where is not None:
# query.append_whereclause(where)
if with_only_columns:
query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])
# if with_only_columns:
# query = query.with_only_columns([getattr(cls, colname) for colname in with_only_columns])
## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
async with query.bind.raw_pool.acquire() as conn:
## Convert hstore fields to dict
await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')
# ## Got idea from https://github.com/MagicStack/asyncpg/issues/173.
# breakpoint()
# async with query.bind.raw_pool.acquire() as conn:
# ## Convert hstore fields to dict
# await conn.set_builtin_type_codec('hstore', codec_name='pg_contrib.hstore')
compiled = query.compile()
stmt = await conn.prepare(compiled.string)
columns = [a.name for a in stmt.get_attributes()]
data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
df = pd.DataFrame(data, columns=columns)
# compiled = query.compile()
# stmt = await conn.prepare(compiled.string)
# columns = [a.name for a in stmt.get_attributes()]
# data = await stmt.fetch(*[compiled.params.get(param) for param in compiled.positiontup])
# df = pd.DataFrame(data, columns=columns)
## Convert primary key columns to Int64:
## allows NaN, fixing type conversion to float with merge
for pk in [c.name for c in cls.__table__.primary_key.columns]:
if pk in df.columns and df[pk].dtype=='int64':
df[pk] = df[pk].astype('Int64')
# ## Convert primary key columns to Int64:
# ## allows NaN, fixing type conversion to float with merge
# for pk in [c.name for c in cls.__table__.primary_key.columns]:
# if pk in df.columns and df[pk].dtype=='int64':
# df[pk] = df[pk].astype('Int64')
if cast:
## Cast the type for known types (datetime, ...)
for column_name in df.columns:
col = getattr(query.columns, column_name, None)
if col is None:
logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
continue
column_type = getattr(query.columns, column_name).type
## XXX: Needs refinement, eg. nullable -> Int64 ...
if column_type.__class__ in pandas_cast_map:
df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
## Dates, times
df[column_name] = pd.to_datetime(df[column_name])
#elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
# ## Numeric
# df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
## XXX: keeping this note about the "char" SQL type, but the fix of #9694 makes it unnecessary
#elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
# ## Workaround for bytes being used for string of length 1 (not sure - why???)
# df[column_name] = df[column_name].str.decode('utf-8')
# if cast:
# ## Cast the type for known types (datetime, ...)
# for column_name in df.columns:
# col = getattr(query.columns, column_name, None)
# if col is None:
# logger.debug(f'Cannot get column {column_name} in query for model {cls.__name__}')
# continue
# column_type = getattr(query.columns, column_name).type
# ## XXX: Needs refinement, eg. nullable -> Int64 ...
# if column_type.__class__ in pandas_cast_map:
# df[column_name] = df[column_name].astype(pandas_cast_map[column_type.__class__])
# elif isinstance(column_type, (sqltypes.Date, sqltypes.DateTime)):
# ## Dates, times
# df[column_name] = pd.to_datetime(df[column_name])
# #elif isinstance(column_type, (sqltypes.Integer, sqltypes.Float)):
# # ## Numeric
# # df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
# ## XXX: keeping this note about the "char" SQL type, but the fix of #9694 makes it unnecessary
# #elif isinstance(column_type, sqltypes.CHAR) or (isinstance(column_type, sqltypes.String) and column_type.length == 1):
# # ## Workaround for bytes being used for string of length 1 (not sure - why???)
# # df[column_name] = df[column_name].str.decode('utf-8')
## Rename the columns, removing the schema_table prefix for the columns in that model
prefix = cls.get_table_name_prefix()
prefix_length = len(prefix) + 1
rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
df.rename(columns=rename_map, inplace=True)
# ## Rename the columns, removing the schema_table prefix for the columns in that model
# prefix = cls.get_table_name_prefix()
# prefix_length = len(prefix) + 1
# rename_map = {colname: colname[prefix_length:] for colname in df.columns if colname.startswith(prefix)}
# df.rename(columns=rename_map, inplace=True)
## Eventually convert geometry columns to EWKB
if geom_as_ewkt:
geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
for column in geometry_columns:
df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)
# ## Eventually convert geometry columns to EWKB
# if geom_as_ewkt:
# geometry_columns = [col.name for col in cls.__table__.columns if isinstance(col.type, Geometry)]
# for column in geometry_columns:
# df[column] = shapely.to_wkb(shapely.from_wkb(df.geom), hex=True, include_srid=True)
return df
# return df

View file

@ -395,12 +395,11 @@ class Store:
for store_name in missing_triger_tables:
## XXX: TODO: See https://stackoverflow.com/questions/7888846/trigger-in-sqlachemy
model = registry.geom[store_name]
try:
await session.exec(text(
ttag_create_trigger.format(
schema=model.metadata.schema,
table=model.__tablename__)
schema=model.__table__.schema,
table=model.__table__.name)
))
except UndefinedTableError:
logger.warning(f'table {store_name} does not exist in '

View file

@ -108,8 +108,8 @@ class ModelRegistry:
:return:
"""
logger.debug('make_registry')
await self.make_category_models()
self.scan()
await self.make_category_models()
await self.build()
## If ogcapi is in app (i.e. not with scheduler):
## Now that the models are refreshed, tells the ogcapi to (re)build
@ -368,7 +368,7 @@ class ModelRegistry:
row.model.__name__,
row.model.__name__,
row.model.description,
row.model.metadata.schema,
row.model.__table__.schema,
row.model.base_gis_type,
row.model.style,
row.model.symbol,
@ -396,7 +396,6 @@ class ModelRegistry:
self.categories['store'] = self.categories.apply(get_store_name, axis=1)
self.categories['count'] = pd.Series(dtype=pd.Int64Dtype())
self.categories.set_index('name', inplace=True)
df_models = pd.DataFrame(self.geom.items(),
columns=['store', 'model']
@ -552,24 +551,20 @@ class ModelRegistry:
self.primary_groups['title'] = self.primary_groups['long_name']
## Add Misc and Live
self.primary_groups.loc[-1] = (
'Misc',
self.primary_groups.loc['Misc'] = (
False,
'Misc and old layers (not coming from our survey; they will be organized '
'eventually, as the surveys get more complete)',
'Misc',
)
self.primary_groups.index = self.primary_groups.index + 1
self.primary_groups.loc[len(self.primary_groups)] = (
'Live',
self.primary_groups.loc['Live'] = (
False,
'Layers from data processing, sensors, etc., updated automatically',
'Live',
)
self.primary_groups.loc[len(self.primary_groups)] = (
'Community',
self.primary_groups.loc['Community'] = (
False,
'Layers from community',
'Community',
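The label-based .loc assignments replace the former positional bookkeeping (loc[-1], shifting the index, loc[len(df)]); a minimal pandas sketch with assumed columns:
import pandas as pd

groups = pd.DataFrame(columns=['survey', 'description', 'title'])
groups.index.name = 'name'
## .loc with a new label appends the row under that key directly,
## so no index renumbering is needed
groups.loc['Misc'] = (False, 'Misc and old layers', 'Misc')
groups.loc['Live'] = (False, 'Layers from data processing', 'Live')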