Implement simplify (using geopandas)

Add preserve_topology option
This commit is contained in:
phil 2024-01-07 18:30:25 +05:30
parent e3ed311390
commit c3caedea0e
6 changed files with 93 additions and 66 deletions

19
pdm.lock generated
View file

@ -5,7 +5,7 @@
groups = ["default", "dev", "mqtt"] groups = ["default", "dev", "mqtt"]
strategy = ["cross_platform"] strategy = ["cross_platform"]
lock_version = "4.4.1" lock_version = "4.4.1"
content_hash = "sha256:e0be2ab66a21fc5961c11251fec8a3e6e0e41d1945c5c9630734ebb0be3f6dbb" content_hash = "sha256:d6bc84b5bf12fda8fd24858515794677046aca3dea340a40679d1276ae7a6ea9"
[[package]] [[package]]
name = "aiomqtt" name = "aiomqtt"
@ -308,18 +308,17 @@ files = [
[[package]] [[package]]
name = "fastapi" name = "fastapi"
version = "0.105.0" version = "0.108.0"
requires_python = ">=3.8" requires_python = ">=3.8"
summary = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" summary = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
dependencies = [ dependencies = [
"anyio<4.0.0,>=3.7.1",
"pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4", "pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4",
"starlette<0.28.0,>=0.27.0", "starlette<0.33.0,>=0.29.0",
"typing-extensions>=4.8.0", "typing-extensions>=4.8.0",
] ]
files = [ files = [
{file = "fastapi-0.105.0-py3-none-any.whl", hash = "sha256:f19ebf6fdc82a3281d10f2cb4774bdfa90238e3b40af3525a0c09fd08ad1c480"}, {file = "fastapi-0.108.0-py3-none-any.whl", hash = "sha256:8c7bc6d315da963ee4cdb605557827071a9a7f95aeb8fcdd3bde48cdc8764dd7"},
{file = "fastapi-0.105.0.tar.gz", hash = "sha256:4d12838819aa52af244580675825e750ad67c9df4614f557a769606af902cf22"}, {file = "fastapi-0.108.0.tar.gz", hash = "sha256:5056e504ac6395bf68493d71fcfc5352fdbd5fda6f88c21f6420d80d81163296"},
] ]
[[package]] [[package]]
@ -1163,15 +1162,15 @@ files = [
[[package]] [[package]]
name = "starlette" name = "starlette"
version = "0.27.0" version = "0.32.0.post1"
requires_python = ">=3.7" requires_python = ">=3.8"
summary = "The little ASGI library that shines." summary = "The little ASGI library that shines."
dependencies = [ dependencies = [
"anyio<5,>=3.4.0", "anyio<5,>=3.4.0",
] ]
files = [ files = [
{file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"}, {file = "starlette-0.32.0.post1-py3-none-any.whl", hash = "sha256:cd0cb10ddb49313f609cedfac62c8c12e56c7314b66d89bb077ba228bada1b09"},
{file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"}, {file = "starlette-0.32.0.post1.tar.gz", hash = "sha256:e54e2b7e2fb06dff9eac40133583f10dfa05913f5a85bf26f427c7a40a9a3d02"},
] ]
[[package]] [[package]]

View file

@ -1 +1 @@
__version__ = '2023.4.dev12+gfda845c.d20231223' __version__ = '2023.4.dev28+ge3ed311.d20240107'

View file

@ -55,6 +55,7 @@ class RawSurvey(BaseSettings):
class Geo(BaseSettings): class Geo(BaseSettings):
raw_survey: RawSurvey raw_survey: RawSurvey
simplify_geom_factor: int simplify_geom_factor: int
simplify_preserve_topology: bool = False
srid: int srid: int
srid_for_proj: int srid_for_proj: int

View file

@ -4,9 +4,11 @@ from collections.abc import AsyncGenerator
from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute
from sqlalchemy.sql.selectable import Select
from sqlmodel import SQLModel, select from sqlmodel import SQLModel, select
from sqlmodel.ext.asyncio.session import AsyncSession from sqlmodel.ext.asyncio.session import AsyncSession
from fastapi import Depends from fastapi import Depends
# from geoalchemy2.functions import ST_SimplifyPreserveTopology
import pandas as pd import pandas as pd
import geopandas as gpd import geopandas as gpd
@ -31,7 +33,18 @@ async def db_session() -> AsyncGenerator[AsyncSession]:
def pandas_query(session, query): def pandas_query(session, query):
return pd.read_sql_query(query, session.connection()) return pd.read_sql_query(query, session.connection())
def geopandas_query(session, query, *, crs=None, cast=True): def geopandas_query(session, query: Select, model, *,
# simplify_tolerance: float|None=None,
crs=None, cast=True,
):
## XXX: I could not get the add_columns work without creating a subquery,
## so moving the simplification to geopandas - see in _get_df
# if simplify_tolerance is not None:
# query = query.with_only_columns(*(col for col in query.columns
# if col.name != 'geom'))
# new_column = model.__table__.columns['geom'].ST_SimplifyPreserveTopology(
# simplify_tolerance).label('geom')
# query = query.add_columns(new_column)
return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs) return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs)
class BaseModel(SQLModel): class BaseModel(SQLModel):
@ -40,22 +53,21 @@ class BaseModel(SQLModel):
return [] return []
@classmethod @classmethod
async def get_df(cls, *, async def get_df(cls, **kwargs) -> pd.DataFrame:
where=None, with_related=True, **kwargs return await cls._get_df(pandas_query, **kwargs)
) -> pd.DataFrame:
return await cls._get_df(pandas_query, where=None, with_related=True, **kwargs)
@classmethod @classmethod
async def get_gdf(cls, *, async def get_gdf(cls, **kwargs) -> gpd.GeoDataFrame:
where=None, with_related=True, **kwargs
) -> gpd.GeoDataFrame:
return await cls._get_df(geopandas_query, return await cls._get_df(geopandas_query,
where=None, with_related=True, **kwargs) model=cls,
**kwargs)
@classmethod @classmethod
async def _get_df(cls, method, *, async def _get_df(cls, method, *,
where=None, with_related=True, with_only_columns=[], **kwargs where=None, with_related=True, with_only_columns=[],
) -> pd.DataFrame | gpd.GeoDataFrame: simplify_tolerance: float | None=None,
preserve_topology: bool | None=None,
**kwargs) -> pd.DataFrame | gpd.GeoDataFrame:
async with db_session() as session: async with db_session() as session:
if len(with_only_columns) == 0: if len(with_only_columns) == 0:
query = select(cls) query = select(cls)
@ -70,6 +82,13 @@ class BaseModel(SQLModel):
if with_related and len(joined_tables) > 0: if with_related and len(joined_tables) > 0:
query = query.options(*(joinedload(jt) for jt in joined_tables)) query = query.options(*(joinedload(jt) for jt in joined_tables))
df = await session.run_sync(method, query, **kwargs) df = await session.run_sync(method, query, **kwargs)
if method is geopandas_query and simplify_tolerance is not None:
df['geom'] = df['geom'].simplify(
simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=(conf.geo.simplify_preserve_topology
if preserve_topology is None
else preserve_topology)
)
## Change column names to reflect the joined tables ## Change column names to reflect the joined tables
## Leave the first columns unchanged, as their names come straight ## Leave the first columns unchanged, as their names come straight
## from the model's fields ## from the model's fields

View file

@ -7,7 +7,8 @@ import logging
from typing import Annotated from typing import Annotated
from asyncio import CancelledError from asyncio import CancelledError
from fastapi import (Depends, FastAPI, HTTPException, Response, Header, WebSocket, WebSocketDisconnect, from fastapi import (Depends, FastAPI, HTTPException, Response, Header,
WebSocket, WebSocketDisconnect,
status, responses) status, responses)
from gisaf.models.authentication import User from gisaf.models.authentication import User
@ -76,7 +77,8 @@ async def live_layer(store: str, websocket: WebSocket):
async def get_geojson(store_name, async def get_geojson(store_name,
user: User = Depends(get_current_active_user), user: User = Depends(get_current_active_user),
If_None_Match: Annotated[str | None, Header()] = None, If_None_Match: Annotated[str | None, Header()] = None,
simplify: Annotated[float | None, Header()] = 50.0, simplify: Annotated[float | None, Header()] = None,
preserveTopology: Annotated[bool|None, Header()] = None,
): ):
""" """
Some REST stores coded manually (route prefixed with "gj": geojson). Some REST stores coded manually (route prefixed with "gj": geojson).
@ -88,45 +90,45 @@ async def get_geojson(store_name,
model = registry.stores.loc[store_name].model model = registry.stores.loc[store_name].model
except KeyError: except KeyError:
raise HTTPException(status.HTTP_404_NOT_FOUND) raise HTTPException(status.HTTP_404_NOT_FOUND)
if hasattr(model, 'viewable_role'): if hasattr(model, 'viewable_role'):
if not(user and user.can_view(model)): if not(user and user.can_view(model)):
logger.info(f'{user.username if user else "Anonymous"} tried to access {model}') username = user.username if user else "Anonymous"
logger.info(f'{username} tried to access {model}')
raise HTTPException(status.HTTP_401_UNAUTHORIZED) raise HTTPException(status.HTTP_401_UNAUTHORIZED)
if await redis_store.has_channel(store_name): if await redis_store.has_channel(store_name):
## Live layers ## Live layers
data = await redis_store.get_layer_as_json(store_name) data = await redis_store.get_layer_as_json(store_name)
return Response(content=data.decode(), return Response(content=data.decode(),
media_type="application/json") media_type="application/json")
# elif not model:
# raise HTTPException(status.HTTP_404_NOT_FOUND)
if model.cache_enabled: if model.cache_enabled:
ttag = await redis_store.get_ttag(store_name) ttag = await redis_store.get_ttag(store_name)
if ttag and If_None_Match == ttag: if ttag and If_None_Match == ttag:
return status.HTTP_304_NOT_MODIFIED return status.HTTP_304_NOT_MODIFIED
if hasattr(model, 'get_geojson'): if hasattr(model, 'get_geojson'):
geojson = await model.get_geojson(simplify_tolerance=simplify, registry=registry) geojson = await model.get_geojson(simplify_tolerance=simplify,
preserve_topology=preserveTopology,
registry=registry)
## Store to redis for caching ## Store to redis for caching
if use_cache: if use_cache:
await redis_store.store_json(model, geojson) await redis_store.store_json(model, geojson)
resp = geojson resp = geojson
elif model.can_get_features_as_df: elif model.can_get_features_as_df:
## Get the GeoDataframe (gdf) with GeoPandas ## Get the GeoDataframe (gdf) with GeoPandas
## get_popup and get_properties get the gdf as argument and can use vectorised operations ## get_popup and get_properties get the gdf as argument
## and can use vectorised operations
try: try:
gdf = await model.get_geo_df(cast=True, with_related=True, filter_columns=True) gdf = await model.get_gdf(cast=True, with_related=True,
# filter_columns=True,
preserve_topology=preserveTopology,
simplify_tolerance=simplify)
except CancelledError as err: except CancelledError as err:
logger.debug(f'Request for {store_name} cancelled while getting gdf') logger.debug(f'Getting {store_name} cancelled while getting gdf')
raise err raise err
except Exception as err: except Exception as err:
logger.exception(err) logger.exception(err)
raise err #status.HTTP_500_INTERNAL_SERVER_ERROR raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR)
## The query of category defined models gets the status (not sure how and this could be skipped) ## The query of category defined models gets the status
## (not sure how and this could be skipped)
## Other models do not have: just add it manually from the model itself ## Other models do not have: just add it manually from the model itself
if 'status' not in gdf.columns: if 'status' not in gdf.columns:
gdf['status'] = model.status gdf['status'] = model.status
@ -138,35 +140,38 @@ async def get_geojson(store_name,
for property, values in properties.items(): for property, values in properties.items():
columns.append(property) columns.append(property)
gdf[property] = values gdf[property] = values
geojson = gdf[columns].to_json(separators=(',', ':'), check_circular=False) geojson = gdf[columns].to_json(separators=(',', ':'),
check_circular=False)
## Store to redis for caching ## Store to redis for caching
if use_cache: if use_cache:
await redis_store.store_json(model, geojson) await redis_store.store_json(model, geojson)
resp = geojson resp = geojson
else: else:
logger.warn(f"{model} doesn't allow using dataframe for generating json!") raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR,
attrs, features_kwargs = await model.get_features_attrs(simplify) detail='Gino is for: Gino Is No Option')
## Using gino: allows OO model (get_info, etc) # logger.warn(f"{model} doesn't allow using dataframe for generating json!")
try: # attrs, features_kwargs = await model.get_features_attrs(simplify)
attrs['features'] = await model.get_features_in_bulk_gino(**features_kwargs) # ## Using gino: allows OO model (get_info, etc)
except Exception as err: # try:
logger.exception(err) # attrs['features'] = await model.get_features_in_bulk_gino(**features_kwargs)
raise status.HTTP_500_INTERNAL_SERVER_ERROR # except Exception as err:
resp = attrs # logger.exception(err)
# raise status.HTTP_500_INTERNAL_SERVER_ERROR
# resp = attrs
headers = {} headers = {}
if model.cache_enabled and ttag: if model.cache_enabled and ttag:
headers['ETag'] = ttag headers['ETag'] = ttag
return Response(content=resp, media_type="application/json", headers=headers) return Response(content=resp,
media_type="application/json", headers=headers)
@api.get('/gj/{store_name}/popup/{id}') # @api.get('/gj/{store_name}/popup/{id}')
async def gj_popup(store_name: str, id: int): # async def gj_popup(store_name: str, id: int):
model = registry.geom.get(store_name) # model = registry.geom.get(store_name)
if not hasattr(model, 'get_popup_dynamic'): # if not hasattr(model, 'get_popup_dynamic'):
return '' # return ''
obj = await model.get(id) # obj = await model.get(id)
## Escape characters for json # ## Escape characters for json
popup_more = obj.get_popup_dynamic().replace('"', '\\"').replace('\n', '\\n') # popup_more = obj.get_popup_dynamic().replace('"', '\\"').replace('\n', '\\n')
return {"text": popup_more} # return {"text": popup_more}

View file

@ -218,7 +218,8 @@ class SurveyModel(BaseSurveyModel):
'] #' + df.index.astype('U') '] #' + df.index.astype('U')
@classmethod @classmethod
async def get_geojson(cls, registry=None, simplify_tolerance=0): async def get_geojson(cls,
registry=None, simplify_tolerance=0, preserve_topology=False):
if registry is None: if registry is None:
from ..registry import registry from ..registry import registry
@ -489,8 +490,9 @@ class GeoModelNoStatus(Model):
shapely_geom = self.shapely_geom shapely_geom = self.shapely_geom
if simplify_tolerance: if simplify_tolerance:
shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor, shapely_geom = shapely_geom.simplify(
preserve_topology=False) simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=conf.geo.simplify_preserve_topology)
if shapely_geom.is_empty: if shapely_geom.is_empty:
raise NoPoint raise NoPoint
@ -744,11 +746,12 @@ class GeoModelNoStatus(Model):
# ) # )
# if hasattr(cls, 'simplify') and cls.simplify: # if hasattr(cls, 'simplify') and cls.simplify:
# #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor, # #shapely_geom = shapely_geom.simplify(
# #preserve_topology=False) # simplify_tolerance / conf.geo.simplify_geom_factor,
# preserve_topology=conf.geo.simplify_preserve_topology)
# gdf['geometry'] = gdf['geometry'].simplify( # gdf['geometry'] = gdf['geometry'].simplify(
# float(cls.simplify) / conf.geo.simplify_geom_factor, # float(cls.simplify) / conf.geo.simplify_geom_factor,
# preserve_topology=False) # preserve_topology=conf.geo.simplify_preserve_topology)
# if reproject: # if reproject:
# gdf.to_crs(crs=conf.crs.for_proj, inplace=True) # gdf.to_crs(crs=conf.crs.for_proj, inplace=True)