Implement simplify (using geopandas)

Add preserve_geometry option
This commit is contained in:
phil 2024-01-07 18:30:25 +05:30
parent e3ed311390
commit c3caedea0e
6 changed files with 93 additions and 66 deletions

19
pdm.lock generated
View file

@ -5,7 +5,7 @@
groups = ["default", "dev", "mqtt"]
strategy = ["cross_platform"]
lock_version = "4.4.1"
content_hash = "sha256:e0be2ab66a21fc5961c11251fec8a3e6e0e41d1945c5c9630734ebb0be3f6dbb"
content_hash = "sha256:d6bc84b5bf12fda8fd24858515794677046aca3dea340a40679d1276ae7a6ea9"
[[package]]
name = "aiomqtt"
@ -308,18 +308,17 @@ files = [
[[package]]
name = "fastapi"
version = "0.105.0"
version = "0.108.0"
requires_python = ">=3.8"
summary = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
dependencies = [
"anyio<4.0.0,>=3.7.1",
"pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4",
"starlette<0.28.0,>=0.27.0",
"starlette<0.33.0,>=0.29.0",
"typing-extensions>=4.8.0",
]
files = [
{file = "fastapi-0.105.0-py3-none-any.whl", hash = "sha256:f19ebf6fdc82a3281d10f2cb4774bdfa90238e3b40af3525a0c09fd08ad1c480"},
{file = "fastapi-0.105.0.tar.gz", hash = "sha256:4d12838819aa52af244580675825e750ad67c9df4614f557a769606af902cf22"},
{file = "fastapi-0.108.0-py3-none-any.whl", hash = "sha256:8c7bc6d315da963ee4cdb605557827071a9a7f95aeb8fcdd3bde48cdc8764dd7"},
{file = "fastapi-0.108.0.tar.gz", hash = "sha256:5056e504ac6395bf68493d71fcfc5352fdbd5fda6f88c21f6420d80d81163296"},
]
[[package]]
@ -1163,15 +1162,15 @@ files = [
[[package]]
name = "starlette"
version = "0.27.0"
requires_python = ">=3.7"
version = "0.32.0.post1"
requires_python = ">=3.8"
summary = "The little ASGI library that shines."
dependencies = [
"anyio<5,>=3.4.0",
]
files = [
{file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"},
{file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"},
{file = "starlette-0.32.0.post1-py3-none-any.whl", hash = "sha256:cd0cb10ddb49313f609cedfac62c8c12e56c7314b66d89bb077ba228bada1b09"},
{file = "starlette-0.32.0.post1.tar.gz", hash = "sha256:e54e2b7e2fb06dff9eac40133583f10dfa05913f5a85bf26f427c7a40a9a3d02"},
]
[[package]]

View file

@ -1 +1 @@
__version__ = '2023.4.dev12+gfda845c.d20231223'
__version__ = '2023.4.dev28+ge3ed311.d20240107'

View file

@ -55,6 +55,7 @@ class RawSurvey(BaseSettings):
class Geo(BaseSettings):
raw_survey: RawSurvey
simplify_geom_factor: int
simplify_preserve_topology: bool = False
srid: int
srid_for_proj: int

View file

@ -4,9 +4,11 @@ from collections.abc import AsyncGenerator
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import joinedload, QueryableAttribute, InstrumentedAttribute
from sqlalchemy.sql.selectable import Select
from sqlmodel import SQLModel, select
from sqlmodel.ext.asyncio.session import AsyncSession
from fastapi import Depends
# from geoalchemy2.functions import ST_SimplifyPreserveTopology
import pandas as pd
import geopandas as gpd
@ -31,7 +33,18 @@ async def db_session() -> AsyncGenerator[AsyncSession]:
def pandas_query(session, query):
return pd.read_sql_query(query, session.connection())
def geopandas_query(session, query, *, crs=None, cast=True):
def geopandas_query(session, query: Select, model, *,
# simplify_tolerance: float|None=None,
crs=None, cast=True,
):
## XXX: I could not get add_columns to work without creating a subquery,
## so the simplification is moved to geopandas - see _get_df
# if simplify_tolerance is not None:
# query = query.with_only_columns(*(col for col in query.columns
# if col.name != 'geom'))
# new_column = model.__table__.columns['geom'].ST_SimplifyPreserveTopology(
# simplify_tolerance).label('geom')
# query = query.add_columns(new_column)
return gpd.GeoDataFrame.from_postgis(query, session.connection(), crs=crs)
class BaseModel(SQLModel):
@ -40,22 +53,21 @@ class BaseModel(SQLModel):
return []
@classmethod
async def get_df(cls, *,
where=None, with_related=True, **kwargs
) -> pd.DataFrame:
return await cls._get_df(pandas_query, where=None, with_related=True, **kwargs)
async def get_df(cls, **kwargs) -> pd.DataFrame:
return await cls._get_df(pandas_query, **kwargs)
@classmethod
async def get_gdf(cls, *,
where=None, with_related=True, **kwargs
) -> gpd.GeoDataFrame:
async def get_gdf(cls, **kwargs) -> gpd.GeoDataFrame:
return await cls._get_df(geopandas_query,
where=None, with_related=True, **kwargs)
model=cls,
**kwargs)
@classmethod
async def _get_df(cls, method, *,
where=None, with_related=True, with_only_columns=[], **kwargs
) -> pd.DataFrame | gpd.GeoDataFrame:
where=None, with_related=True, with_only_columns=[],
simplify_tolerance: float | None=None,
preserve_topology: bool | None=None,
**kwargs) -> pd.DataFrame | gpd.GeoDataFrame:
async with db_session() as session:
if len(with_only_columns) == 0:
query = select(cls)
@ -70,6 +82,13 @@ class BaseModel(SQLModel):
if with_related and len(joined_tables) > 0:
query = query.options(*(joinedload(jt) for jt in joined_tables))
df = await session.run_sync(method, query, **kwargs)
if method is geopandas_query and simplify_tolerance is not None:
df['geom'] = df['geom'].simplify(
simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=(conf.geo.simplify_preserve_topology
if preserve_topology is None
else preserve_topology)
)
## Change column names to reflect the joined tables
## Leave the first columns unchanged, as their names come straight
## from the model's fields

View file

@ -7,7 +7,8 @@ import logging
from typing import Annotated
from asyncio import CancelledError
from fastapi import (Depends, FastAPI, HTTPException, Response, Header, WebSocket, WebSocketDisconnect,
from fastapi import (Depends, FastAPI, HTTPException, Response, Header,
WebSocket, WebSocketDisconnect,
status, responses)
from gisaf.models.authentication import User
@ -76,7 +77,8 @@ async def live_layer(store: str, websocket: WebSocket):
async def get_geojson(store_name,
user: User = Depends(get_current_active_user),
If_None_Match: Annotated[str | None, Header()] = None,
simplify: Annotated[float | None, Header()] = 50.0,
simplify: Annotated[float | None, Header()] = None,
preserveTopology: Annotated[bool|None, Header()] = None,
):
"""
Some REST stores coded manually (route prefixed with "gj": geojson).
@ -88,45 +90,45 @@ async def get_geojson(store_name,
model = registry.stores.loc[store_name].model
except KeyError:
raise HTTPException(status.HTTP_404_NOT_FOUND)
if hasattr(model, 'viewable_role'):
if not(user and user.can_view(model)):
logger.info(f'{user.username if user else "Anonymous"} tried to access {model}')
username = user.username if user else "Anonymous"
logger.info(f'{username} tried to access {model}')
raise HTTPException(status.HTTP_401_UNAUTHORIZED)
if await redis_store.has_channel(store_name):
## Live layers
data = await redis_store.get_layer_as_json(store_name)
return Response(content=data.decode(),
media_type="application/json")
# elif not model:
# raise HTTPException(status.HTTP_404_NOT_FOUND)
if model.cache_enabled:
ttag = await redis_store.get_ttag(store_name)
if ttag and If_None_Match == ttag:
return status.HTTP_304_NOT_MODIFIED
if hasattr(model, 'get_geojson'):
geojson = await model.get_geojson(simplify_tolerance=simplify, registry=registry)
geojson = await model.get_geojson(simplify_tolerance=simplify,
preserve_topology=preserveTopology,
registry=registry)
## Store to redis for caching
if use_cache:
await redis_store.store_json(model, geojson)
resp = geojson
elif model.can_get_features_as_df:
## Get the GeoDataframe (gdf) with GeoPandas
## get_popup and get_propertites get the gdf as argument and can use vectorised operations
## get_popup and get_properties get the gdf as argument
## and can use vectorised operations
try:
gdf = await model.get_geo_df(cast=True, with_related=True, filter_columns=True)
gdf = await model.get_gdf(cast=True, with_related=True,
# filter_columns=True,
preserve_topology=preserveTopology,
simplify_tolerance=simplify)
except CancelledError as err:
logger.debug(f'Request for {store_name} cancelled while getting gdf')
logger.debug(f'Getting {store_name} cancelled while getting gdf')
raise err
except Exception as err:
logger.exception(err)
raise err #status.HTTP_500_INTERNAL_SERVER_ERROR
## The query of category defined models gets the status (not sure how and this could be skipped)
raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR)
## The query of category defined models gets the status
## (not sure how and this could be skipped)
## Other models do not have it: just add it manually from the model itself
if 'status' not in gdf.columns:
gdf['status'] = model.status
@ -138,35 +140,38 @@ async def get_geojson(store_name,
for property, values in properties.items():
columns.append(property)
gdf[property] = values
geojson = gdf[columns].to_json(separators=(',', ':'), check_circular=False)
geojson = gdf[columns].to_json(separators=(',', ':'),
check_circular=False)
## Store to redis for caching
if use_cache:
await redis_store.store_json(model, geojson)
resp = geojson
else:
logger.warn(f"{model} doesn't allow using dataframe for generating json!")
attrs, features_kwargs = await model.get_features_attrs(simplify)
## Using gino: allows OO model (get_info, etc)
try:
attrs['features'] = await model.get_features_in_bulk_gino(**features_kwargs)
except Exception as err:
logger.exception(err)
raise status.HTTP_500_INTERNAL_SERVER_ERROR
resp = attrs
raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR,
detail='Gino is for: Gino Is No Option')
# logger.warn(f"{model} doesn't allow using dataframe for generating json!")
# attrs, features_kwargs = await model.get_features_attrs(simplify)
# ## Using gino: allows OO model (get_info, etc)
# try:
# attrs['features'] = await model.get_features_in_bulk_gino(**features_kwargs)
# except Exception as err:
# logger.exception(err)
# raise status.HTTP_500_INTERNAL_SERVER_ERROR
# resp = attrs
headers = {}
if model.cache_enabled and ttag:
headers['ETag'] = ttag
return Response(content=resp, media_type="application/json", headers=headers)
return Response(content=resp,
media_type="application/json", headers=headers)
@api.get('/gj/{store_name}/popup/{id}')
async def gj_popup(store_name: str, id: int):
model = registry.geom.get(store_name)
if not hasattr(model, 'get_popup_dynamic'):
return ''
obj = await model.get(id)
## Escape characters for json
popup_more = obj.get_popup_dynamic().replace('"', '\\"').replace('\n', '\\n')
return {"text": popup_more}
# @api.get('/gj/{store_name}/popup/{id}')
# async def gj_popup(store_name: str, id: int):
# model = registry.geom.get(store_name)
# if not hasattr(model, 'get_popup_dynamic'):
# return ''
# obj = await model.get(id)
# ## Escape characters for json
# popup_more = obj.get_popup_dynamic().replace('"', '\\"').replace('\n', '\\n')
# return {"text": popup_more}

View file

@ -218,7 +218,8 @@ class SurveyModel(BaseSurveyModel):
'] #' + df.index.astype('U')
@classmethod
async def get_geojson(cls, registry=None, simplify_tolerance=0):
async def get_geojson(cls,
registry=None, simplify_tolerance=0, preserve_topology=False):
if registry is None:
from ..registry import registry
@ -489,8 +490,9 @@ class GeoModelNoStatus(Model):
shapely_geom = self.shapely_geom
if simplify_tolerance:
shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=False)
shapely_geom = shapely_geom.simplify(
simplify_tolerance / conf.geo.simplify_geom_factor,
preserve_topology=conf.geo.simplify_preserve_topology)
if shapely_geom.is_empty:
raise NoPoint
@ -744,11 +746,12 @@ class GeoModelNoStatus(Model):
# )
# if hasattr(cls, 'simplify') and cls.simplify:
# #shapely_geom = shapely_geom.simplify(simplify_tolerance / conf.geo.simplify_geom_factor,
# #preserve_topology=False)
# #shapely_geom = shapely_geom.simplify(
# simplify_tolerance / conf.geo.simplify_geom_factor,
# preserve_topology=conf.geo.simplify_preserve_topology)
# gdf['geometry'] = gdf['geometry'].simplify(
# float(cls.simplify) / conf.geo.simplify_geom_factor,
# preserve_topology=False)
# preserve_topology=conf.geo.simplify_preserve_topology)
# if reproject:
# gdf.to_crs(crs=conf.crs.for_proj, inplace=True)