gisaf-backend/src/gisaf/importers.py


import datetime
import logging
import re
from typing import Any
from collections import defaultdict, OrderedDict
from math import isnan
from mimetypes import guess_type
from zipfile import ZipFile, BadZipFile
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.wkb import dumps as dumps_wkb
from sqlalchemy import and_
from sqlalchemy.sql.schema import Column
from gisaf.config import conf
#from .models.admin import FileImport
from gisaf.models.admin import FeatureImportData, BasketImportResult
from gisaf.models.raw_survey import RawSurveyModel
from gisaf.models.survey import Surveyor, Accuracy, Equipment, AccuracyEquimentSurveyorMapping
from gisaf.models.tags import Tags
from gisaf.redis_tools import store as redis_store
from gisaf.registry import registry
from gisaf.utils import (
delete_df, upsert_df,
SHAPELY_TYPE_TO_MAPBOX_TYPE, DEFAULT_MAPBOX_LAYOUT,
MAPBOX_COLOR_ATTRIBUTE_NAME, MAPBOX_OPACITY_ATTRIBUTE_NAME,
DEFAULT_MAPBOX_PAINT, gisTypeSymbolMap
)
logger = logging.getLogger(__name__)
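## NB: the exception class below shadows the builtin ImportError;
## within this module, ImportError refers to this class.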
class ImportError(Exception):
pass
class Importer:
"""
Base class for all importers.
    The main process is executed by do_import(file_record).
    Subclasses should define read_file, get_model and import_df.
"""
basket: Any = None # type hint: baskets.Basket
async def do_import(self, file_record, dry_run=False, **kwargs) -> BasketImportResult:
"""
        Return a BasketImportResult instance; subclasses may instead return
        a message string, or a tuple/list like (message, details) for user feedback.
"""
df = await self.read_file(file_record)
model = self.get_model(file_record)
return await self.import_df(df, model, file_record, dry_run=dry_run, **kwargs)
async def read_file(self, file_record):
"""
Subclasses should define the operation for reading the file into a DataFrame.
"""
raise NotImplementedError('Nothing defined for reading the file')
async def import_df(self, df, model, file_record, dry_run=False, **kwargs):
"""
Subclasses should define the operation for importing the DataFrame (save to DB).
"""
raise NotImplementedError('Nothing defined for importing the file')
    def get_model(self, file_record):
"""
Subclasses should define the operation for returning the model for the file.
"""
raise NotImplementedError('Nothing defined for identifying the model')
def preprocess(self, df):
"""
Hook for pre-processing the dataframe read from the file.
"""
pass
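## For illustration only (not part of the codebase): a minimal subclass sketch
## showing the methods a concrete importer is expected to provide. The names
## CSVImporter and SomeModel below are hypothetical.
#
# class CSVImporter(Importer):
#     def get_model(self, file_record):
#         return SomeModel
#     async def read_file(self, file_record):
#         return pd.read_csv(file_record.path)
#     async def import_df(self, df, model, file_record, dry_run=False, **kwargs):
#         if not dry_run:
#             await upsert_df(df, model)
#         return BasketImportResult(message='Import successful',
#                                   details={'Rows in file': len(df)})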
class RawSurveyImporter(Importer):
"""
Importer for raw survey points
"""
def get_model(self, file_record):
return RawSurveyModel
async def read_file(self, file_record):
try:
df = pd.read_csv(file_record.path, header=None)
except pd.errors.ParserError:
raise ImportError('File format error (cannot parse)')
## Take only the first 5 columns
if len(df.columns) < 5:
raise ImportError('File format error (at least 5 columns needed)')
df = df[range(5)]
df.dropna(inplace=True)
df.columns = ['orig_id', 'easting', 'northing', 'elevation', 'category']
## Strip extra characters
df['category'] = df['category'].str.strip()
df['orig_id'] = df.orig_id.astype(str)
## Create a Geodataframe
gdf = gpd.GeoDataFrame(
df,
geometry=gpd.points_from_xy(df.easting, df.northing, df.elevation),
crs=conf.raw_survey['spatial_sys_ref']
)
return gdf
async def import_df(self, gdf, model, file_record, dry_run=False,
remove_misplaced=False, **kwargs):
user_feedback = OrderedDict()
user_feedback['Points in file'] = len(gdf)
## Date from the file name
## TODO: should be added to FileImport (or other metadata)
        fname_search = re.match(r'^(\S+)-(\d\d\d\d)-(\d\d)-(\d\d).*$', file_record['name'])
if not fname_search:
            raise ImportError('The file name is not OK '
                              '(format should be: "PPP-DESCRIPTION-YYYY-MM-DD", '
                              'PPP being the project name, DESCRIPTION is optional and discarded)')
date = datetime.date(day=int(fname_search.group(4)),
month=int(fname_search.group(3)),
year=int(fname_search.group(2)))
## TODO: Check if date == file_record.date
accuracies = await AccuracyEquimentSurveyorMapping.get_df(with_related=True)
## Cleanup
accuracies.rename(columns={
'gisaf_survey_surveyor_id': 'surveyor_id',
'gisaf_survey_surveyor_name': 'surveyor',
'gisaf_survey_equipment_id': 'equipment_id',
'gisaf_survey_equipment_name': 'equipment',
'gisaf_survey_accuracy_id': 'accuracy_id',
'gisaf_survey_accuracy_name': 'accuracy_name',
'gisaf_survey_accuracy_accuracy': 'accuracy',
'geometry__3': 'type',
}, inplace=True)
accuracies.drop(columns=['surveyor__1', 'equipment_2', 'accuracy__4'], inplace=True)
accuracy = accuracies.loc[
(accuracies.surveyor==file_record.surveyor)
& (accuracies.equipment==file_record.equipment)
& (accuracies.type=='Point')
]
if len(accuracy) == 0:
raise ImportError(f'No accuracy defined for surveyor {file_record.surveyor} '\
f'and equipment {file_record.equipment}')
elif len(accuracy) > 1:
raise ImportError(f'More than 1 accuracy defined for surveyor {file_record.surveyor} '\
f'and equipment {file_record.equipment}')
accuracy = accuracy.iloc[0]
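        ## Each point gets a deterministic id derived from (date, project, surveyor,
        ## equipment, row index in the file) via point_hash_function (see bottom of
        ## this module), so re-importing the same file should upsert the same rows
        ## rather than create duplicates.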
def _apply_point_hash_function(rec):
return point_hash_function(
date,
file_record.project_id,
file_record.surveyor_id,
file_record.equipment_id,
rec.name + 1 ## Index in the file
)
gdf['id'] = gdf.apply(_apply_point_hash_function, axis=1)
## Add information to all records in the gdf
gdf['date'] = pd.to_datetime(date)
gdf['project_id'] = file_record.project_id
gdf['srvyr_id'] = file_record.surveyor_id
gdf['equip_id'] = file_record.equipment_id
gdf['accur_id'] = accuracy.accuracy_id
df_raw_survey = await model.get_df(
where=and_(
model.date==date,
model.project_id==file_record.project_id,
model.srvyr_id==file_record.surveyor_id,
model.equip_id==file_record.equipment_id,
)
)
user_feedback['Existing raw_survey points'] = len(df_raw_survey)
## Import to raw_survey_data table
## PostGis specific: add SRID
gdf['geom'] = gdf['geometry'].apply(lambda g: dumps_wkb(g, srid=conf.geo.raw_survey.srid, hex=True))
if not dry_run:
await upsert_df(gdf, model)
## Keep track of points without known categories for user feedback
df_unknown_categories = gdf[~gdf.category.isin(registry.categories.index)]
unknown_categories = df_unknown_categories[['category', 'orig_id']].groupby('category').count()
for row in unknown_categories.itertuples():
user_feedback[f'Category {row[0]} (unknown)'] = row[1]
gdf = gdf.merge(registry.categories, left_on='category', right_index=True)
## Import to raw survey models tables
for category_name, category_gdf in gdf.groupby('category'):
category = registry.categories.loc[category_name]
user_feedback[f"Category {category.name} ({category.store_name})"] = len(category_gdf)
if not dry_run:
await upsert_df(category_gdf, category.raw_survey_model)
## Auto import: import from raw survey models tables to geometry tables
gdf['geom'] = gdf.to_crs(conf.srid).geometry.apply(lambda g: dumps_wkb(g, srid=conf.srid, hex=True))
for category_name, category_gdf in gdf.groupby('category'):
category = registry.categories.loc[category_name]
if category.auto_import and category.gis_type == 'Point':
if not dry_run:
await upsert_df(category_gdf, category.model)
if remove_misplaced:
gdf_db = await category.model.get_geo_df()
gdf_db.reset_index(inplace=True)
## Finding by id does not work since points might
## have been placed in a different category
#matches = gdf_db.loc[gdf_db['id'].isin(category_gdf.id)]
matches = gdf_db.loc[
gdf_db.orig_id.isin(category_gdf.orig_id)
& (gdf_db.srvyr_id == file_record.surveyor_id)
& (gdf_db.equip_id == file_record.equipment_id)
& (gdf_db.date.dt.date == date)
]
matches_merged = matches.merge(
category_gdf.to_crs(conf.srid),
on='orig_id',
suffixes=('', '_new')
)
misplaced = matches_merged.loc[
matches_merged['geometry'] != matches_merged['geometry_new']
]
if len(misplaced) > 0:
await delete_df(misplaced, category.model)
## Filter matches with orig_id and different geometry
pass
user_feedback['Points imported'] = len(gdf)
return BasketImportResult(
message=f"Import successful{' (dry run)' if dry_run else ''}",
details=user_feedback,
)
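## Illustrative usage sketch (hypothetical `file_record`, assumed to carry at least
## path, name, project_id, surveyor, surveyor_id, equipment and equipment_id):
#
# importer = RawSurveyImporter()
# result = await importer.do_import(file_record, dry_run=True, remove_misplaced=False)
# print(result.message, result.details)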
class GeoDataImporter(Importer):
"""
Base class for geo data importers
"""
def get_model(self, file_record):
return registry.geom.get(file_record.store)
async def read_file(self, file_record):
## TODO: guess_type supports pathlib.Path from Python 3.8, so it should be:
#if guess_type(file_record.path)[0] == 'application/zip':
## For Python 3.7 compatibility:
if guess_type(str(file_record.path.absolute()))[0] == 'application/zip':
return gpd.read_file(f'zip://{file_record.path}')
else:
return gpd.read_file(file_record.path)
async def get_metadata_from_raw_points(self, gdf, model, file_record):
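        """
        Hook for subclasses (e.g. LineWorkImporter) to derive surveyor, equipment
        and accuracy metadata from the raw survey points; no-op by default.
        """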
pass
async def import_df(self, gdf_file_input, model, file_record, dry_run=False, live_publish_results=False):
## XXX: Removed qml processing (Mapbox)
#qml_file_ = glob.glob(path.join(tmpdir, '*.qml'))
#if qml_file_:
# process_qml(qml_file_[0], model)
new_or_updated_items = {}
now = datetime.datetime.now()
bad_geometry_types = {}
user_feedback = OrderedDict()
to_keep = []
        ## If the model has a project (e.g. all survey models, see BaseSurveyModel),
        ## get only the features with the project given in the FileImport record
if hasattr(model, 'project_id'):
if np.isnan(file_record.project_id):
raise ImportError('No project defined for that file')
query = model.project_id==file_record.project_id
else:
query = None
if file_record.status and isinstance(model.status, Column):
query = and_(query, model.status==file_record.status)
## Explode the input (in case of multi-geometries)
gdf_file = gdf_file_input.explode(ignore_index=True)
gdf_db = await model.get_geo_df(query, with_related=False)
gdf_db.reset_index(inplace=True)
## Cleanup input: drop empty geometries and duplicates
nb_all = len(gdf_file)
gdf_file.dropna(subset=['geometry'], inplace=True)
gdf_file = gdf_file[~gdf_file.geometry.is_empty]
nb_empty_geom = nb_all - len(gdf_file)
gdf_file.drop_duplicates(inplace=True)
nb_duplicate = nb_all - nb_empty_geom - len(gdf_file)
## Reproject
gdf_file = gdf_file.copy().to_crs(conf.crs['db'])
synonyms = getattr(model, 'synonyms', {'ID': 'id'})
## Apply synonyms
for k, v in synonyms.items():
if k in gdf_file.columns:
gdf_file[v] = gdf_file[k]
if 'orig_id' in gdf_file.columns:
gdf_file.drop(columns='orig_id', inplace=True)
## Make sure the types of the columns of gdf_file and gdf_db match
for col_name, col_type in gdf_db.dtypes.items():
if col_name in gdf_file.columns:
try:
coerce = gdf_file[col_name].dtypes != col_type
except TypeError:
## Workaround for strange bug in Pandas: data type not understood
coerce = gdf_file[col_name].dtypes.name != col_type.name
if coerce:
gdf_file[col_name] = gdf_file[col_name].astype(col_type)
## Add status
if 'status' not in gdf_file.columns:
gdf_file['status'] = file_record.status
## TODO: define exactly the meaning of the 'id' attribute in shapefiles,
## it gets quite confusing.
        ## Drop the id, as it might conflict with the one in the DB
## For now in this import process only, rename eventual 'id' to 'orig_id'
if 'id' not in gdf_file.columns:
gdf_file['id'] = None
gdf_file.rename(columns={'id': 'orig_id'}, inplace=True)
gdf_file['orig_id'] = gdf_file['orig_id'].astype(str).astype(object)
## Get geometries as pygeos for gdf_file and gdf_db
gdf_file['ewkb'] = gdf_file.geometry.apply(lambda g: dumps_wkb(g, srid=conf.srid, hex=True))
gdf_db['ewkb'] = gdf_db.geometry.apply(lambda g: dumps_wkb(g, srid=conf.srid, hex=True))
self.preprocess(gdf_file)
await self.get_metadata_from_raw_points(gdf_file, model, file_record)
## Diffs: using attribute and spatial merges, see https://geopandas.org/mergingdata.html
        ## Attribute diff: identify identical rows,
        ## so they won't be touched and don't need a spatial join
df_identical = gdf_db.merge(gdf_file, how='inner')
## Need an id
if 'id' not in gdf_file.columns:
gdf_file['id'] = gdf_file.index
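        ## The outer merge on the ewkb key (hex-encoded geometry) classifies every feature:
        ## 'both' = geometry exists in DB and file (attributes may need updating),
        ## 'left_only' = only in DB (to delete), 'right_only' = only in file (to insert).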
gdf_diff = gdf_db.merge(gdf_file, on='ewkb', how='outer',
indicator=True, suffixes=('', '__file'))
to_update = gdf_diff.loc[gdf_diff._merge=='both']
## Filter out the identical rows from to_update: update non-geometry attributes,
## keeping the id
to_update = to_update.loc[~to_update.id.isin(df_identical.id)]
to_delete = gdf_diff.loc[gdf_diff._merge=='left_only']
## TODO?: try with geometric operations to determine nearest geometry
## from the file's to the DB's.
## See:
# - _binary_predicate in geopandas.array, which returns True/False,
# when it could return the id of the matching geometry
## Meanwhile, just wait for sjoin to be implemented:
# - https://github.com/geopandas/geopandas/pull/1271
#almost_equals = to_delete.geom_almost_equals(gdf_db.geometry)
#equals = to_delete.geom_equals(gdf_db.geometry)
#equals_exact = to_delete.geom_equals_exact(gdf_db.geometry, tolerance=0.1)
to_insert = gdf_diff.loc[gdf_diff._merge=='right_only']
## Take column names from the gdf_file (*__file) and rename to the gdf_db.
## Geometry is a bit of an exception.
to_insert_columns = [col for col in to_insert.columns
if col.endswith('__file') and col != 'geometry__file']
to_insert = to_insert.drop(columns={col[:-6] for col in to_insert_columns})\
.rename(columns={col: col[:-6] for col in to_insert_columns})\
.merge(gdf_diff[['orig_id', 'ewkb']], left_index=True, right_index=True)\
.drop(columns=['id'])
to_update_columns = [col for col in to_update.columns
if col.endswith('__file')
and col not in ('geometry__file', 'id__file')]
to_update = to_update.drop(columns={col[:-6] for col in to_update_columns})\
.rename(columns={col: col[:-6] for col in to_update_columns})
to_insert['geometry'] = to_insert['geometry__file']
to_insert.rename(columns={'orig_id_x': 'orig_id', 'ewkb_x': 'ewkb'}, inplace=True)
to_insert.drop(columns=['orig_id_y', 'ewkb_y'], inplace=True)
## Add project to all records in the new records
to_insert['project_id'] = file_record.project_id
## Write to DB
if not dry_run:
for _gdf in [to_insert, to_update]:
_gdf['geom'] = _gdf.geometry.apply(
lambda g: dumps_wkb(g, srid=conf.srid, hex=True)
)
if len(to_insert) > 0:
inserted = await upsert_df(to_insert, model)
to_insert['id'] = inserted['id']
await upsert_df(to_update, model)
await delete_df(to_delete, model)
## Add feature_import_data
feature_import_data = await FeatureImportData.get_df(
where=FeatureImportData.store==file_record.store
)
fid_updated = feature_import_data[['id', 'feature_id']]\
.merge(to_update, left_on='feature_id', right_on='id')
fid_updated['file_path'] = str(file_record.path)
fid_updated['store'] = file_record.store
fid_updated['origin'] = 'shapefile' ## TODO: gpkg
fid_updated['time'] = now
fid_updated['file_md5'] = file_record.md5
fid_updated.drop(columns=['orig_id'], inplace=True)
fid_updated.rename(columns={'orig_id__file': 'orig_id'}, inplace=True)
## Write to DB
await upsert_df(fid_updated, FeatureImportData)
## Same for to_insert
if len(to_insert) > 0:
fid_inserted = pd.DataFrame(columns=fid_updated.columns)
fid_inserted['feature_id'] = inserted['id']
## FIXME: fid_inserted['orig_id']
#fid_inserted['orig_id'] = inserted['orig_id']
fid_inserted['store'] = file_record.store
fid_inserted['file_path'] = str(file_record.path)
fid_inserted['origin'] = 'shapefile' ## TODO: gpkg
fid_inserted['time'] = now
fid_inserted['file_md5'] = file_record.md5
await upsert_df(fid_inserted, FeatureImportData)
## Find and update tags
if len(to_insert) > 0:
tags = await Tags.get_geo_df(where=Tags.store==file_record.store)
if len(tags) > 0:
matching_tags = gpd.sjoin(to_insert, tags.reset_index())
new_ref_ids = tags.merge(matching_tags, left_on='ref_id',
right_on='ref_id')[['ref_id', 'id_left']]
new_tags = tags.merge(new_ref_ids, on='ref_id')
new_tags.drop(columns='ref_id', inplace=True)
new_tags.rename(columns={'id_left': 'ref_id'}, inplace=True)
await upsert_df(new_tags, Tags)
## Publish on Gisaf live
gis_type = gdf_file.gis_type.iloc[0]
mapbox_type = SHAPELY_TYPE_TO_MAPBOX_TYPE.get(gis_type, None)
mapbox_paint = DEFAULT_MAPBOX_PAINT.get(mapbox_type, {}).copy()
mapbox_layout = DEFAULT_MAPBOX_LAYOUT.get(mapbox_type, {}).copy()
symbol = gisTypeSymbolMap.get(gis_type, '\ue02e')
live_layers = {
'insert': {
'data': to_insert,
'color': 'blue',
'opacity': 0.3,
},
'update': {
'data': to_update,
'color': 'green',
'opacity': 0.3,
},
'delete': {
'data': to_delete,
'color': 'red',
'opacity': 0.3,
},
}
if live_publish_results:
for name, defn in live_layers.items():
mapbox_paint[MAPBOX_COLOR_ATTRIBUTE_NAME[mapbox_type]] = defn['color']
mapbox_paint[MAPBOX_OPACITY_ATTRIBUTE_NAME[mapbox_type]] = defn['opacity']
await redis_store.publish_gdf(
f'Last import: {name}',
defn['data'],
status='*',
mapbox_paint=mapbox_paint,
mapbox_layout=mapbox_layout,
viewable_role='reviewer',
)
user_feedback['Total in file'] = nb_all
if nb_empty_geom > 0:
user_feedback['Empty single geoms in file'] = nb_empty_geom
if nb_duplicate > 0:
user_feedback['Duplicates in file'] = nb_duplicate
user_feedback['Total in database'] = len(gdf_db)
user_feedback['Identical (no change)'] = len(df_identical)
user_feedback['Update attributes'] = len(to_update)
user_feedback['Insert'] = len(to_insert)
user_feedback['Delete'] = len(to_delete)
#summary = len(gdf_db) + len(to_insert) - len(to_delete) - len(gdf_file)
#user_feedback['Summary (+/-)'] = f'{summary:d}'
return BasketImportResult(
time=now,
message=f"Import successful{' (dry run)' if dry_run else ''}",
details=user_feedback,
)
class LineWorkImporter(GeoDataImporter):
"""
Importer for line work geo data files (from survey)
"""
async def get_metadata_from_raw_points(self, gdf, model, file_record):
"""
Find raw points in the shape and set equip_id, srvyr_id, accur_id
"""
raw_points = await model.raw_model.get_geo_df(where=model.raw_model.project_id==file_record.project_id)
raw_points.drop(columns='orig_id', inplace=True)
raw_points.to_crs(conf.crs['db'], inplace=True)
        ## Take a small buffer around the points to allow for small errors
        ## from reprojection, inaccurate line work, etc.
buf_raw_points = raw_points.copy()
buf_raw_points['geometry'] = raw_points.buffer(0.0000005, resolution=3)
        ## Drop columns which interfere with metadata coming from raw points
## In future, the importer might detect and use such metadata in the input file
gdf.drop(
columns=set(gdf.columns).intersection(
{'date', 'accur_id', 'srvyr_id', 'equip_id', 'project_id'}
),
inplace=True
)
sjoin = gpd.sjoin(gdf, buf_raw_points, op='intersects', how='left')
sjoin.drop(columns=['accur_id', 'status_left', 'status_right', 'ewkb'], inplace=True)
sjoin.rename(columns={'index_right': 'raw_point_id'}, inplace=True)
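        ## The left join leaves NaN where a feature intersects no raw point,
        ## hence the nullable Int64 dtype below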
sjoin['raw_point_id'] = sjoin['raw_point_id'].astype('Int64')
## For surveyor info
## Get the last surveyor, equipment (by date)
## Sort raw points by date, maintaining the index (raw points)
cols_surveyor_info = ['srvyr_id', 'equip_id', 'date']
sjoin.set_index([sjoin.index, 'date'], inplace=True)
sjoin.sort_index(ascending=False, inplace=True)
sjoin.reset_index(inplace=True)
sjoin.set_index('level_0', inplace=True)
sjoin.index.name = 'feature_id'
gdf[cols_surveyor_info] = sjoin.groupby(sjoin.index).head(1)[cols_surveyor_info]
## Fix issue when all dates are NaT (reset_index converts that column to float,
## see https://github.com/pandas-dev/pandas/issues/30517)
gdf.date.fillna(pd.NaT, inplace=True)
## A bit the same for accuracies: take the lowest one of the raw points for each feature
accuracies = await Accuracy.get_df()
accuracies.set_index('id', inplace=True)
accuracy_mappings = await AccuracyEquimentSurveyorMapping.get_df(
where=AccuracyEquimentSurveyorMapping.geometry_type == 'Line_work'
)
accuracy_mappings.drop(columns='geometry_type', inplace=True)
sjoin = sjoin.reset_index().merge(
accuracy_mappings,
left_on=['srvyr_id', 'equip_id'],
right_on=['surveyor_id', 'equipment_id']
)
sjoin = sjoin.merge(accuracies[['accuracy']], left_on='accuracy_id', right_index=True)
sjoin.set_index(['feature_id', 'accuracy'], inplace=True)
sjoin.sort_index(ascending=False, inplace=True)
sjoin.reset_index(inplace=True)
sjoin.set_index('feature_id', inplace=True)
gdf['accur_id'] = sjoin.groupby(sjoin.index).head(1)['accuracy_id']
hash_max = 2**32
hash_date_min = datetime.date(year=1900, month=1, day=1).toordinal()
def point_hash_function(date, project_id, surveyor_id, equipment_id, i):
"""
    Generate a reasonably unique, positive integer hash from the given arguments.
    We could just use hash() % 2**32, which would be quite reasonable, but hard to reverse.
    A simple reversible hash would ideally be a number generated
    with human readability in mind, like:
return int('{:02d}{:03d}{:03d}{:01d}{:d}'.format(
project_id % hash_project_max,
date.toordinal() - hash_date_min,
surveyor_id % hash_surveyor_max,
equipment_id % hash_equipment_max,
i
)) % 2**32
    But I don't think it's reliable enough, and it would get quite confusing as soon as
    one of the parameters overflows its limits.
    Following https://stackoverflow.com/questions/4273466/reversible-hash-function (comment 7),
    one could try a reversible function with some mathematical basis.
Choose 2 big primes from https://www.bigprimes.net/archive/prime/14000000/ :
32416190039 and 32416190071
num = '{:03d}{:05d}{:03d}{:02d}{:05d}'.format(
project_id % hash_project_max,
date.toordinal() - hash_date_min,
surveyor_id % hash_surveyor_max,
equipment_id % hash_equipment_max,
i
)
    The reverse hash function, for the record, if we ever use a reversible hash based on coprimes,
    would look like, maybe:
        return (h_ * 32416190071) % 2**32
        # Then we would return the inverse of the format function in point_hash_function, easy ;)
    Not reliable enough either, so: plain hash() with no smart reverse; reversing requires brute force.
"""
h_ = hash((
project_id,
date.toordinal() - hash_date_min,
surveyor_id,
equipment_id,
i
))
return h_ % hash_max
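## For the record, a brute-force "reverse" of point_hash_function would have to
## enumerate candidate inputs and compare hashes; an illustrative sketch only
## (the candidate ranges are hypothetical and would come from the database in practice):
#
# def find_point_source(h, dates, project_ids, surveyor_ids, equipment_ids, max_index=10000):
#     for date in dates:
#         for project_id in project_ids:
#             for surveyor_id in surveyor_ids:
#                 for equipment_id in equipment_ids:
#                     for i in range(1, max_index + 1):
#                         if point_hash_function(date, project_id, surveyor_id,
#                                                equipment_id, i) == h:
#                             return date, project_id, surveyor_id, equipment_id, i
#     return None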