Don't use the entities cache when upserting entities.

This may make things a bit less optimal, but it's probably the only
possible solution that preserves my sanity.

Managing upserts of cached instances that were previously made transient
and expunged from the session is far from easy, and handling recursive
parent/child relationships adds one more layer of complexity (and that
handling is already complex enough in its current implementation).
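For context, a minimal sketch of the underlying problem, assuming plain SQLAlchemy (illustrative only, not code from this repository): once a cached instance has been expunged from its session, a later upsert has to merge() it back in, and merge() returns a different persistent object, so the cached instance and the session-tracked one drift apart; every parent/child object reachable from the cache needs the same reconciliation.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class CachedEntity(Base):
    __tablename__ = 'entity'
    id = Column(Integer, primary_key=True)
    name = Column(String)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

# Store an entity, then detach it and keep it in an application-level cache.
with Session(engine, expire_on_commit=False) as session:
    entity = CachedEntity(name='light-1')
    session.add(entity)
    session.commit()
    session.expunge(entity)  # the instance is now detached from any session

cache = {entity.id: entity}

# A later upsert has to merge() the detached instance back into a new session.
# merge() returns a *different* persistent object, so the cached instance and
# the one the session tracks silently diverge; with recursive parent/child
# relationships every related object needs the same reconciliation.
with Session(engine, expire_on_commit=False) as session:
    merged = session.merge(cache[entity.id])
    merged.name = 'light-1 (renamed)'
    session.commit()

print(cache[entity.id].name)  # still 'light-1': the cached copy is stale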
Fabio Manganiello 2022-12-18 15:13:21 +01:00
parent b0464219d3
commit 5763c5e0ba
Signed by: blacklight (GPG key ID: D90FBA7F76362774)
2 changed files with 25 additions and 25 deletions


@@ -37,40 +37,40 @@ class EntitiesRepository:
         logger.info('Entities cache initialized')
 
     def get(
-        self, session: Session, entities: Iterable[Entity]
+        self, session: Session, entities: Iterable[Entity], use_cache=True
     ) -> Dict[Tuple[str, str], Entity]:
         """
         Given a set of entity objects, it returns those that already exist
         (or have the same ``entity_key``). It looks up both the cache and the
         database.
         """
-        entities_map: Dict[Tuple[str, str], Entity] = {
-            e.entity_key: e for e in entities
-        }
-
-        # Fetch the entities that exist in the cache
         existing_entities = {}
-        # TODO UNCOMMENT THIS CODE TO ACTUALLY USE THE CACHE!
-        # existing_entities = {
-        #     key: self._entities_cache.by_external_id_and_plugin[key]
-        #     for key in entities_map.keys()
-        #     if key in self._entities_cache.by_external_id_and_plugin
-        # }
+        if not use_cache:
+            existing_entities = self._db.fetch(session, entities)
+            self._cache.update(*existing_entities.values())
+        else:
+            # Fetch the entities that exist in the cache
+            existing_entities = {
+                e.entity_key: self._cache.get(e) for e in entities if self._cache.get(e)
+            }
 
-        # Retrieve from the database the entities that miss from the cache
-        cache_miss_entities = {
-            key: e for key, e in entities_map.items() if key not in existing_entities
-        }
+            # Retrieve from the database the entities that miss from the cache
+            cache_miss_entities = {
+                e.entity_key: e
+                for e in entities
+                if e.entity_key not in existing_entities
+            }
 
-        cache_miss_existing_entities = self._db.fetch(
-            session, cache_miss_entities.values()
-        )
+            cache_miss_existing_entities = self._db.fetch(
+                session, cache_miss_entities.values()
+            )
 
-        # Update the cache
-        self._cache.update(*cache_miss_existing_entities.values())
+            # Update the cache
+            self._cache.update(*cache_miss_existing_entities.values())
 
-        # Return the union of the cached + retrieved entities
-        existing_entities.update(cache_miss_existing_entities)
+            # Return the union of the cached + retrieved entities
+            existing_entities.update(cache_miss_existing_entities)
 
         return existing_entities
 
     def save(self, *entities: Entity) -> Iterable[Entity]:
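The net effect of the new flag, in short: with use_cache=False the repository goes straight to the database and refreshes the cache from whatever it finds, while the default use_cache=True path still serves cache hits first and only queries the database for the misses. A hypothetical caller-side sketch (the repo/session/entities names are illustrative):

# Upsert path (see the EntitiesMerger change below): bypass the cache so the
# merge only ever deals with instances fetched in the current session.
stored = repo.get(session, entities, use_cache=False)

# Read paths that can tolerate a cached (possibly detached) instance keep the default.
maybe_cached = repo.get(session, entities)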


@@ -49,7 +49,7 @@ class EntitiesMerger:
         (Recursive) inner implementation of the entity merge logic.
         """
         processed_entities = []
-        existing_entities.update(self._repo.get(session, entities))
+        existing_entities.update(self._repo.get(session, entities, use_cache=False))
 
         # Retrieve existing records and merge them
         for entity in entities:
@@ -96,7 +96,7 @@ class EntitiesMerger:
         # If there's no parent_id but there is a parent object, try to fetch
         # its stored version
         if not parent_id and parent:
-            batch = list(self._repo.get(session, [parent]).values())
+            batch = list(self._repo.get(session, [parent], use_cache=False).values())
 
             # If the parent is already stored, use its ID
             if batch: