forked from platypush/platypush
Big rewrite/refactor of the entities merger
This commit is contained in:
parent
2411b961e8
commit
878fe91155
6 changed files with 193 additions and 126 deletions
|
@ -5,6 +5,7 @@ from typing import Collection, Optional
|
||||||
|
|
||||||
from ._base import (
|
from ._base import (
|
||||||
Entity,
|
Entity,
|
||||||
|
EntityKey,
|
||||||
EntitySavedCallback,
|
EntitySavedCallback,
|
||||||
get_entities_registry,
|
get_entities_registry,
|
||||||
init_entities_db,
|
init_entities_db,
|
||||||
|
@ -80,6 +81,7 @@ __all__ = (
|
||||||
'DimmerEntityManager',
|
'DimmerEntityManager',
|
||||||
'EntitiesEngine',
|
'EntitiesEngine',
|
||||||
'Entity',
|
'Entity',
|
||||||
|
'EntityKey',
|
||||||
'EntityManager',
|
'EntityManager',
|
||||||
'EntitySavedCallback',
|
'EntitySavedCallback',
|
||||||
'EnumSwitchEntityManager',
|
'EnumSwitchEntityManager',
|
||||||
|
|
|
@ -27,6 +27,11 @@ from platypush.message import JSONAble
|
||||||
EntityRegistryType = Dict[str, Type['Entity']]
|
EntityRegistryType = Dict[str, Type['Entity']]
|
||||||
entities_registry: EntityRegistryType = {}
|
entities_registry: EntityRegistryType = {}
|
||||||
|
|
||||||
|
EntityKey = Tuple[str, str]
|
||||||
|
""" The entity's logical key, as an ``<external_id, plugin>`` tuple. """
|
||||||
|
EntityMapping = Dict[EntityKey, 'Entity']
|
||||||
|
""" Internal mapping for entities used for deduplication/merge/upsert. """
|
||||||
|
|
||||||
_import_error_ignored_modules: Final[Set[str]] = {'bluetooth'}
|
_import_error_ignored_modules: Final[Set[str]] = {'bluetooth'}
|
||||||
"""
|
"""
|
||||||
ImportError exceptions will be ignored for these entity submodules when
|
ImportError exceptions will be ignored for these entity submodules when
|
||||||
|
@ -110,7 +115,7 @@ if 'entity' not in Base.metadata:
|
||||||
return tuple(inspector.mapper.column_attrs)
|
return tuple(inspector.mapper.column_attrs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def entity_key(self) -> Tuple[str, str]:
|
def entity_key(self) -> EntityKey:
|
||||||
"""
|
"""
|
||||||
This method returns the "external" key of an entity.
|
This method returns the "external" key of an entity.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
from logging import getLogger
|
from logging import getLogger
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from typing import Dict, Optional, Tuple
|
from typing import Dict, Optional
|
||||||
|
|
||||||
from platypush.context import get_bus
|
from platypush.context import get_bus
|
||||||
from platypush.entities import Entity
|
from platypush.entities import Entity
|
||||||
from platypush.message.event.entities import EntityUpdateEvent
|
from platypush.message.event.entities import EntityUpdateEvent
|
||||||
from platypush.utils import set_thread_name
|
from platypush.utils import set_thread_name
|
||||||
|
|
||||||
from platypush.entities._base import EntitySavedCallback
|
from platypush.entities._base import EntityKey, EntitySavedCallback
|
||||||
from platypush.entities._engine.queue import EntitiesQueue
|
from platypush.entities._engine.queue import EntitiesQueue
|
||||||
from platypush.entities._engine.repo import EntitiesRepository
|
from platypush.entities._engine.repo import EntitiesRepository
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ class EntitiesEngine(Thread):
|
||||||
""" Queue where all entity upsert requests are received."""
|
""" Queue where all entity upsert requests are received."""
|
||||||
self._repo = EntitiesRepository()
|
self._repo = EntitiesRepository()
|
||||||
""" The repository of the processed entities. """
|
""" The repository of the processed entities. """
|
||||||
self._callbacks: Dict[Tuple[str, str], EntitySavedCallback] = {}
|
self._callbacks: Dict[EntityKey, EntitySavedCallback] = {}
|
||||||
""" (external_id, plugin) -> callback mapping"""
|
""" (external_id, plugin) -> callback mapping"""
|
||||||
|
|
||||||
def post(self, *entities: Entity, callback: Optional[EntitySavedCallback] = None):
|
def post(self, *entities: Entity, callback: Optional[EntitySavedCallback] = None):
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Iterable, Tuple
|
from typing import Dict, Iterable, Optional, Tuple
|
||||||
|
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from platypush.entities import Entity
|
from platypush.entities._base import Entity, EntityMapping
|
||||||
|
|
||||||
# pylint: disable=no-name-in-module
|
# pylint: disable=no-name-in-module
|
||||||
from platypush.entities._engine.repo.db import EntitiesDb
|
from platypush.entities._engine.repo.db import EntitiesDb
|
||||||
|
@ -20,7 +20,7 @@ class EntitiesRepository:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._db = EntitiesDb()
|
self._db = EntitiesDb()
|
||||||
self._merger = EntitiesMerger(self)
|
self._merge = EntitiesMerger()
|
||||||
|
|
||||||
def get(
|
def get(
|
||||||
self, session: Session, entities: Iterable[Entity]
|
self, session: Session, entities: Iterable[Entity]
|
||||||
|
@ -43,7 +43,63 @@ class EntitiesRepository:
|
||||||
autocommit=False,
|
autocommit=False,
|
||||||
expire_on_commit=False,
|
expire_on_commit=False,
|
||||||
) as session:
|
) as session:
|
||||||
merged_entities = self._merger.merge(session, entities)
|
merged_entities = self._merge(
|
||||||
|
session,
|
||||||
|
entities,
|
||||||
|
existing_entities=self._fetch_all_and_flatten(session, entities),
|
||||||
|
)
|
||||||
|
|
||||||
merged_entities = self._db.upsert(session, merged_entities)
|
merged_entities = self._db.upsert(session, merged_entities)
|
||||||
|
|
||||||
return merged_entities
|
return merged_entities
|
||||||
|
|
||||||
|
def _fetch_all_and_flatten(
|
||||||
|
self,
|
||||||
|
session: Session,
|
||||||
|
entities: Iterable[Entity],
|
||||||
|
) -> EntityMapping:
|
||||||
|
"""
|
||||||
|
Given a collection of entities, retrieves their persisted instances
|
||||||
|
(lookup is performed by ``entity_key``), and it also recursively
|
||||||
|
expands their relationships, so the session is updated with the latest
|
||||||
|
persisted versions of all the objects in the hierarchy.
|
||||||
|
|
||||||
|
:return: An ``entity_key -> entity`` mapping.
|
||||||
|
"""
|
||||||
|
expanded_entities = {}
|
||||||
|
for entity in entities:
|
||||||
|
root_entity = self._get_root_entity(session, entity)
|
||||||
|
expanded_entities.update(self._expand_children([root_entity]))
|
||||||
|
expanded_entities.update(self._expand_children([entity]))
|
||||||
|
|
||||||
|
return self.get(session, expanded_entities.values())
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _expand_children(
|
||||||
|
cls,
|
||||||
|
entities: Iterable[Entity],
|
||||||
|
all_entities: Optional[EntityMapping] = None,
|
||||||
|
) -> EntityMapping:
|
||||||
|
"""
|
||||||
|
Recursively expands and flattens all the children of a set of entities
|
||||||
|
into an ``entity_key -> entity`` mapping.
|
||||||
|
"""
|
||||||
|
all_entities = all_entities or {}
|
||||||
|
for entity in entities:
|
||||||
|
all_entities[entity.entity_key] = entity
|
||||||
|
cls._expand_children(entity.children, all_entities)
|
||||||
|
|
||||||
|
return all_entities
|
||||||
|
|
||||||
|
def _get_root_entity(self, session: Session, entity: Entity) -> Entity:
|
||||||
|
"""
|
||||||
|
Retrieve the root entity (i.e. the one with a null parent) of an
|
||||||
|
entity.
|
||||||
|
"""
|
||||||
|
parent = entity
|
||||||
|
while parent:
|
||||||
|
parent = self._merge.get_parent(session, entity)
|
||||||
|
if parent:
|
||||||
|
entity = parent
|
||||||
|
|
||||||
|
return entity
|
||||||
|
|
|
@ -6,7 +6,7 @@ from sqlalchemy import and_, or_
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from platypush.context import get_plugin
|
from platypush.context import get_plugin
|
||||||
from platypush.entities import Entity
|
from platypush.entities._base import Entity
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
@ -1,34 +1,30 @@
|
||||||
from typing import Dict, Iterable, List, Optional, Tuple
|
from typing import Iterable, List, Optional
|
||||||
|
|
||||||
from sqlalchemy.orm import Session, exc
|
from sqlalchemy.orm import Session, exc
|
||||||
|
|
||||||
from platypush.entities import Entity
|
from platypush.entities._base import Entity, EntityMapping
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-few-public-methods
|
# pylint: disable=too-few-public-methods
|
||||||
class EntitiesMerger:
|
class EntitiesMerger:
|
||||||
"""
|
"""
|
||||||
This object is in charge of detecting and merging entities that already
|
A stateless functor in charge of detecting and merging entities that
|
||||||
exist on the database before flushing the session.
|
already exist on the database before flushing the session.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, repository):
|
def __call__(
|
||||||
from . import EntitiesRepository
|
|
||||||
|
|
||||||
self._repo: EntitiesRepository = repository
|
|
||||||
|
|
||||||
def merge(
|
|
||||||
self,
|
self,
|
||||||
session: Session,
|
session: Session,
|
||||||
entities: Iterable[Entity],
|
entities: Iterable[Entity],
|
||||||
|
existing_entities: Optional[EntityMapping] = None,
|
||||||
) -> List[Entity]:
|
) -> List[Entity]:
|
||||||
"""
|
"""
|
||||||
Merge a set of entities with their existing representations and update
|
Merge a set of entities with their existing representations and update
|
||||||
the parent/child relationships and return a tuple with
|
the parent/child relationships and return a list containing
|
||||||
``[new_entities, updated_entities]``.
|
``[*updated_entities, *new_entities]``.
|
||||||
"""
|
"""
|
||||||
new_entities: Dict[Tuple[str, str], Entity] = {}
|
existing_entities = existing_entities or {}
|
||||||
existing_entities: Dict[Tuple[str, str], Entity] = {}
|
new_entities: EntityMapping = {}
|
||||||
|
|
||||||
self._merge(
|
self._merge(
|
||||||
session,
|
session,
|
||||||
|
@ -37,156 +33,164 @@ class EntitiesMerger:
|
||||||
existing_entities=existing_entities,
|
existing_entities=existing_entities,
|
||||||
)
|
)
|
||||||
|
|
||||||
return [*existing_entities.values(), *new_entities.values()]
|
return list({**existing_entities, **new_entities}.values())
|
||||||
|
|
||||||
def _merge(
|
def _merge(
|
||||||
self,
|
self,
|
||||||
session: Session,
|
session: Session,
|
||||||
entities: Iterable[Entity],
|
entities: Iterable[Entity],
|
||||||
new_entities: Dict[Tuple[str, str], Entity],
|
new_entities: EntityMapping,
|
||||||
existing_entities: Dict[Tuple[str, str], Entity],
|
existing_entities: EntityMapping,
|
||||||
) -> List[Entity]:
|
) -> List[Entity]:
|
||||||
"""
|
"""
|
||||||
(Recursive) inner implementation of the entity merge logic.
|
(Recursive) inner implementation of the entity merge logic.
|
||||||
"""
|
"""
|
||||||
processed_entities = []
|
processed_entities = []
|
||||||
existing_entities.update(self._repo.get(session, entities))
|
|
||||||
|
|
||||||
# Make sure that we have no duplicate entity keys in the current batch
|
|
||||||
entities = list(
|
|
||||||
{
|
|
||||||
**({e.entity_key: e for e in entities}),
|
|
||||||
**(
|
|
||||||
{
|
|
||||||
e.entity_key: e
|
|
||||||
for e in {str(ee.id): ee for ee in entities if ee.id}.values()
|
|
||||||
}
|
|
||||||
),
|
|
||||||
}.values()
|
|
||||||
)
|
|
||||||
|
|
||||||
# Retrieve existing records and merge them
|
# Retrieve existing records and merge them
|
||||||
for entity in entities:
|
for entity in entities:
|
||||||
key = entity.entity_key
|
key = entity.entity_key
|
||||||
existing_entity = existing_entities.get(key, new_entities.get(key))
|
existing_entity = existing_entities.get(key, new_entities.get(key))
|
||||||
parent_id, parent = self._update_parent(session, entity, new_entities)
|
|
||||||
|
# Synchronize the parent(s)
|
||||||
|
entity = self._sync_parent(session, entity, new_entities, existing_entities)
|
||||||
|
|
||||||
if existing_entity:
|
if existing_entity:
|
||||||
# Update the parent
|
# Merge the columns with those of the existing entity
|
||||||
if not parent_id and parent:
|
existing_entity = self._merge_columns(entity, existing_entity)
|
||||||
existing_entity.parent = parent
|
|
||||||
else:
|
|
||||||
existing_entity.parent_id = parent_id
|
|
||||||
|
|
||||||
# Merge the other columns
|
|
||||||
self._merge_columns(entity, existing_entity)
|
|
||||||
# Merge the children
|
# Merge the children
|
||||||
self._merge(session, entity.children, new_entities, existing_entities)
|
self._append_children(
|
||||||
# Use the updated version of the existing entity.
|
existing_entity,
|
||||||
|
*self._merge(
|
||||||
|
session,
|
||||||
|
entity.children,
|
||||||
|
new_entities,
|
||||||
|
existing_entities,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use the existing entity now that it's been merged
|
||||||
entity = existing_entity
|
entity = existing_entity
|
||||||
else:
|
else:
|
||||||
# Add it to the map of new entities if the entity doesn't exist
|
# Add it to the map of new entities if the entity doesn't exist on the db
|
||||||
# on the repo
|
|
||||||
new_entities[key] = entity
|
new_entities[key] = entity
|
||||||
|
|
||||||
processed_entities.append(entity)
|
processed_entities.append(entity)
|
||||||
|
|
||||||
return processed_entities
|
return processed_entities
|
||||||
|
|
||||||
def _update_parent(
|
@classmethod
|
||||||
self,
|
def _sync_parent(
|
||||||
|
cls,
|
||||||
session: Session,
|
session: Session,
|
||||||
entity: Entity,
|
entity: Entity,
|
||||||
new_entities: Dict[Tuple[str, str], Entity],
|
new_entities: EntityMapping,
|
||||||
) -> Tuple[Optional[int], Optional[Entity]]:
|
existing_entities: EntityMapping,
|
||||||
|
) -> Entity:
|
||||||
"""
|
"""
|
||||||
Recursively update the hierarchy of an entity, moving upwards towards
|
Recursively refresh the parent of an entity all the way up in the
|
||||||
the parent.
|
hierarchy, to make sure that all the parent/child relations are
|
||||||
|
appropriately rewired and that all the relevant objects are added to
|
||||||
|
this session.
|
||||||
"""
|
"""
|
||||||
parent_id: Optional[int] = entity.parent_id
|
parent = cls.get_parent(session, entity)
|
||||||
try:
|
if not parent:
|
||||||
parent: Optional[Entity] = entity.parent
|
# No parent -> we can terminate the recursive climbing
|
||||||
except exc.DetachedInstanceError:
|
return entity
|
||||||
# Dirty fix for `Parent instance <...> is not bound to a Session;
|
|
||||||
# lazy load operation of attribute 'parent' cannot proceed
|
|
||||||
parent = session.query(Entity).get(parent_id) if parent_id else None
|
|
||||||
|
|
||||||
# If the entity has a parent with an ID, use that
|
# Check if an entity with the same key as the reported parent already
|
||||||
if parent and parent.id:
|
# exists in the cached entities
|
||||||
parent_id = parent_id or parent.id
|
existing_parent = existing_entities.get(
|
||||||
|
parent.entity_key, new_entities.get(parent.entity_key)
|
||||||
|
)
|
||||||
|
|
||||||
# If there's no parent_id but there is a parent object, try to fetch
|
if not existing_parent:
|
||||||
# its stored version
|
# No existing parent -> we need to flush the one reported by this
|
||||||
if not parent_id and parent:
|
# entity
|
||||||
batch = list(self._repo.get(session, [parent]).values())
|
return entity
|
||||||
|
|
||||||
# If the parent is already stored, use its ID
|
# Check if the existing parent already has a child with the same key as
|
||||||
if batch:
|
# this entity
|
||||||
parent = batch[0]
|
existing_entity = next(
|
||||||
parent_id = parent.id
|
iter(
|
||||||
|
child
|
||||||
|
for child in existing_parent.children
|
||||||
|
if child.entity_key == entity.entity_key
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
# Otherwise, check if its key is already among those awaiting flush
|
if not existing_entity:
|
||||||
# and reuse the same objects (prevents SQLAlchemy from generating
|
# If this entity isn't currently a member of the existing parent,
|
||||||
# duplicate inserts)
|
# temporarily reset the parent of the current entity, so we won't
|
||||||
else:
|
# carry stale objects around. We will soon rewire it to the
|
||||||
temp_entity = new_entities.get(parent.entity_key)
|
# existing parent.
|
||||||
if temp_entity:
|
|
||||||
self._remove_duplicate_children(entity, temp_entity)
|
|
||||||
parent = entity.parent = temp_entity
|
|
||||||
else:
|
|
||||||
new_entities[parent.entity_key] = parent
|
|
||||||
|
|
||||||
# Recursively apply any changes up in the hierarchy
|
|
||||||
self._update_parent(session, parent, new_entities=new_entities)
|
|
||||||
|
|
||||||
# If we found a parent_id, populate it on the entity (and remove the
|
|
||||||
# supporting relationship object so SQLAlchemy doesn't go nuts when
|
|
||||||
# flushing)
|
|
||||||
if parent_id:
|
|
||||||
entity.parent = None
|
entity.parent = None
|
||||||
entity.parent_id = parent_id
|
else:
|
||||||
|
# Otherwise, merge the columns of the existing entity with those of
|
||||||
|
# the new entity and use the existing entity
|
||||||
|
entity = cls._merge_columns(entity, existing_entity)
|
||||||
|
|
||||||
return parent_id, parent
|
# Refresh the existing collection of children with the new/updated
|
||||||
|
# entity
|
||||||
|
cls._append_children(existing_parent, entity)
|
||||||
|
|
||||||
|
# Recursively call this function to synchronize any parent entities up
|
||||||
|
# in the taxonomy
|
||||||
|
cls._sync_parent(session, existing_parent, new_entities, existing_entities)
|
||||||
|
return entity
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _remove_duplicate_children(entity: Entity, parent: Optional[Entity] = None):
|
def get_parent(session: Session, entity: Entity) -> Optional[Entity]:
|
||||||
if not parent:
|
"""
|
||||||
return
|
Gets the parent of an entity, and fetches it if it's not available in
|
||||||
|
the current session.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return entity.parent
|
||||||
|
except exc.DetachedInstanceError:
|
||||||
|
# Dirty fix for `Parent instance <...> is not bound to a Session;
|
||||||
|
# lazy load operation of attribute 'parent' cannot proceed`
|
||||||
|
return (
|
||||||
|
session.query(Entity).get(entity.parent_id)
|
||||||
|
if entity.parent_id
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
# Make sure that an entity has no duplicate entity IDs among its
|
@staticmethod
|
||||||
# children
|
def _append_children(entity: Entity, *children: Entity):
|
||||||
existing_child_index_by_id = None
|
"""
|
||||||
|
Update the list of children of a given entity with the given list of
|
||||||
|
entities.
|
||||||
|
|
||||||
|
Note that, in case of ``entity_key`` conflict (the key of a new entity
|
||||||
|
already exists in the entity's children), the most recent version will
|
||||||
|
be used, so any column merge logic needs to happen before this method
|
||||||
|
is called.
|
||||||
|
"""
|
||||||
|
entity.children = list(
|
||||||
|
{
|
||||||
|
**{e.entity_key: e for e in entity.children},
|
||||||
|
**{e.entity_key: e for e in children},
|
||||||
|
}.values()
|
||||||
|
)
|
||||||
|
|
||||||
|
for child in children:
|
||||||
|
child.parent = entity
|
||||||
if entity.id:
|
if entity.id:
|
||||||
try:
|
child.parent_id = entity.id
|
||||||
existing_child_index_by_id = [e.id for e in parent.children].index(
|
|
||||||
entity.id
|
|
||||||
)
|
|
||||||
parent.children.pop(existing_child_index_by_id)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Make sure that an entity has no duplicate entity keys among its
|
@staticmethod
|
||||||
# children
|
def _merge_columns(entity: Entity, existing_entity: Entity) -> Entity:
|
||||||
existing_child_index_by_key = None
|
|
||||||
try:
|
|
||||||
existing_child_index_by_key = [e.entity_key for e in parent.children].index(
|
|
||||||
entity.entity_key
|
|
||||||
)
|
|
||||||
parent.children.pop(existing_child_index_by_key)
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _merge_columns(cls, entity: Entity, existing_entity: Entity) -> Entity:
|
|
||||||
"""
|
"""
|
||||||
Merge two versions of an entity column by column.
|
Merge two versions of an entity column by column.
|
||||||
"""
|
"""
|
||||||
columns = [col.key for col in entity.columns]
|
columns = [col.key for col in entity.columns]
|
||||||
for col in columns:
|
for col in columns:
|
||||||
if col == 'meta':
|
if col == 'meta':
|
||||||
existing_entity.meta = {
|
existing_entity.meta = { # type: ignore
|
||||||
**(existing_entity.meta or {}),
|
**(existing_entity.meta or {}), # type: ignore
|
||||||
**(entity.meta or {}),
|
**(entity.meta or {}), # type: ignore
|
||||||
}
|
}
|
||||||
elif col not in ('id', 'created_at'):
|
elif col not in ('id', 'created_at'):
|
||||||
setattr(existing_entity, col, getattr(entity, col))
|
setattr(existing_entity, col, getattr(entity, col))
|
||||||
|
|
Loading…
Reference in a new issue