J'ai un DAG dans ma base de données relationnelle (Firebird) avec deux tables edge
et node
(modèle de liste d'adjacence). Je veux les interroger récursivement, mais trouvé des requêtes récursives très inefficaces. J'ai donc essayé de mettre en place des déclencheurs pour maintenir la fermeture transitive suite au Dong et.al. papier http://homepages.inf.ed.ac.uk/libkin/papers/tc-sql.pdf.Comment maintenir efficacement une table de fermeture transitive?
SELECT
s sont maintenant très rapides, mais DELETE
s sont extrêmement lents, car presque tout le graphe est copié pour une seule suppression. Pire encore, les mises à jour simultanées semblent impossibles.
Y a-t-il une meilleure façon de l'implémenter?
Modifier
J'ai fait quelques expériences et introduit un compteur de référence à la table de TC. Avec cela, les suppressions sont rapides. J'ai écrit quelques cas de test simples, mais je ne suis pas sûr si je vais bien. C'est ce que j'ai jusqu'à présent:
CREATE GENERATOR graph_tc_seq;
CREATE TABLE EDGE (
parent DECIMAL(10, 0) NOT NULL,
child DECIMAL(10, 0) NOT NULL,
PRIMARY KEY (parent, child)
);
CREATE TABLE GRAPH_TC (
parent DECIMAL(10, 0) NOT NULL,
child DECIMAL(10, 0) NOT NULL,
refcount DECIMAL(9, 0),
PRIMARY KEY (parent, child)
);
CREATE TABLE GRAPH_TC_TEMP (
session_id DECIMAL(9, 0),
parent DECIMAL(10, 0),
child DECIMAL(10, 0)
);
CREATE PROCEDURE GRAPH_TC_CREATE (p_parent DECIMAL(10, 0), c_child DECIMAL(10, 0))
AS
declare variable tp_parent DECIMAL(10,0);
declare variable tc_child DECIMAL(10,0);
declare variable session_id DECIMAL(9,0);
declare variable refs DECIMAL(9,0);
begin
session_id = gen_id(graph_tc_seq,1);
insert into graph_tc_temp (parent, child, session_id, refcount) values (:p_parent, :p_parent, :session_id, 1);
insert into graph_tc_temp (parent, child, session_id, refcount) values (:c_child, :c_child, :session_id, 1);
insert into graph_tc_temp (parent, child, session_id, refcount) values (:p_parent, :c_child, :session_id, 1);
insert into graph_tc_temp (parent, child, session_id, refcount) select distinct :p_parent, child, :session_id, refcount from graph_tc where parent = :c_child and not parent = child;
insert into graph_tc_temp (child, parent, session_id, refcount) select distinct :c_child, parent, :session_id, refcount from graph_tc where child = :p_parent and not parent = child;
insert into graph_tc_temp (parent, child, session_id, refcount) select distinct a.parent, b.child, :session_id, a.refcount*b.refcount from graph_tc a, graph_tc b where a.child = :p_parent and b.parent = :c_child and not a.parent = a.child and not b.parent = b.child;
for select parent, child, refcount from graph_tc_temp e where session_id= :session_id and exists (select * from graph_tc t where t.parent = e.parent and t.child = e.child) into :tp_parent, :tc_child, :refs do begin
update graph_tc set refcount=refcount+ :refs where parent = :tp_parent and child = :tc_child;
end
insert into graph_tc (parent, child, refcount) select parent, child, refcount from graph_tc_temp e where session_id = :session_id and not exists (select * from graph_tc t where t.parent = e.parent and t.child = e.child);
delete from graph_tc_temp where session_id = :session_id;
end^
CREATE PROCEDURE GRAPH_TC_DELETE (p_parent DECIMAL(10, 0), c_child DECIMAL(10, 0))
AS
declare variable tp_parent DECIMAL(10,0);
declare variable tc_child DECIMAL(10,0);
declare variable refs DECIMAL(9,0);
begin
delete from graph_tc where parent = :p_parent and child = :p_parent and refcount <= 1;
update graph_tc set refcount = refcount - 1 where parent = :p_parent and child = :p_parent and refcount > 1;
delete from graph_tc where parent = :c_child and child = :c_child and refcount <= 1;
update graph_tc set refcount = refcount - 1 where parent = :c_child and child = :c_child and refcount > 1;
delete from graph_tc where parent = :p_parent and child = :c_child and refcount <= 1;
update graph_tc set refcount = refcount - 1 where parent = :p_parent and child = :c_child and refcount > 1;
for select distinct :p_parent, b.child, refcount from graph_tc b where b.parent = :c_child and not b.parent = b.child into :tp_parent, :tc_child, :refs do begin
delete from graph_tc where parent = :tp_parent and child = :tc_child and refcount <= :refs;
update graph_tc set refcount = refcount - :refs where parent = :tp_parent and child = :tc_child and refcount > :refs;
end
for select distinct :c_child, b.parent, refcount from graph_tc b where b.child = :p_parent and not b.parent = b.child into :tc_child, :tp_parent, :refs do begin
delete from graph_tc where child = :tc_child and parent = :tp_parent and refcount <= :refs;
update graph_tc set refcount = refcount - :refs where child = :tc_child and parent = :tp_parent and refcount > :refs;
end
for select distinct a.parent, b.child, a.refcount*b.refcount from graph_tc a, graph_tc b where not a.parent = a.child and not b.parent = b.child and a.child = :p_parent and b.parent = :c_child into :tp_parent, :tc_child, :refs do begin
delete from graph_tc where parent = :tp_parent and child = :tc_child and refcount <= :refs;
update graph_tc set refcount = refcount - :refs where parent = :tp_parent and child = :tc_child and refcount > :refs;
end
end^
CREATE TRIGGER GRAPH_TC_AFTER_INSERT FOR EDGE AFTER INSERT as
begin
execute procedure graph_tc_create(new.parent,new.child);
end^
CREATE TRIGGER GRAPH_TC_AFTER_UPDATE FOR EDGE AFTER UPDATE as
begin
if ((new.parent <> old.parent) or (new.child <> old.child)) then begin
execute procedure graph_tc_delete(old.parent,old.child);
execute procedure graph_tc_create(new.parent,new.child);
end
end^
CREATE TRIGGER GRAPH_TC_AFTER_DELETE FOR EDGE AFTER DELETE as
begin
execute procedure graph_tc_delete(old.parent,old.child);
end^
Ceci est ma propre idée, mais je pense que d'autres ont déjà implémenté un TC. Font-ils la même chose?
J'ai quelques cas de test, mais je ne suis pas sûr si je pourrais obtenir une incohérence avec des graphes plus gros. En ce qui concerne la simultanéité, je pense que cette approche échouera lorsque deux transactions simultanées veulent mettre à jour le graphique, n'est-ce pas?
Modifier
J'ai trouvé quelques bugs dans mon code, et je voudrais partager la version fixe avec vous. J'ai trouvé un bon article: http://www.codeproject.com/Articles/22824/A-Model-to-Represent-Directed-Acyclic-Graphs-DAG-o. Y a-t-il des articles ou des articles scientifiques plus intéressants, avec des approches différentes?
Pouvez-vous inclure (les parties pertinentes de) les définitions DDL et de déclenchement? –
@MarkRotteveel voir mon édition –
Envisagez d'utiliser un [GTT (table temporaire globale)] (http://www.firebirdsql.org/file/documentation/reference_manuals/reference_material/html/langrefupd25-ddl-table.html) au lieu d'un table normale pour 'GRAPH_TC_TEMP' –