// Create (or reuse) one `中医经络脉象数据库` node per non-empty, trimmed value
// found in the four listed CSV columns.
LOAD CSV WITH HEADERS FROM 'file:///中医经络脉象数据库.csv' AS line
UNWIND [
  trim(line['脉象库']),
  trim(line['中医复合脉象库']),
  trim(line['中医经脉库']),
  trim(line['中医络脉库'])
] AS entry
WITH entry
// trim(null) yields null, so missing cells are dropped here together with blanks.
WHERE entry IS NOT NULL AND entry <> ''
MERGE (:中医经络脉象数据库 {name: entry})
查找和删除重复节点
先查找再删除
这是查找的
1 2 3 4 5 6 7 8 9 10 11 12 13
// Read-only preview: list groups of `形态特征库` nodes whose names become
// identical once every character other than digits, ASCII letters and Han
// characters is stripped.
MATCH (n:`形态特征库`)
WITH
  n,
  // Whitelist: keep only Han characters, ASCII letters and digits.
  apoc.text.regreplace(toString(n.name), '[^0-9A-Za-z\\p{IsHan}]', '') AS cleanedName
WITH cleanedName, collect(n) AS nodes
// A null or empty cleaned name is excluded; only groups with 2+ nodes are duplicates.
WHERE cleanedName <> '' AND size(nodes) > 1
RETURN
  cleanedName,
  size(nodes) AS cnt,
  [x IN nodes | x.name] AS rawNames,
  [x IN nodes | id(x)] AS nodeIds
ORDER BY cnt DESC;
// Merge `形态特征库` nodes whose cleaned names collide, leaving one node per
// group, and set the survivor's name to the cleaned name.
MATCH (n:`形态特征库`)
WITH
  n,
  apoc.text.regreplace(toString(n.name), '[^0-9A-Za-z\\p{IsHan}]', '') AS cleanedName
// Keep policy: prefer the node whose raw name already equals the cleaned name
// (the cleanest one); otherwise prefer the node with the shortest raw name.
WITH cleanedName, n
ORDER BY (toString(n.name) = cleanedName) DESC, size(toString(n.name)) ASC
WITH cleanedName, collect(n) AS nodes
WHERE cleanedName <> '' AND size(nodes) > 1
// Capture the group size before merging, while every node in the list still exists.
WITH cleanedName, nodes, size(nodes) AS mergedFrom
// NOTE(review): the original text used `node` without binding it; this merge
// step is reconstructed. mergeNodes keeps the first node of the list, which is
// the preferred node according to the ordering above. Confirm the
// `properties` / `mergeRels` settings match the intended behavior.
CALL apoc.refactor.mergeNodes(nodes, {properties: 'discard', mergeRels: true})
YIELD node
SET node.name = cleanedName
RETURN
  cleanedName,
  mergedFrom,
  id(node) AS keptNodeId,
  node.name AS keptName
ORDER BY mergedFrom DESC;
🔗 批量构建关系(通用方法)
这段代码在遍历表格时会先清洗每个单元格的内容(去掉空格等非文字、非数字字符),只按实体本身去匹配节点,不会因为多了空格就把同一个实体当成两个不同的实体。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// For every CSV row, take three (source, relation, target) column triples and
// create a relationship of the named type between the already existing
// `病因库` nodes. Cell values are normalised with apoc.text.clean before matching.
LOAD CSV WITH HEADERS FROM 'file:///中医病因数据库.csv' AS line
// NOTE(review): all three triples read the relation type from `内伤关系一` —
// confirm this is intended rather than separate relation columns.
UNWIND [
  [line.`内伤一`, line.`内伤关系一`, line.`内伤二`],
  [line.`内伤二`, line.`内伤关系一`, line.`内伤三`],
  [line.`内伤三`, line.`内伤关系一`, line.`内伤四`]
] AS triple
WITH
  apoc.text.clean(toString(triple[0])) AS fromName,
  apoc.text.clean(toString(triple[2])) AS toName,
  apoc.text.clean(toString(triple[1])) AS relName
// Skip triples with a blank part or a literal 'null' string in any position.
WHERE fromName <> '' AND toLower(fromName) <> 'null'
  AND toName <> '' AND toLower(toName) <> 'null'
  AND relName <> '' AND toLower(relName) <> 'null'
MATCH (s:病因库 {name: fromName})
MATCH (t:病因库 {name: toName})
// The relationship type is only known at run time, hence the APOC procedure.
CALL apoc.create.relationship(s, relName, {}, t) YIELD rel
RETURN count(rel) AS createdRels;
1 2 3 4 5
// Preview up to 50 self-loop relationships (start node and end node are the same).
MATCH (a)-[r]->(a)
RETURN r
LIMIT 50;

// Delete every self-loop relationship.
MATCH (a)-[r]->(a)
DELETE r;
如果发现了重复的关系,可以用下面的语句对所有关系做去重处理
1 2 3 4
// Remove parallel relationships: for each (start node, end node, type) group,
// keep the first collected relationship and delete all the others.
// Relationship properties are not compared when grouping.
MATCH (src)-[rel]->(dst)
WITH src, dst, type(rel) AS relType, collect(rel) AS dupes
WHERE size(dupes) > 1
UNWIND tail(dupes) AS extra
DELETE extra;
搜索没有关系的节点,确保构建关系的时候没有遗漏
1 2 3
// List `解剖库` nodes that have no relationship in either direction.
MATCH (isolated:解剖库)
WHERE NOT (isolated)--()
RETURN isolated AS n
要删除这些没有关系的节点的话,可以使用下面的代码一步到位
看看是不是你要删除的节点名称
1 2 3 4
// Preview the names of up to 20 `中医证候库` nodes that have no relationships,
// so they can be checked before deletion.
MATCH (n:中医证候库)
WHERE NOT (n)--()
RETURN n.name
LIMIT 20
删掉这些节点
1 2 3
// Delete every `中医证候库` node that has no relationship in either direction.
// Plain DELETE is sufficient because the matched nodes have no relationships.
MATCH (orphan:中医证候库)
WHERE NOT (orphan)--()
DELETE orphan
🔍 节点匹配度校验(一致性检查)
这是建立关系之前可以先做的一步检查:确认表格里的源节点、目标节点两列能否在数据库中匹配到对应的节点(匹配不上的那一列会显示为 null)
1 2 3 4 5 6 7 8 9 10 11 12
// Consistency check before building relationships: show, for up to 200 CSV
// rows, each source/target value next to the name of the matching node.
LOAD CSV WITH HEADERS FROM 'file:///概念.csv' AS row
WITH
  trim(row.`源节点`) AS source,
  trim(row.`目标节点`) AS target
WHERE source IS NOT NULL AND source <> ''
  AND target IS NOT NULL AND target <> ''
// OPTIONAL MATCH keeps the row when no node is found; the *_DB column is then null.
OPTIONAL MATCH (a:白睛分区库 {name: source})
OPTIONAL MATCH (b:概念库 {name: target})
RETURN
  source AS 源节点_CSV,
  a.name AS 源节点_DB,
  target AS 目标节点_CSV,
  b.name AS 目标节点_DB
LIMIT 200;
匹配不上最快的方法是删掉重建,用GPT编写相关的代码就行
1 2 3
// Delete every `形态特征库` node whose name contains '血脉', together with
// all relationships attached to it.
MATCH (target:形态特征库)
WHERE target.name CONTAINS '血脉'
DETACH DELETE target;
// For each CSV row, link existing nodes with relationship types taken from the
// row: classic -> clause (type from `关系三`) and entity -> classic (type from
// `关系四`, with the clause text stored on the relationship).
LOAD CSV WITH HEADERS FROM "file:///目诊解剖经典库.csv" AS row
WITH
  trim(row.`第二批节点`) AS entity_name,
  trim(row.`第二批经典`) AS classic_name,
  trim(row.`第二批条文`) AS clause_text,
  trim(row.`关系三`) AS rel1,
  trim(row.`关系四`) AS rel2
// Skip rows with a blank relationship type, which apoc.merge.relationship cannot use.
WHERE rel1 IS NOT NULL AND rel1 <> ''
  AND rel2 IS NOT NULL AND rel2 <> ''

// 1) Match nodes that already exist; rows with any missing node are skipped.
MATCH (e:眼部解剖数据库 {name: entity_name})
MATCH (c:中医经典数据库 {name: classic_name})
MATCH (t:中医经典数据库 {name: clause_text})

// 2) Dynamic relationship: classic -> clause.
CALL apoc.merge.relationship(
  c,     // start node
  rel1,  // relationship type for this direction (the first of the two)
  {},    // identifying properties used to find an existing relationship (may be empty)
  {},    // properties set only when the relationship is created
  t      // end node
) YIELD rel AS rel_classic_clause

// 3) Dynamic relationship: entity -> classic, storing the clause text as a lookup key.
// NOTE(review): identifying properties are empty, so one entity/classic pair
// shares a single relationship and its `clause` holds the last row processed —
// confirm this is intended.
CALL apoc.merge.relationship(
  e,
  rel2,
  {},                     // identifying properties
  {clause: clause_text},  // properties set only on creation
  c
) YIELD rel AS rel_entity_classic

// 4) If the relationship already existed, still refresh `clause` to avoid a stale value.
SET rel_entity_classic.clause = clause_text

RETURN count(*) AS rows_done;
可以用这个代码验证一下刚刚构建的关系是不是成功了
1 2 3
// Spot check: starting from the `证候库` node named '心肝寒痰证', follow
// `出现` to `形态库` nodes and `颜色是` to `颜色库` nodes, then list each
// feature name with its color name.
MATCH (c:证候库)-[:出现]->(t:形态库)-[:颜色是]->(color:颜色库)
WHERE c.name = '心肝寒痰证'
RETURN
  t.name AS 特征,
  color.name AS 颜色
ORDER BY 特征
//转移单个节点,示例是将“形态特征库”中的“鲜红色”节点转移到“颜色特征库”中
1 2 3 4
// Relabel the `形态特征库` node named '鲜红色': add the `颜色特征库` label
// and remove the `形态特征库` label.
MATCH (n:`形态特征库`)
WHERE n.name = '鲜红色'
SET n:`颜色特征库`
REMOVE n:`形态特征库`
RETURN n;
//转移多个节点(这个我没试过)
1 2 3 4 5
// Relabel several `形态特征库` nodes at once: add the `颜色特征库` label and
// remove the `形态特征库` label.
MATCH (n:`形态特征库`)
// List the names of the nodes to move here.
WHERE n.name IN ['鲜红色', '深红色', '玫瑰红']
SET n:`颜色特征库`
REMOVE n:`形态特征库`
RETURN n;
// Report how many `目诊解剖` nodes have at least one `概念为` relationship
// (in either direction), how many do not, and the connected share in percent.
MATCH (n:目诊解剖)
WITH count(n) AS 总节点数
// OPTIONAL MATCH keeps the row when no node is connected, so the report shows
// 0 connected nodes instead of returning nothing.
OPTIONAL MATCH (m:目诊解剖)-[:概念为]-()
WITH 总节点数, count(DISTINCT m) AS 已连接节点数
RETURN
  总节点数,
  已连接节点数,
  总节点数 - 已连接节点数 AS 未连接节点数,
  // Guard against dividing by zero when the label has no nodes at all.
  CASE
    WHEN 总节点数 = 0 THEN 0.0
    ELSE (toFloat(已连接节点数) / 总节点数) * 100
  END AS 连接率百分比