MATCH (e:Entity), (d:Document)
WHERE e.embeddings IS NOT NULL
AND d.embeddings IS NOT NULL
AND size(e.embeddings) = size(d.embeddings)
WITH e, d,
reduce(numerator = 0.0, i in range(0, size(e.embeddings)-1) | numerator + toFloat(e.embeddings[i])*toFloat(d.embeddings[i])) as dotProduct,
sqrt(reduce(eSum = 0.0, i in range(0, size(e.embeddings)-1) | eSum + toFloat(e.embeddings[i])^2)) as eNorm,
sqrt(reduce(dSum = 0.0, i in range(0, size(d.embeddings)-1) | dSum + toFloat(d.embeddings[i])^2)) as dNorm
WITH e, d, dotProduct / (eNorm * dNorm) as cosineSimilarity
WHERE cosineSimilarity > 0.8
MERGE (e)-[r:RELATES_TO]->(d)
ON CREATE SET r.cosineSimilarity = cosineSimilarity
RETURN ID(e), e.name, ID(d), d.full_text, r.cosineSimilarity
ORDER BY cosineSimilarity DESC
queries = [
"DROP INDEX test_index_document IF EXISTS;",
"DROP INDEX test_index_entity IF EXISTS;"
]
for query in queries:
run_query(driver, query)
# 为文档嵌入创建向量索引
CREATE VECTOR INDEX test_index_document
IF NOT EXISTS
FOR (d:Document)
ON (d.embeddings)
OPTIONS {indexConfig: {
`vector.dimensions`: 768,
`vector.similarity_function`: 'cosine'
}}
# 为实体嵌入创建向量索引
CREATE VECTOR INDEX test_index_entity IF NOT EXISTS
FOR (n:Entity)
ON (n.embeddings)
OPTIONS {indexConfig: {
`vector.dimensions`: 768,
`vector.similarity_function`: 'cosine'
}}
CALL db.index.vector.queryNodes('test_index_entity', 10, $user_query)
YIELD node AS vectorNode, score as vectorScore
WITH vectorNode, vectorScore
ORDER BY vectorScore DESC
RETURN vectorNode.name AS label, vectorScore AS score
CALL db.index.vector.queryNodes('test_index_entity', 10, $user_query)
YIELD node AS vectorNode, score as vectorScore
WITH vectorNode, vectorScore
MATCH (vectorNode)-[r]->(d:Document)
WITH vectorNode.name AS label, vectorScore as score, d.docID as closest_document_name,
d.full_text as closest_document_text, r.cosineSimilarity as similarity
ORDER BY similarity DESC
RETURN label, closest_document_name, closest_document_text, similarity
LIMIT 10
CALL db.index.vector.queryNodes('test_index_entity', 10, $my_query_emb_list)
YIELD node AS vectorNode, score as vectorScore
WITH vectorNode, vectorScore
MATCH (vectorNode)-[r]->(d:Document)
WITH vectorNode.name AS label, vectorScore as score, d.docID as closest_document_name,
d.full_text as closest_document_text, r.cosineSimilarity as similarity
ORDER BY similarity DESC
RETURN label, closest_document_name, closest_document_text, similarity
LIMIT 10
UNION
CALL db.index.fulltext.queryNodes('text_index_entity', $my_query)
YIELD node AS textNode, score as textScore
WITH textNode, textScore
MATCH (textNode)-[r]->(d:Document)
WITH textNode.name AS label, textScore as score, d.docID as closest_document_name,
d.full_text as closest_document_text, r.cosineSimilarity as similarity
ORDER BY similarity DESC
RETURN label, closest_document_name, closest_document_text, similarity
LIMIT 10
CALL db.index.vector.queryNodes('test_index_entity', 10, $user_query_emb)
YIELD node AS vectorNode, score as vectorScore
WITH vectorNode, vectorScore
MATCH (vectorNode)-[r]->(d:Document)
WITH DISTINCT vectorNode as e, d, r, r.cosineSimilarity as cosineSimilarity
ORDER BY cosineSimilarity DESC
WITH ID(e) as id, e.label as title,
cosineSimilarity,
e.description as description,
head(collect(d.docID)) as document_id,
head(collect(d.chunkID)) as chunkID,
head(collect(d.full_text)) as document_text,
reduce(mDot = 0.0, i IN range(0, size($user_query_emb) - 1) | mDot + $user_query_emb[i] * e.embeddings[i]) /
(sqrt(reduce(mSq = 0.0, x IN $user_query_emb | mSq + x^2)) * sqrt(reduce(eSq = 0.0, y IN e.embeddings | eSq + y^2))) AS entity_similarity,
reduce(mDot = 0.0, i IN range(0, size($user_query_emb) - 1) | mDot + $user_query_emb[i] * d.embeddings[i]) /
(sqrt(reduce(mSq = 0.0, x IN $user_query_emb | mSq + x^2)) * sqrt(reduce(dSq = 0.0, y IN d.embeddings | dSq + y^2))) AS document_similarity
WITH id, title, description, document_id, document_text, entity_similarity, document_similarity, chunkID,
(entity_similarity + document_similarity) / 2 AS similarity, cosineSimilarity
WHERE similarity > 0.8
RETURN id, title, document_id, document_text, similarity, chunkID
ORDER BY cosineSimilarity DESC, similarity DESC