MMR in Milvus vector stores #15634

pauri32 · 2024-08-26T06:58:46Z

Description

Enabled the MMR query mode for the Milvus vector store. MMR (Maximal Marginal Relevance) is an algorithm that selects documents that are not only relevant to a given query but also diverse from each other.

MMR was missing from the Milvus integration, although it was already implemented for other vector stores. For reference, see:

llama_index/llama-index-integrations/vector_stores/llama-index-vector-stores-astra-db/llama_index/vector_stores/astra_db/base.py

Lines 369 to 425 in ef9a21c

    
           elif query.mode == VectorStoreQueryMode.MMR: 
        
               # Querying a larger number of vectors and then doing MMR on them. 
        
               if ( 
        
                   kwargs.get("mmr_prefetch_factor") is not None 
        
                   and kwargs.get("mmr_prefetch_k") is not None 
        
               ): 
        
                   raise ValueError( 
        
                       "'mmr_prefetch_factor' and 'mmr_prefetch_k' " 
        
                       "cannot coexist in a call to query()" 
        
                   ) 
        
               else: 
        
                   if kwargs.get("mmr_prefetch_k") is not None: 
        
                       prefetch_k0 = int(kwargs["mmr_prefetch_k"]) 
        
                   else: 
        
                       prefetch_k0 = int( 
        
                           query.similarity_top_k 
        
                           * kwargs.get("mmr_prefetch_factor", DEFAULT_MMR_PREFETCH_FACTOR) 
        
                       ) 
        
               # Get the most we can possibly need to fetch 
        
               prefetch_k = max(prefetch_k0, query.similarity_top_k) 
        
               # Call AstraPy to fetch them (similarity from DB not needed here) 
        
               prefetch_matches = list( 
        
                   self._collection.find( 
        
                       filter=query_metadata, 
        
                       projection={"*": True}, 
        
                       limit=prefetch_k, 
        
                       sort={"$vector": query_embedding}, 
        
                   ) 
        
               ) 
        
               # Get the MMR threshold 
        
               mmr_threshold = query.mmr_threshold or kwargs.get("mmr_threshold") 
        
               # If we have found documents, we can proceed 
        
               if prefetch_matches: 
        
                   zipped_indices, zipped_embeddings = zip( 
        
                       *enumerate(match["$vector"] for match in prefetch_matches) 
        
                   ) 
        
                   pf_match_indices, pf_match_embeddings = list(zipped_indices), list( 
        
                       zipped_embeddings 
        
                   ) 
        
               else: 
        
                   pf_match_indices, pf_match_embeddings = [], [] 
        
               # Call the Llama utility function to get the top  k 
        
               mmr_similarities, mmr_indices = get_top_k_mmr_embeddings( 
        
                   query_embedding, 
        
                   pf_match_embeddings, 
        
                   similarity_top_k=query.similarity_top_k, 
        
                   embedding_ids=pf_match_indices, 
        
                   mmr_threshold=mmr_threshold, 
        
               ) 
        
               # Finally, build the final results based on the mmr values 
        
               matches = [prefetch_matches[mmr_index] for mmr_index in mmr_indices] 
        
               top_k_scores = mmr_similarities

llama_index/llama-index-integrations/vector_stores/llama-index-vector-stores-cassandra/llama_index/vector_stores/cassandra/base.py

Lines 271 to 319 in ef9a21c

    
           elif query.mode == VectorStoreQueryMode.MMR: 
        
               # Querying a larger number of vectors and then doing MMR on them. 
        
               if ( 
        
                   kwargs.get("mmr_prefetch_factor") is not None 
        
                   and kwargs.get("mmr_prefetch_k") is not None 
        
               ): 
        
                   raise ValueError( 
        
                       "'mmr_prefetch_factor' and 'mmr_prefetch_k' " 
        
                       "cannot coexist in a call to query()" 
        
                   ) 
        
               else: 
        
                   if kwargs.get("mmr_prefetch_k") is not None: 
        
                       prefetch_k0 = int(kwargs["mmr_prefetch_k"]) 
        
                   else: 
        
                       prefetch_k0 = int( 
        
                           query.similarity_top_k 
        
                           * kwargs.get("mmr_prefetch_factor", DEFAULT_MMR_PREFETCH_FACTOR) 
        
                       ) 
        
               prefetch_k = max(prefetch_k0, query.similarity_top_k) 
        
               # 
        
               prefetch_matches = list( 
        
                   self._vector_table.metric_ann_search( 
        
                       vector=query_embedding, 
        
                       n=prefetch_k, 
        
                       metric="cos", 
        
                       metric_threshold=None,  # this is not `mmr_threshold` 
        
                       metadata=query_metadata, 
        
                   ) 
        
               ) 
        
               # 
        
               mmr_threshold = query.mmr_threshold or kwargs.get("mmr_threshold") 
        
               if prefetch_matches: 
        
                   pf_match_indices, pf_match_embeddings = zip( 
        
                       *enumerate(match["vector"] for match in prefetch_matches) 
        
                   ) 
        
               else: 
        
                   pf_match_indices, pf_match_embeddings = [], [] 
        
               pf_match_indices = list(pf_match_indices) 
        
               pf_match_embeddings = list(pf_match_embeddings) 
        
               mmr_similarities, mmr_indices = get_top_k_mmr_embeddings( 
        
                   query_embedding, 
        
                   pf_match_embeddings, 
        
                   similarity_top_k=query.similarity_top_k, 
        
                   embedding_ids=pf_match_indices, 
        
                   mmr_threshold=mmr_threshold, 
        
               ) 
        
               # 
        
               matches = [prefetch_matches[mmr_index] for mmr_index in mmr_indices] 
        
               top_k_scores = mmr_similarities

Main changes:

Added an MMR query mode for search in Milvus.
Used a prefetch factor in the same style as the Cassandra integration.
Bumped the version of llama-index-vector-stores-milvus.

New Package?

Did I fill in the tool.llamahub section in the pyproject.toml and provide a detailed README.md for my new integration or package?

Yes
No

Version Bump?

Did I bump the version in the pyproject.toml file of the package I am updating? (Except for the llama-index-core package)

Yes
No

Type of Change

Please delete options that are not relevant.

Bug fix (non-breaking change which fixes an issue)
New feature (non-breaking change which adds functionality)
Breaking change (fix or feature that would cause existing functionality to not work as expected)
This change requires a documentation update

How Has This Been Tested?

Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration

Added new unit/integration tests
Added new notebook (that tests end-to-end)
I stared at the code and made sure it makes sense

Suggested Checklist:

I have performed a self-review of my own code
I have commented my code, particularly in hard-to-understand areas
I have made corresponding changes to the documentation
I have added Google Colab support for the newly added notebooks.
My changes generate no new warnings
I have added tests that prove my fix is effective or that my feature works
New and existing unit tests pass locally with my changes
I ran make format; make lint to appease the lint gods

…tores

paro-lw added 2 commits August 23, 2024 18:24

enabled MMR query mode for milvus vector stores

190c248

formatting

b25e945

dosubot bot added the size:M This PR changes 30-99 lines, ignoring generated files. label Aug 26, 2024

pauri32 changed the title ~~Mmr milvus vector stores~~ MMR milvus vector stores Aug 26, 2024

pauri32 changed the title ~~MMR milvus vector stores~~ MMR in Milvus vector stores Aug 26, 2024

paro-lw and others added 4 commits August 26, 2024 16:17

Merge remote-tracking branch 'upstream/HEAD' into mmr-milvus-vector-s…

0bcde76

…tores

Merge branch 'main' into mmr-milvus-vector-stores

7a7eac3

Merge branch 'main' into mmr-milvus-vector-stores

c6c2bda

vbump

a4c8efc

logan-markewich self-assigned this Aug 30, 2024

Merge branch 'main' into mmr-milvus-vector-stores

aa9c84c

logan-markewich approved these changes Aug 30, 2024

View reviewed changes

dosubot bot added the lgtm This PR has been approved by a maintainer label Aug 30, 2024

logan-markewich enabled auto-merge (squash) August 30, 2024 00:49

logan-markewich merged commit d7d4555 into run-llama:main Aug 30, 2024
9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

MMR in Milvus vector stores #15634

MMR in Milvus vector stores #15634

pauri32 commented Aug 26, 2024

	elif query.mode == VectorStoreQueryMode.MMR:
	# Querying a larger number of vectors and then doing MMR on them.
	if (
	kwargs.get("mmr_prefetch_factor") is not None
	and kwargs.get("mmr_prefetch_k") is not None
	):
	raise ValueError(
	"'mmr_prefetch_factor' and 'mmr_prefetch_k' "
	"cannot coexist in a call to query()"
	)
	else:
	if kwargs.get("mmr_prefetch_k") is not None:
	prefetch_k0 = int(kwargs["mmr_prefetch_k"])
	else:
	prefetch_k0 = int(
	query.similarity_top_k
	* kwargs.get("mmr_prefetch_factor", DEFAULT_MMR_PREFETCH_FACTOR)
	)
	# Get the most we can possibly need to fetch
	prefetch_k = max(prefetch_k0, query.similarity_top_k)

	# Call AstraPy to fetch them (similarity from DB not needed here)
	prefetch_matches = list(
	self._collection.find(
	filter=query_metadata,
	projection={"*": True},
	limit=prefetch_k,
	sort={"$vector": query_embedding},
	)
	)

	# Get the MMR threshold
	mmr_threshold = query.mmr_threshold or kwargs.get("mmr_threshold")

	# If we have found documents, we can proceed
	if prefetch_matches:
	zipped_indices, zipped_embeddings = zip(
	*enumerate(match["$vector"] for match in prefetch_matches)
	)
	pf_match_indices, pf_match_embeddings = list(zipped_indices), list(
	zipped_embeddings
	)
	else:
	pf_match_indices, pf_match_embeddings = [], []

	# Call the Llama utility function to get the top k
	mmr_similarities, mmr_indices = get_top_k_mmr_embeddings(
	query_embedding,
	pf_match_embeddings,
	similarity_top_k=query.similarity_top_k,
	embedding_ids=pf_match_indices,
	mmr_threshold=mmr_threshold,
	)

	# Finally, build the final results based on the mmr values
	matches = [prefetch_matches[mmr_index] for mmr_index in mmr_indices]
	top_k_scores = mmr_similarities

MMR in Milvus vector stores #15634

MMR in Milvus vector stores #15634

Conversation

pauri32 commented Aug 26, 2024

Description

New Package?

Version Bump?

Type of Change

How Has This Been Tested?

Suggested Checklist: