Understanding vector databases, embeddings, and semantic search for AI applications
A Vector Database is the smart search engine that powers your AI agents' ability to find relevant information instantly. Think of it as Google for your knowledge base: instead of matching keywords, it understands the meaning behind a question and finds the most relevant answers.
Imagine you have a library with thousands of books, but instead of organizing them alphabetically, you organize them by meaning and topic similarity. A vector database does exactly this with your documents and data.
User: "How do I return a broken item?"Search looks for: "return" AND "broken" AND "item"Might miss: Documents that say "refund defective product"
Vector Search:
User: "How do I return a broken item?"Search understands: User wants to return a defective productFinds: "Refund policy for defective products", "Exchange damaged goods", "Return broken merchandise"
```
Employee Question: "I need time off for my wedding"

Vector DB Finds:
- "Personal leave policies"
- "Vacation request procedures"
- "Special occasion time off"
- "Marriage leave benefits"
```
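The example below calls a `get_local_embedding()` helper to turn text into vectors. If you don't already have one from earlier in this guide, a minimal sketch looks like this (the model choice is an assumption; it happens to produce the 384-dimensional vectors the FAISS example expects):

```python
from sentence_transformers import SentenceTransformer

# Assumed helper: all-MiniLM-L6-v2 outputs 384-dimensional embeddings,
# matching dimension=384 in the FAISS example below
_embedder = SentenceTransformer('all-MiniLM-L6-v2')

def get_local_embedding(text):
    """Embed a single string into a plain Python list of floats."""
    return _embedder.encode(text).tolist()
```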
```python
import faiss
import numpy as np

class FAISSVectorDB:
    def __init__(self, dimension):
        self.dimension = dimension
        self.index = faiss.IndexFlatL2(dimension)  # L2 distance
        self.texts = []

    def add_vectors(self, vectors, texts):
        """Add vectors to the database"""
        vectors = np.array(vectors).astype('float32')
        self.index.add(vectors)
        self.texts.extend(texts)

    def search(self, query_vector, k=5):
        """Search for similar vectors"""
        query_vector = np.array([query_vector]).astype('float32')
        distances, indices = self.index.search(query_vector, k)

        results = []
        for i, idx in enumerate(indices[0]):
            if idx != -1:  # Valid result
                results.append({
                    'text': self.texts[idx],
                    'distance': distances[0][i],
                    'similarity': 1 / (1 + distances[0][i])  # Convert to similarity
                })
        return results

# Usage example
db = FAISSVectorDB(dimension=384)

# Add some example data
texts = [
    "The cat sat on the mat",
    "Dogs are great pets",
    "I love machine learning",
    "Python is a programming language"
]
vectors = [get_local_embedding(text) for text in texts]  # embedding helper defined above
db.add_vectors(vectors, texts)

# Search
query = "feline on carpet"
query_vector = get_local_embedding(query)
results = db.search(query_vector, k=2)

for result in results:
    print(f"Text: {result['text']}")
    print(f"Similarity: {result['similarity']:.3f}")
```
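The `1 / (1 + distance)` conversion above is a convenient heuristic, not true cosine similarity. If you want cosine scores directly, a common FAISS pattern is an inner-product index over L2-normalized vectors; here is a sketch of that variant:

```python
import faiss
import numpy as np

def build_cosine_index(vectors):
    """Sketch: cosine-similarity search via a FAISS inner-product index.

    On unit-length vectors, inner product equals cosine similarity,
    so search scores come back as cosine values in [-1, 1].
    """
    vecs = np.array(vectors).astype('float32')
    faiss.normalize_L2(vecs)                  # normalize rows in place
    index = faiss.IndexFlatIP(vecs.shape[1])  # inner-product index
    index.add(vecs)
    return index
```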
```python
from sentence_transformers import CrossEncoder

class AdvancedVectorDB:
    def __init__(self):
        self.vector_db = ChromaVectorDB("advanced_search")
        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    def expand_query(self, query, expansion_terms=3):
        """Expand query with related terms"""
        # In practice, you might use a language model for this
        expansion_map = {
            "return": ["refund", "exchange", "send back"],
            "broken": ["damaged", "defective", "faulty"],
            "delivery": ["shipping", "dispatch", "postal"]
        }

        expanded_terms = []
        for word in query.lower().split():
            if word in expansion_map:
                expanded_terms.extend(expansion_map[word][:expansion_terms])

        return f"{query} {' '.join(expanded_terms)}"

    def search_with_reranking(self, query, k=5, rerank_top=20):
        """Search with query expansion and result re-ranking"""
        # Expand query
        expanded_query = self.expand_query(query)

        # Get initial results
        initial_results = self.vector_db.search(expanded_query, n_results=rerank_top)

        # Re-rank using cross-encoder
        if len(initial_results) > 1:
            query_doc_pairs = [
                (query, result['document']) for result in initial_results
            ]
            rerank_scores = self.reranker.predict(query_doc_pairs)

            # Combine with original scores
            for i, result in enumerate(initial_results):
                result['rerank_score'] = rerank_scores[i]

            # Sort by rerank score, best first
            initial_results.sort(key=lambda x: x['rerank_score'], reverse=True)

        return initial_results[:k]
```
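Putting it together, a call might look like this (a hypothetical usage sketch; it assumes the `ChromaVectorDB` collection from earlier has already been populated with support documents):

```python
db = AdvancedVectorDB()

# "broken" expands to "damaged", "defective", "faulty" before the search,
# then the cross-encoder re-orders the candidates against the original query
results = db.search_with_reranking("How do I return a broken item?", k=3)

for r in results:
    print(f"{r['rerank_score']:.3f}  {r['document']}")
```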