Merge branch 'main' into main

langchain-ai · Jan 8, 2025 · be1162f · be1162f
2 parents 5e916f8 + 5af91a1
commit be1162f
Show file tree

Hide file tree

Showing 19 changed files with 1,513 additions and 169 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,16 @@
 
 ## Next
 
+### Changed
+
+- Made the `source` parameter of `GraphDocument` optional and updated related methods to support this.
+
+### Fixed
+
+- Disabled warnings from the Neo4j driver for the Neo4jGraph class.
+
+## 0.2.0
+
 ### Added
 
 - Enhanced Neo4j driver connection management with more robust error handling.

diff --git a/libs/neo4j/.gitignore b/libs/neo4j/.gitignore
@@ -1 +1,2 @@
 __pycache__
+.python-version
diff --git a/libs/neo4j/langchain_neo4j/chains/graph_qa/cypher.py b/libs/neo4j/langchain_neo4j/chains/graph_qa/cypher.py
@@ -358,7 +358,7 @@ def from_llm(
         if validate_cypher:
             corrector_schema = [
                 Schema(el["start"], el["type"], el["end"])
-                for el in kwargs["graph"].structured_schema.get("relationships")
+                for el in kwargs["graph"].get_structured_schema.get("relationships", [])
             ]
             cypher_query_corrector = CypherQueryCorrector(corrector_schema)
 

diff --git a/libs/neo4j/langchain_neo4j/graphs/graph_document.py b/libs/neo4j/langchain_neo4j/graphs/graph_document.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import List, Union
+from typing import List, Optional, Union
 
 from langchain_core.documents import Document
 from langchain_core.load.serializable import Serializable
@@ -43,9 +43,10 @@ class GraphDocument(Serializable):
     Attributes:
         nodes (List[Node]): A list of nodes in the graph.
         relationships (List[Relationship]): A list of relationships in the graph.
-        source (Document): The document from which the graph information is derived.
+        source (Optional[Document]): The document from which the graph information is
+            derived.
     """
 
     nodes: List[Node]
     relationships: List[Relationship]
-    source: Document
+    source: Optional[Document] = None
diff --git a/libs/neo4j/langchain_neo4j/graphs/neo4j_graph.py b/libs/neo4j/langchain_neo4j/graphs/neo4j_graph.py
@@ -191,7 +191,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
                     "DATE_TIME",
                     "LOCAL_DATE_TIME",
                 ]:
-                    if prop.get("min") is not None:
+                    if prop.get("min") and prop.get("max"):
                         example = f'Min: {prop["min"]}, Max: {prop["max"]}'
                     else:
                         example = (
@@ -215,7 +215,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
             formatted_rel_props.append(f"- **{rel_type}**")
             for prop in properties:
                 example = ""
-                if prop["type"] == "STRING":
+                if prop["type"] == "STRING" and prop.get("values"):
                     if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
                         example = (
                             f'Example: "{clean_string_values(prop["values"][0])}"'
@@ -238,8 +238,8 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
                     "DATE_TIME",
                     "LOCAL_DATE_TIME",
                 ]:
-                    if prop.get("min"):  # If we have min/max
-                        example = f'Min: {prop["min"]}, Max:  {prop["max"]}'
+                    if prop.get("min") and prop.get("max"):  # If we have min/max
+                        example = f'Min: {prop["min"]}, Max: {prop["max"]}'
                     else:  # return a single value
                         example = (
                             f'Example: "{prop["values"][0]}"' if prop["values"] else ""
@@ -252,7 +252,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
                         f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
                     )
                 formatted_rel_props.append(
-                    f"  - `{prop['property']}: {prop['type']}` {example}"
+                    f"  - `{prop['property']}`: {prop['type']} {example}"
                 )
     else:
         # Format node properties
@@ -310,6 +310,7 @@ class Neo4jGraph(GraphStore):
     enhanced_schema (bool): A flag whether to scan the database for
             example values and use them in the graph schema. Default is False.
     driver_config (Dict): Configuration passed to Neo4j Driver.
+            Defaults to {"notifications_min_severity": "OFF"} if not set.
 
     *Security note*: Make sure that the database connection uses credentials
         that are narrowly-scoped to only include necessary permissions.
@@ -365,9 +366,10 @@ def __init__(
             {"database": database}, "database", "NEO4J_DATABASE", "neo4j"
         )
 
-        self._driver = neo4j.GraphDatabase.driver(
-            url, auth=auth, **(driver_config or {})
-        )
+        if driver_config is None:
+            driver_config = {}
+        driver_config.setdefault("notifications_min_severity", "OFF")
+        self._driver = neo4j.GraphDatabase.driver(url, auth=auth, **driver_config)
         self._database = database
         self.timeout = timeout
         self.sanitize = sanitize
@@ -377,6 +379,20 @@ def __init__(
         # Verify connection
         try:
             self._driver.verify_connectivity()
+        except neo4j.exceptions.ConfigurationError as e:
+            # If notification filtering is not supported
+            if "Notification filtering is not supported" in str(e):
+                # Retry without notifications_min_severity
+                driver_config.pop("notifications_min_severity", None)
+                self._driver = neo4j.GraphDatabase.driver(
+                    url, auth=auth, **driver_config
+                )
+                self._driver.verify_connectivity()
+            else:
+                raise ValueError(
+                    "Could not connect to Neo4j database. "
+                    "Please ensure that the driver config is correct"
+                )
         except neo4j.exceptions.ServiceUnavailable:
             raise ValueError(
                 "Could not connect to Neo4j database. "
@@ -600,7 +616,7 @@ def add_graph_documents(
         - graph_documents (List[GraphDocument]): A list of GraphDocument objects
         that contain the nodes and relationships to be added to the graph. Each
         GraphDocument should encapsulate the structure of part of the graph,
-        including nodes, relationships, and the source document information.
+        including nodes, relationships, and optionally the source document information.
         - include_source (bool, optional): If True, stores the source document
         and links it to nodes in the graph using the MENTIONS relationship.
         This is useful for tracing back the origin of data. Merges source
@@ -634,25 +650,33 @@ def add_graph_documents(
                 )
                 self.refresh_schema()  # Refresh constraint information
 
+        # Check each graph_document has a source when include_source is true
+        if include_source:
+            for doc in graph_documents:
+                if doc.source is None:
+                    raise TypeError(
+                        "include_source is set to True, "
+                        "but at least one document has no `source`."
+                    )
+
         node_import_query = _get_node_import_query(baseEntityLabel, include_source)
         rel_import_query = _get_rel_import_query(baseEntityLabel)
         for document in graph_documents:
-            if not document.source.metadata.get("id"):
-                document.source.metadata["id"] = md5(
-                    document.source.page_content.encode("utf-8")
-                ).hexdigest()
+            node_import_query_params: dict[str, Any] = {
+                "data": [el.__dict__ for el in document.nodes]
+            }
+            if include_source and document.source:
+                if not document.source.metadata.get("id"):
+                    document.source.metadata["id"] = md5(
+                        document.source.page_content.encode("utf-8")
+                    ).hexdigest()
+                node_import_query_params["document"] = document.source.__dict__
 
             # Remove backticks from node types
             for node in document.nodes:
                 node.type = _remove_backticks(node.type)
             # Import nodes
-            self.query(
-                node_import_query,
-                {
-                    "data": [el.__dict__ for el in document.nodes],
-                    "document": document.source.__dict__,
-                },
-            )
+            self.query(node_import_query, node_import_query_params)
             # Import relationships
             self.query(
                 rel_import_query,

diff --git a/libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py b/libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
@@ -466,6 +466,8 @@ class Neo4jVector(VectorStore):
             (default: False). Useful for testing.
         effective_search_ratio: Controls the candidate pool size by multiplying $k
             to balance query accuracy and performance.
+        embedding_dimension: The dimension of the embeddings. If not provided,
+            will query the embedding model to calculate the dimension.
 
     Example:
         .. code-block:: python
@@ -509,6 +511,7 @@ def __init__(
         relevance_score_fn: Optional[Callable[[float], float]] = None,
         index_type: IndexType = DEFAULT_INDEX_TYPE,
         graph: Optional[Neo4jGraph] = None,
+        embedding_dimension: Optional[int] = None,
     ) -> None:
         try:
             import neo4j
@@ -593,8 +596,11 @@ def __init__(
         self.search_type = search_type
         self._index_type = index_type
 
-        # Calculate embedding dimension
-        self.embedding_dimension = len(embedding.embed_query("foo"))
+        if embedding_dimension:
+            self.embedding_dimension = embedding_dimension
+        else:
+            # Calculate embedding dimension
+            self.embedding_dimension = len(embedding.embed_query("foo"))
 
         # Delete existing data if flagged
         if pre_delete_collection:
@@ -1336,6 +1342,7 @@ def from_existing_index(
         index_name: str,
         search_type: SearchType = DEFAULT_SEARCH_TYPE,
         keyword_index_name: Optional[str] = None,
+        embedding_dimension: Optional[int] = None,
         **kwargs: Any,
     ) -> Neo4jVector:
         """
@@ -1358,10 +1365,24 @@ def from_existing_index(
             index_name=index_name,
             keyword_index_name=keyword_index_name,
             search_type=search_type,
+            embedding_dimension=embedding_dimension,
             **kwargs,
         )
 
-        embedding_dimension, index_type = store.retrieve_existing_index()
+        if embedding_dimension:
+            (
+                embedding_dimension_from_existing,
+                index_type,
+            ) = store.retrieve_existing_index()
+            if embedding_dimension_from_existing != embedding_dimension:
+                raise ValueError(
+                    "The provided embedding function and vector index "
+                    "dimensions do not match.\n"
+                    f"Embedding function dimension: {embedding_dimension}\n"
+                    f"Vector index dimension: {embedding_dimension_from_existing}"
+                )
+        else:
+            embedding_dimension, index_type = store.retrieve_existing_index()
 
         # Raise error if relationship index type
         if index_type == "RELATIONSHIP":
@@ -1408,6 +1429,7 @@ def from_existing_relationship_index(
         embedding: Embeddings,
         index_name: str,
         search_type: SearchType = DEFAULT_SEARCH_TYPE,
+        embedding_dimension: Optional[int] = None,
         **kwargs: Any,
     ) -> Neo4jVector:
         """
@@ -1428,10 +1450,24 @@ def from_existing_relationship_index(
         store = cls(
             embedding=embedding,
             index_name=index_name,
+            embedding_dimension=embedding_dimension,
             **kwargs,
         )
 
-        embedding_dimension, index_type = store.retrieve_existing_index()
+        if embedding_dimension:
+            (
+                embedding_dimension_from_existing,
+                index_type,
+            ) = store.retrieve_existing_index()
+            if embedding_dimension_from_existing != embedding_dimension:
+                raise ValueError(
+                    "The provided embedding function and vector index "
+                    "dimensions do not match.\n"
+                    f"Embedding function dimension: {embedding_dimension}\n"
+                    f"Vector index dimension: {embedding_dimension_from_existing}"
+                )
+        else:
+            embedding_dimension, index_type = store.retrieve_existing_index()
 
         if not index_type:
             raise ValueError(

diff --git a/libs/neo4j/poetry.lock b/libs/neo4j/poetry.lock
diff --git a/libs/neo4j/pyproject.toml b/libs/neo4j/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-neo4j"
-version = "0.1.1"
+version = "0.2.0"
 description = "An integration package connecting Neo4j and LangChain"
 authors = []
 readme = "README.md"
@@ -13,7 +13,7 @@ license = "MIT"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-langchain-core = "^0.3.0"
+langchain-core = "^0.3.8"
 neo4j = "^5.25.0"
 langchain = "^0.3.7"
 

diff --git a/libs/neo4j/tests/unit_tests/llms/__init__.py → ...ests/integration_tests/chains/__init__.py b/libs/neo4j/tests/unit_tests/llms/__init__.py → ...ests/integration_tests/chains/__init__.py