Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
alexthomas93 authored Jan 8, 2025
2 parents 5e916f8 + 5af91a1 commit be1162f
Show file tree
Hide file tree
Showing 19 changed files with 1,513 additions and 169 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

## Next

### Changed

- Made the `source` parameter of `GraphDocument` optional and updated related methods to support this.

### Fixed

- Disabled warnings from the Neo4j driver for the Neo4jGraph class.

## 0.2.0

### Added

- Enhanced Neo4j driver connection management with more robust error handling.
Expand Down
1 change: 1 addition & 0 deletions libs/neo4j/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
__pycache__
.python-version
2 changes: 1 addition & 1 deletion libs/neo4j/langchain_neo4j/chains/graph_qa/cypher.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def from_llm(
if validate_cypher:
corrector_schema = [
Schema(el["start"], el["type"], el["end"])
for el in kwargs["graph"].structured_schema.get("relationships")
for el in kwargs["graph"].get_structured_schema.get("relationships", [])
]
cypher_query_corrector = CypherQueryCorrector(corrector_schema)

Expand Down
7 changes: 4 additions & 3 deletions libs/neo4j/langchain_neo4j/graphs/graph_document.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import List, Union
from typing import List, Optional, Union

from langchain_core.documents import Document
from langchain_core.load.serializable import Serializable
Expand Down Expand Up @@ -43,9 +43,10 @@ class GraphDocument(Serializable):
Attributes:
nodes (List[Node]): A list of nodes in the graph.
relationships (List[Relationship]): A list of relationships in the graph.
source (Document): The document from which the graph information is derived.
source (Optional[Document]): The document from which the graph information is
derived.
"""

nodes: List[Node]
relationships: List[Relationship]
source: Document
source: Optional[Document] = None
64 changes: 44 additions & 20 deletions libs/neo4j/langchain_neo4j/graphs/neo4j_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
if prop.get("min") is not None:
if prop.get("min") and prop.get("max"):
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else:
example = (
Expand All @@ -215,7 +215,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
formatted_rel_props.append(f"- **{rel_type}**")
for prop in properties:
example = ""
if prop["type"] == "STRING":
if prop["type"] == "STRING" and prop.get("values"):
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
example = (
f'Example: "{clean_string_values(prop["values"][0])}"'
Expand All @@ -238,8 +238,8 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
if prop.get("min"): # If we have min/max
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
if prop.get("min") and prop.get("max"): # If we have min/max
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else: # return a single value
example = (
f'Example: "{prop["values"][0]}"' if prop["values"] else ""
Expand All @@ -252,7 +252,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
)
formatted_rel_props.append(
f" - `{prop['property']}: {prop['type']}` {example}"
f" - `{prop['property']}`: {prop['type']} {example}"
)
else:
# Format node properties
Expand Down Expand Up @@ -310,6 +310,7 @@ class Neo4jGraph(GraphStore):
enhanced_schema (bool): A flag whether to scan the database for
example values and use them in the graph schema. Default is False.
driver_config (Dict): Configuration passed to Neo4j Driver.
Defaults to {"notifications_min_severity": "OFF"} if not set.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Expand Down Expand Up @@ -365,9 +366,10 @@ def __init__(
{"database": database}, "database", "NEO4J_DATABASE", "neo4j"
)

self._driver = neo4j.GraphDatabase.driver(
url, auth=auth, **(driver_config or {})
)
if driver_config is None:
driver_config = {}
driver_config.setdefault("notifications_min_severity", "OFF")
self._driver = neo4j.GraphDatabase.driver(url, auth=auth, **driver_config)
self._database = database
self.timeout = timeout
self.sanitize = sanitize
Expand All @@ -377,6 +379,20 @@ def __init__(
# Verify connection
try:
self._driver.verify_connectivity()
except neo4j.exceptions.ConfigurationError as e:
# If notification filtering is not supported
if "Notification filtering is not supported" in str(e):
# Retry without notifications_min_severity
driver_config.pop("notifications_min_severity", None)
self._driver = neo4j.GraphDatabase.driver(
url, auth=auth, **driver_config
)
self._driver.verify_connectivity()
else:
raise ValueError(
"Could not connect to Neo4j database. "
"Please ensure that the driver config is correct"
)
except neo4j.exceptions.ServiceUnavailable:
raise ValueError(
"Could not connect to Neo4j database. "
Expand Down Expand Up @@ -600,7 +616,7 @@ def add_graph_documents(
- graph_documents (List[GraphDocument]): A list of GraphDocument objects
that contain the nodes and relationships to be added to the graph. Each
GraphDocument should encapsulate the structure of part of the graph,
including nodes, relationships, and the source document information.
including nodes, relationships, and optionally the source document information.
- include_source (bool, optional): If True, stores the source document
and links it to nodes in the graph using the MENTIONS relationship.
This is useful for tracing back the origin of data. Merges source
Expand Down Expand Up @@ -634,25 +650,33 @@ def add_graph_documents(
)
self.refresh_schema() # Refresh constraint information

# Check each graph_document has a source when include_source is true
if include_source:
for doc in graph_documents:
if doc.source is None:
raise TypeError(
"include_source is set to True, "
"but at least one document has no `source`."
)

node_import_query = _get_node_import_query(baseEntityLabel, include_source)
rel_import_query = _get_rel_import_query(baseEntityLabel)
for document in graph_documents:
if not document.source.metadata.get("id"):
document.source.metadata["id"] = md5(
document.source.page_content.encode("utf-8")
).hexdigest()
node_import_query_params: dict[str, Any] = {
"data": [el.__dict__ for el in document.nodes]
}
if include_source and document.source:
if not document.source.metadata.get("id"):
document.source.metadata["id"] = md5(
document.source.page_content.encode("utf-8")
).hexdigest()
node_import_query_params["document"] = document.source.__dict__

# Remove backticks from node types
for node in document.nodes:
node.type = _remove_backticks(node.type)
# Import nodes
self.query(
node_import_query,
{
"data": [el.__dict__ for el in document.nodes],
"document": document.source.__dict__,
},
)
self.query(node_import_query, node_import_query_params)
# Import relationships
self.query(
rel_import_query,
Expand Down
44 changes: 40 additions & 4 deletions libs/neo4j/langchain_neo4j/vectorstores/neo4j_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ class Neo4jVector(VectorStore):
(default: False). Useful for testing.
effective_search_ratio: Controls the candidate pool size by multiplying $k
to balance query accuracy and performance.
embedding_dimension: The dimension of the embeddings. If not provided,
the embedding model is queried to calculate the dimension.
Example:
.. code-block:: python
Expand Down Expand Up @@ -509,6 +511,7 @@ def __init__(
relevance_score_fn: Optional[Callable[[float], float]] = None,
index_type: IndexType = DEFAULT_INDEX_TYPE,
graph: Optional[Neo4jGraph] = None,
embedding_dimension: Optional[int] = None,
) -> None:
try:
import neo4j
Expand Down Expand Up @@ -593,8 +596,11 @@ def __init__(
self.search_type = search_type
self._index_type = index_type

# Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo"))
if embedding_dimension:
self.embedding_dimension = embedding_dimension
else:
# Calculate embedding dimension
self.embedding_dimension = len(embedding.embed_query("foo"))

# Delete existing data if flagged
if pre_delete_collection:
Expand Down Expand Up @@ -1336,6 +1342,7 @@ def from_existing_index(
index_name: str,
search_type: SearchType = DEFAULT_SEARCH_TYPE,
keyword_index_name: Optional[str] = None,
embedding_dimension: Optional[int] = None,
**kwargs: Any,
) -> Neo4jVector:
"""
Expand All @@ -1358,10 +1365,24 @@ def from_existing_index(
index_name=index_name,
keyword_index_name=keyword_index_name,
search_type=search_type,
embedding_dimension=embedding_dimension,
**kwargs,
)

embedding_dimension, index_type = store.retrieve_existing_index()
if embedding_dimension:
(
embedding_dimension_from_existing,
index_type,
) = store.retrieve_existing_index()
if embedding_dimension_from_existing != embedding_dimension:
raise ValueError(
"The provided embedding function and vector index "
"dimensions do not match.\n"
f"Embedding function dimension: {embedding_dimension}\n"
f"Vector index dimension: {embedding_dimension_from_existing}"
)
else:
embedding_dimension, index_type = store.retrieve_existing_index()

# Raise error if relationship index type
if index_type == "RELATIONSHIP":
Expand Down Expand Up @@ -1408,6 +1429,7 @@ def from_existing_relationship_index(
embedding: Embeddings,
index_name: str,
search_type: SearchType = DEFAULT_SEARCH_TYPE,
embedding_dimension: Optional[int] = None,
**kwargs: Any,
) -> Neo4jVector:
"""
Expand All @@ -1428,10 +1450,24 @@ def from_existing_relationship_index(
store = cls(
embedding=embedding,
index_name=index_name,
embedding_dimension=embedding_dimension,
**kwargs,
)

embedding_dimension, index_type = store.retrieve_existing_index()
if embedding_dimension:
(
embedding_dimension_from_existing,
index_type,
) = store.retrieve_existing_index()
if embedding_dimension_from_existing != embedding_dimension:
raise ValueError(
"The provided embedding function and vector index "
"dimensions do not match.\n"
f"Embedding function dimension: {embedding_dimension}\n"
f"Vector index dimension: {embedding_dimension_from_existing}"
)
else:
embedding_dimension, index_type = store.retrieve_existing_index()

if not index_type:
raise ValueError(
Expand Down
4 changes: 2 additions & 2 deletions libs/neo4j/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions libs/neo4j/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-neo4j"
version = "0.1.1"
version = "0.2.0"
description = "An integration package connecting Neo4j and LangChain"
authors = []
readme = "README.md"
Expand All @@ -13,7 +13,7 @@ license = "MIT"

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
langchain-core = "^0.3.0"
langchain-core = "^0.3.8"
neo4j = "^5.25.0"
langchain = "^0.3.7"

Expand Down
Loading

0 comments on commit be1162f

Please sign in to comment.