From 1856a42d8666f24ea63c23964c7579a9b6be7209 Mon Sep 17 00:00:00 2001 From: Maarten Pronk Date: Tue, 24 Sep 2024 23:08:58 +0200 Subject: [PATCH] First working version (read) of GeoArrow.. --- .github/workflows/CI.yml | 2 +- .gitignore | 3 ++ CondaPkg.toml | 3 ++ Project.toml | 18 +++++++++- README.md | 11 +++--- src/GeoArrow.jl | 14 ++++++-- src/arrow.jl | 76 ++++++++++++++++++++++++++++++++++++++++ src/io.jl | 25 +++++++++++++ src/type.jl | 25 +++++++++++++ test/links.txt | 36 +++++++++++++++++++ test/runtests.jl | 52 ++++++++++++++++++++++++++- 11 files changed, 256 insertions(+), 9 deletions(-) create mode 100644 CondaPkg.toml create mode 100644 src/arrow.jl create mode 100644 src/io.jl create mode 100644 src/type.jl create mode 100644 test/links.txt diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index adabdd8..b616700 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,7 +19,7 @@ jobs: matrix: version: - '1.6' - - '1.9' + - '1' - 'nightly' os: - ubuntu-latest diff --git a/.gitignore b/.gitignore index 20fe29d..9d4c9f9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ *.jl.mem /Manifest.toml /docs/build/ +test/.cpenv +.CondaPkg +*.arrow diff --git a/CondaPkg.toml b/CondaPkg.toml new file mode 100644 index 0000000..fe9d0dc --- /dev/null +++ b/CondaPkg.toml @@ -0,0 +1,3 @@ +[deps] +pyarrow = "" +geoarrow-pyarrow = "" diff --git a/Project.toml b/Project.toml index fe45f32..d6bc6fd 100644 --- a/Project.toml +++ b/Project.toml @@ -3,11 +3,27 @@ uuid = "5bc3a8d9-1bfb-4624-ba94-a391279174d6" authors = ["Maarten Pronk and contributors"] version = "0.1.0" +[deps] +Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" +Extents = "411431e0-e8b7-467b-b5e0-f676ba4f2910" +GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" +GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +WellKnownGeometry = "0f680547-7be7-4555-8820-bb198eeb646b" + [compat] +Arrow = "2.4" +GeoFormatTypes = 
"0.4" +GeoInterface = "1.2" +JSON3 = "1.14" +WellKnownGeometry = "0.2" julia = "1.6" [extras] +CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Downloads", "CondaPkg", "PythonCall"] diff --git a/README.md b/README.md index cfeaa29..83726c7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,10 @@ # GeoArrow +A [geoarrow](https://geoarrow.org) implementation in Julia. -[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://evetion.github.io/GeoArrow.jl/stable/) -[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://evetion.github.io/GeoArrow.jl/dev/) -[![Build Status](https://github.com/evetion/GeoArrow.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/evetion/GeoArrow.jl/actions/workflows/CI.yml?query=branch%3Amain) -[![Coverage](https://codecov.io/gh/evetion/GeoArrow.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/evetion/GeoArrow.jl) +*work in progress* + +[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliageo.github.io/GeoArrow.jl/stable/) +[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliageo.github.io/GeoArrow.jl/dev/) +[![Build Status](https://github.com/juliageo/GeoArrow.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/juliageo/GeoArrow.jl/actions/workflows/CI.yml?query=branch%3Amain) +[![Coverage](https://codecov.io/gh/juliageo/GeoArrow.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/juliageo/GeoArrow.jl) [![PkgEval](https://JuliaCI.github.io/NanosoldierReports/pkgeval_badges/G/GeoArrow.svg)](https://JuliaCI.github.io/NanosoldierReports/pkgeval_badges/report.html) diff --git a/src/GeoArrow.jl b/src/GeoArrow.jl index e566d26..177b1c9 100644 --- a/src/GeoArrow.jl +++ b/src/GeoArrow.jl @@ -1,5 +1,15 @@ module GeoArrow +using Arrow +using GeoInterface +using
GeoFormatTypes
+using JSON3
+using WellKnownGeometry
+using Extents
 
-# Write your package code here.
+include("type.jl")
+include("arrow.jl")
+include("io.jl")
 
-end
+export read, write
+
+end # module
diff --git a/src/arrow.jl b/src/arrow.jl
new file mode 100644
index 0000000..23cd08f
--- /dev/null
+++ b/src/arrow.jl
@@ -0,0 +1,76 @@
+ArrowTypes.isstringtype(::ArrowTypes.StructKind) = false  # NOTE(review): method on ArrowTypes' own function and type (type piracy) - confirm it is still needed
+
+const POINT = Symbol("geoarrow.point")  # extension names, used as Val{...} dispatch keys and arrowname results below
+const LINESTRING = Symbol("geoarrow.linestring")
+const POLYGON = Symbol("geoarrow.polygon")
+const MULTIPOINT = Symbol("geoarrow.multipoint")
+const MULTILINESTRING = Symbol("geoarrow.multilinestring")
+const MULTIPOLYGON = Symbol("geoarrow.multipolygon")
+const WKB = Symbol("geoarrow.wkb")
+const WKT = Symbol("geoarrow.wkt")
+const BOX = Symbol("geoarrow.box")
+
+function ArrowTypes.JuliaType(::Val{POINT}, x, metadata)
+    D = length(x.types)  # one field per coordinate
+    T = x.types[1]  # coordinate type is taken from the first field
+    return Geometry{PointTrait,D,T}
+end
+ArrowTypes.JuliaType(::Val{LINESTRING}, x, metadata) = Geometry{LineStringTrait}
+ArrowTypes.JuliaType(::Val{POLYGON}, x, metadata) = Geometry{PolygonTrait}
+ArrowTypes.JuliaType(::Val{MULTIPOINT}, x, metadata) = Geometry{MultiPointTrait}
+ArrowTypes.JuliaType(::Val{MULTILINESTRING}, x, metadata) = Geometry{MultiLineStringTrait}
+ArrowTypes.JuliaType(::Val{MULTIPOLYGON}, x, metadata) = Geometry{MultiPolygonTrait}
+ArrowTypes.JuliaType(::Val{WKB}, x, metadata) = GeoFormatTypes.WellKnownBinary
+ArrowTypes.JuliaType(::Val{WKT}, x, metadata) = GeoFormatTypes.WellKnownText
+function ArrowTypes.JuliaType(::Val{BOX}, x, metadata)
+    D = length(x.types)  # a box stores a min and a max per axis, so 4/6/8 fields
+    if D == 4
+        Extents.Extent{(:X, :Y)}
+    elseif D == 6
+        Extents.Extent{(:X, :Y, :Z)}
+    elseif D == 8
+        Extents.Extent{(:X, :Y, :Z, :M)}
+    else
+        throw(ArgumentError("Invalid number of dimensions for Extent"))
+    end
+end
+ArrowTypes.ArrowKind(::Type{Geometry}) = ArrowTypes.ListKind()  # NOTE(review): matches only the bare `Geometry` type, not parameterised ones - confirm intent
+ArrowTypes.ArrowKind(::Type{<:Geometry{PointTrait,D,T}}) where {D,T} = ArrowTypes.FixedSizeListKind{D,T}()
+ArrowTypes.ArrowKind(::Type{<:GeoFormatTypes.WellKnownBinary}) = ArrowTypes.PrimitiveKind()
+ArrowTypes.ArrowKind(::Type{<:GeoFormatTypes.WellKnownText}) = ArrowTypes.ListKind()
+
+ArrowTypes.ArrowType(::Type{<:GeoFormatTypes.WellKnownBinary}) = Vector{UInt8}
+ArrowTypes.ArrowType(::Type{<:GeoFormatTypes.WellKnownText}) = String
+ArrowTypes.ArrowType(::Type{Geometry{X,D,T,G}}) where {X,D,T,G} = G
+
+ArrowTypes.arrowname(::Type{Geometry{PointTrait}}) = POINT  # NOTE(review): these match only the partially-applied types, not concrete Geometry{X,D,T,G} - confirm intent
+ArrowTypes.arrowname(::Type{Geometry{LineStringTrait}}) = LINESTRING
+ArrowTypes.arrowname(::Type{Geometry{PolygonTrait}}) = POLYGON
+ArrowTypes.arrowname(::Type{Geometry{MultiPointTrait}}) = MULTIPOINT
+ArrowTypes.arrowname(::Type{Geometry{MultiLineStringTrait}}) = MULTILINESTRING
+ArrowTypes.arrowname(::Type{Geometry{MultiPolygonTrait}}) = MULTIPOLYGON
+ArrowTypes.arrowname(::Type{<:GeoFormatTypes.WellKnownBinary}) = WKB
+ArrowTypes.arrowname(::Type{<:GeoFormatTypes.WellKnownText}) = WKT
+ArrowTypes.arrowname(::Type{Extents.Extent}) = BOX
+
+ArrowTypes.toarrow(x::Geometry) = x.geom
+ArrowTypes.toarrow(x::GeoFormatTypes.WellKnownText) = GeoFormatTypes.val(x)
+ArrowTypes.toarrow(x::GeoFormatTypes.WellKnownBinary) = GeoFormatTypes.val(x)
+ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y)}) = (; xmin=x.X[1], ymin=x.Y[1], xmax=x.X[2], ymax=x.Y[2])
+ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y, :Z)}) = (; xmin=x.X[1], ymin=x.Y[1], zmin=x.Z[1], xmax=x.X[2], ymax=x.Y[2], zmax=x.Z[2])
+ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y, :Z, :M)}) = (; xmin=x.X[1], ymin=x.Y[1], zmin=x.Z[1], mmin=x.M[1], xmax=x.X[2], ymax=x.Y[2], zmax=x.Z[2], mmax=x.M[2])
+
+ArrowTypes.fromarrow(::Type{GeoFormatTypes.WellKnownBinary}, x) = GeoFormatTypes.WellKnownBinary(GeoFormatTypes.Geom(), x)
+ArrowTypes.fromarrow(::Type{GeoFormatTypes.WellKnownText}, x) = GeoFormatTypes.WellKnownText(GeoFormatTypes.Geom(), x)
+
+function ArrowTypes.fromarrow(::Type{Geometry{X}}, x) where {X}
+    nt = nested_eltype(x)  # innermost (non-array) element type of the nested column
+    D = length(nt.types)
+    T = nt.types[1]
+    return Geometry{X,D,T}(x)
+end
+ArrowTypes.fromarrow(::Type{Extents.Extent}, x) = Extents.Extent(X=(x.xmin, x.xmax), Y=(x.ymin, x.ymax))  # NOTE(review): only X/Y bounds are restored here
+
+nested_eltype(x) = nested_eltype(typeof(x))
+nested_eltype(::Type{T}) where {T<:AbstractArray} = nested_eltype(eltype(T))  # recurse through array element types
+nested_eltype(::Type{T}) where {T} = T
diff --git a/src/io.jl b/src/io.jl
new file mode 100644
index 0000000..dedc0eb
--- /dev/null
+++ b/src/io.jl
@@ -0,0 +1,25 @@
+"""
+    write(path, table; kwargs...)
+
+Write a geospatial table to a file. Like Arrow.write, but with geospatial metadata.
+Any kwargs are passed to Arrow.write.
+"""
+function write(path, t; kwargs...)
+    projjson = ""  # placeholder: no CRS is written yet
+    crs = Dict("crs" => projjson)
+    colmetadata =
+        Dict(:geometry => ["ARROW:extension:metadata" => JSON3.write(crs)])
+    return Arrow.write(path, t; colmetadata, kwargs...)
+end
+
+"""
+    read(path; kwargs...)
+
+Read a geospatial table from a file. Like Arrow.Table, but with geospatial metadata.
+Any kwargs are passed to Arrow.Table.
+"""
+function read(path; kwargs...)
+    t = Arrow.Table(path; kwargs...)
+    # TODO: use Arrow.getmetadata(t) once the geospatial metadata is exposed; it was fetched but unused.
+    return t
+end
diff --git a/src/type.jl b/src/type.jl
new file mode 100644
index 0000000..3419554
--- /dev/null
+++ b/src/type.jl
@@ -0,0 +1,25 @@
+struct Geometry{X,D,T,G}  # X: geometry trait type, D: coordinate count, T: coordinate type, G: type of the wrapped data
+    geom::G
+end
+Base.show(io::IO, x::Geometry{X,D,T}) where {X,D,T} = print(io, "$X geometry in $(D)D with eltype $T")
+Geometry{X,D,T}(x) where {X,D,T} = Geometry{X,D,T,typeof(x)}(x)
+Geometry{PointTrait}(x::Vararg{T,D}) where {T,D} = Geometry{PointTrait,D,T}(x)  # x is already an NTuple{D,T}; no reinterpret needed
+Geometry{PointTrait,D,T}(x, y, z) where {T,D} = Geometry{PointTrait,D,T}((x, y, z))
+Geometry{PointTrait,D,T}(x, y) where {T,D} = Geometry{PointTrait,D,T}((x, y))
+
+Base.getindex(x::Geometry{X,D,T}, i) where {X,D,T} = Geometry{childtrait(X()),D,T}(Base.getindex(x.geom, i))  # wrap the child in its own trait
+Base.getindex(x::Geometry{PointTrait,D,T}, i) where {D,T} = Base.getindex(x.geom, i)  # points index straight into the coordinates
+
+GeoInterface.isgeometry(::Type{<:Geometry}) = true
+GeoInterface.ncoord(_, ::Geometry{X,D}) where {X,D} = D
+GeoInterface.getcoord(::PointTrait, g::Geometry, i) = Base.getindex(g.geom, i)
+GeoInterface.geomtrait(::Geometry{X}) where {X} = X()
+GeoInterface.ngeom(_, g::Geometry) = length(g.geom)
+GeoInterface.getgeom(_, g::Geometry, i) = Base.getindex(g, i)
+
+childtrait(::LineStringTrait) = PointTrait
+childtrait(::LinearRingTrait) = PointTrait
+childtrait(::PolygonTrait) = LinearRingTrait
+childtrait(::MultiPointTrait) = PointTrait
+childtrait(::MultiLineStringTrait) = LineStringTrait
+childtrait(::MultiPolygonTrait) = PolygonTrait
diff --git a/test/links.txt b/test/links.txt
new file mode 100644
index 0000000..83135cb
--- /dev/null
+++ b/test/links.txt
@@ -0,0 +1,36 @@
+https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point.arrow
+https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point-interleaved.arrow
+https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon.arrow 
+https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-point_z-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-linestring_z-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-polygon_z-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipoint_z-wkb.arrow 
+https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multilinestring_z-wkb.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon_z.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon_z-interleaved.arrow +https://raw.githubusercontent.com/geopandas/geopandas/refs/heads/main/geopandas/io/tests/data/geoarrow/example-multipolygon_z-wkb.arrow diff --git a/test/runtests.jl b/test/runtests.jl index 3889705..2f29804 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,56 @@ using GeoArrow +using Arrow +using GeoInterface +using Downloads using Test +# ENV["JULIA_CONDAPKG_OFFLINE"] = true +ENV["JULIA_CONDAPKG_ENV"] = joinpath(@__DIR__, ".cpenv") +using PythonCall +# ga = pyimport("geoarrow.pyarrow") +feather = pyimport("pyarrow.feather") @testset "GeoArrow.jl" begin - # Write your tests here. 
+    @testset "Test datasets" begin
+        # Data taken from the geopandas tests, courtesy of Joris Van den Bossche
+        for url in readlines("links.txt")
+            fn = joinpath(mkpath("data"), split(url, "/")[end])  # mkpath creates the directory on a fresh checkout
+            isfile(fn) && continue
+            try
+                @info "Downloading $fn"
+                Downloads.download(url, fn)
+            catch e
+                @warn "Failed to download $fn" exception = e
+            end
+        end
+
+        for arrowfn in filter(endswith("arrow"), readdir("data", join=true))
+            @testset "$arrowfn" begin
+                t = Arrow.Table(arrowfn)
+                geom = t.geometry[1]
+                @test GeoInterface.isgeometry(geom)
+                @test GeoInterface.geomtrait(geom) isa GeoInterface.AbstractGeometryTrait
+                @test GeoInterface.ncoord(geom) in [2, 3]
+                @test GeoInterface.testgeometry(geom)
+
+                io = IOBuffer()
+                GeoArrow.write(io, t; compress=:zstd)
+                seekstart(io)
+                nt = GeoArrow.read(io, convert=true)
+                ngeom = nt.geometry[1]  # inspect the round-tripped table, not the original
+                @test GeoInterface.isgeometry(ngeom)
+
+                @test GeoInterface.coordinates(ngeom) == GeoInterface.coordinates(geom)  # Geometry defines no ==, so compare coordinates
+            end
+        end
+    end
+    @testset "Python" begin
+        for arrowfn in filter(endswith("arrow"), readdir("data", join=true))
+            @testset "$arrowfn" begin
+                t = Arrow.Table(arrowfn)
+                fn = joinpath(mkpath("data/write"), basename(arrowfn))  # output directory must exist before writing
+                GeoArrow.write(fn, t)
+                pt = feather.read_table(fn)
+            end
+        end
+    end
 end