# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

PortSystem          1.0
PortGroup           cmake 1.1
PortGroup           conflicts_build 1.0
PortGroup           legacysupport 1.1
PortGroup           boost 1.0
PortGroup           active_variants 1.1

# abseil needs clock_gettime, so request legacy-support up to and including Darwin 15.
# See https://github.com/macports/macports-ports/pull/19905#issuecomment-1680281240
legacysupport.newest_darwin_requires_legacy 15

name                apache-arrow
version             21.0.0

categories          devel
license             Apache-2
maintainers         nomaintainer

description         Powering In-Memory Analytics

long_description    Apache Arrow is a development platform for \
                    in-memory analytics. It contains a set of \
                    technologies that enable big data systems to \
                    process and move data fast.

boost.version       1.81

# Upstream build instructions:
# https://github.com/apache/arrow/blob/master/docs/source/developers/python.rst#build-and-test
cmake.generator     Ninja
cmake.build_type    Release
# Lower-case copy of the build type; handed to the pyarrow build as PYARROW_BUILD_TYPE.
set cmake_build_type [string tolower ${cmake.build_type}]

# Settings that apply only to the main apache-arrow port (the C++ libraries),
# not to the py*-pyarrow subports.
if {${subport} eq ${name}} {
    PortGroup           github 1.0

    github.setup        apache arrow ${version} ${name}-
    github.tarball_from archive
    revision            0

    checksums           rmd160  fb3f84eafb150c5199006fe782b65a5812dcbd6f \
                        sha256  e92401790fdba33bfb4b8aa522626d800ea7fda4b6f036aaf39849927d2cf88d \
                        size    17241418

    compiler.cxx_standard \
                        2017

    universal_variant   no

    patchfiles          patch-cpp-src-parquet-size_statistics.cc.diff

    # CMake is pointed at the cpp/ subdirectory of the extracted sources.
    cmake.source_dir    ${worksrcpath}/cpp

    # LLVM only needed for Gandiva: https://github.com/apache/arrow/issues/34999
    # I.e. it is here for no reason: Gandiva is not built by default and not asked to be built in configure.args.
    # At least remove a dependency on LLVM for PPC, where it is broken.
    # If Gandiva is added to the build, please add it inside the clause below.
    if {${build_arch} ni [list ppc ppc64]} {
        set llvm_version    16
        set llvm_prefix ${prefix}/libexec/llvm-${llvm_version}
        set llvm_bin    ${llvm_prefix}/bin

        cmake.module_path-append \
                        ${llvm_prefix}/lib/cmake/llvm
        depends_lib-append \
                        port:llvm-${llvm_version}
        configure.args-append \
                        -DLLVM_ROOT:PATH=${llvm_prefix}
    }

    # Extra CMake module directory under the gRPC install location.
    cmake.module_path-append \
                        ${prefix}/lib/cmake/grpc/modules

    depends_build-append \
                        port:pkgconfig

    depends_lib-append \
                        port:abseil \
                        path:lib/libssl.dylib:openssl \
                        port:brotli \
                        port:bzip2 \
                        port:c-ares \
                        port:gmake \
                        port:grpc \
                        path:lib/libglog.dylib:google-glog \
                        port:libutf8proc \
                        port:lz4 \
                        port:ninja \
                        port:protobuf3-cpp \
                        path:lib/pkgconfig/RapidJSON.pc:rapidjson \
                        port:re2 \
                        port:snappy \
                        port:thrift \
                        port:wget \
                        port:zlib \
                        port:zstd

    # https://trac.macports.org/ticket/67951
    conflicts_build     boost

    # Remove incorrect make dependency on SDK that cmake adds on some systems
    # See https://github.com/grpc/grpc/issues/24902
    post-configure {
        # Last path component of the SDK root; used in the sed patterns below.
        set macos_sdk [file tail ${configure.sdkroot}]

        # Collect every directory named "CMakeFiles" below the build tree.
        # lappend keeps this a proper Tcl list, so a path containing
        # whitespace or brace characters stays a single element when
        # iterated by foreach below.
        set cmakefiles_dirs {}
        fs-traverse f ${cmake.build_dir} {
            if { [file isdirectory ${f}]
                 && [string match "CMakeFiles" [file tail ${f}]] } {
                lappend cmakefiles_dirs ${f}
            }
        }
        foreach d ${cmakefiles_dirs} {
            # Delete dependency lines in *.make files that reference the SDK's
            # CoreFoundation.framework.
            foreach f [glob -nocomplain -type f ${d}/*.dir/*.make] {
                reinplace -q -E "/:\[\[:space:]]+\\/\[^\[:space:]]+\\/${macos_sdk}\\/\[^\[:space:]]+\\/CoreFoundation\\.framework/d" ${f}
            }
            # Replace the same SDK framework path in link.txt with a single space.
            # The dot is written as \\. so that sed receives an escaped,
            # literal dot, matching the pattern used for the *.make files.
            foreach f [glob -nocomplain -type f ${d}/*.dir/link.txt] {
                reinplace -q -E "s|(\[\[:space:]]+)/\[^\[:space:]]+/${macos_sdk}/\[^\[:space:]]+/CoreFoundation\\.framework\[\[:space:]]*| |g" ${f}
            }
        }
    }
}

# CMake options used for both apache-arrow and py*-pyarrow.
#
# macOS wheel:
# https://github.com/apache/arrow/blob/main/ci/scripts/python_wheel_macos_build.sh
# Re rpath see: https://arrow.apache.org/docs/r/articles/developers/setup.html#rpath-issues
# Also: https://github.com/apache/arrow/issues/35045,
# https://github.com/apache/arrow/issues/37010
#
# ARROW_ACERO is requested explicitly: ARROW_DATASET and ARROW_SUBSTRAIT are
# enabled in this list and build on Acero, so passing OFF for it here would
# contradict the rest of the configuration.

configure.args-append \
                    -DARROW_ACERO:BOOL=ON \
                    -DARROW_BUILD_SHARED:BOOL=ON \
                    -DARROW_BUILD_STATIC:BOOL=OFF \
                    -DARROW_COMPUTE:BOOL=ON \
                    -DARROW_CSV:BOOL=ON \
                    -DARROW_DATASET:BOOL=ON \
                    -DARROW_DEPENDENCY_USE_SHARED:BOOL=ON \
                    -DARROW_EXTRA_ERROR_CONTEXT:BOOL=ON \
                    -DARROW_FILESYSTEM:BOOL=ON \
                    -DARROW_FLIGHT:BOOL=ON \
                    -DARROW_GCS:BOOL=ON \
                    -DARROW_HDFS:BOOL=ON \
                    -DARROW_INSTALL_NAME_RPATH:BOOL=OFF \
                    -DARROW_JEMALLOC:BOOL=OFF \
                    -DARROW_JSON:BOOL=ON \
                    -DARROW_MIMALLOC:BOOL=ON \
                    -DARROW_ORC:BOOL=ON \
                    -DARROW_PACKAGE_KIND="python-wheel-macos" \
                    -DARROW_PARQUET:BOOL=ON \
                    -DARROW_PLASMA:BOOL=ON \
                    -DARROW_PROTOBUF_USE_SHARED:BOOL=ON \
                    -DARROW_PYTHON:BOOL=OFF \
                    -DARROW_RPATH_ORIGIN:BOOL=ON \
                    -DARROW_S3:BOOL=OFF \
                    -DARROW_SUBSTRAIT:BOOL=ON \
                    -DARROW_USE_CCACHE:BOOL=ON \
                    -DARROW_TENSORFLOW:BOOL=OFF \
                    -DARROW_UTF8PROC_USE_SHARED:BOOL=ON \
                    -DARROW_WITH_BROTLI:BOOL=ON \
                    -DARROW_WITH_BZ2:BOOL=ON \
                    -DARROW_WITH_LZ4:BOOL=ON \
                    -DARROW_WITH_SNAPPY:BOOL=ON \
                    -DARROW_WITH_ZLIB:BOOL=ON \
                    -DARROW_WITH_ZSTD:BOOL=ON \
                    -Dc-ares_INCLUDE_DIR:PATH=${prefix}/include \
                    -DgRPC_ROOT:PATH=${prefix} \
                    -DRE2_SOURCE:STRING=SYSTEM

# On arm Darwin hosts, pass CMAKE_APPLE_SILICON_PROCESSOR=arm64 to CMake.
if {${os.platform} eq "darwin" && ${os.arch} eq "arm"} {
    configure.args-append -DCMAKE_APPLE_SILICON_PROCESSOR=arm64
}

# The mimalloc version that arrow downloads is broken on systems older than
# 10.8 (Darwin 12) because MACH_TASK_BASIC_INFO_COUNT etc. are missing there,
# so switch mimalloc off on those systems.
if {${os.major} < 12 && ${os.platform} eq "darwin"} {
    configure.args-replace -DARROW_MIMALLOC:BOOL=ON -DARROW_MIMALLOC:BOOL=OFF
}

# The Arrow build auto-detects ccache when it is installed, and part of it
# then tries to write to CCACHE_DIR, which is not allowed when
# configure.ccache is off. In that case point CCACHE_DIR at a directory inside
# the work area for the configure, build and destroot phases, and ask Arrow
# not to use ccache.
if {![option configure.ccache]} {
    set arrow_ccache_env   CCACHE_DIR=${workpath}/.ccache
    configure.env-append   ${arrow_ccache_env}
    build.env-append       ${arrow_ccache_env}
    destroot.env-append    ${arrow_ccache_env}
    configure.args-replace -DARROW_USE_CCACHE:BOOL=ON -DARROW_USE_CCACHE:BOOL=OFF
}

destroot.target     install

# Main apache-arrow port only: build the command-line utilities and offer an
# optional tensorflow variant that flips ARROW_TENSORFLOW from OFF to ON.
if {${subport} eq ${name}} {
    configure.args-append -DARROW_BUILD_UTILITIES:BOOL=ON

    variant tensorflow description {Build with tensorflow support enabled} {
        configure.args-replace -DARROW_TENSORFLOW:BOOL=OFF -DARROW_TENSORFLOW:BOOL=ON
    }
}

# change_arrow_rpath binname libname_re
#
# Rewrites "@rpath/..." library references recorded in the file ${binname}.
#   binname    - path of the .so/.dylib to edit in place
#   libname_re - extended regex (grep -E) for the part after "@rpath/";
#                the caller in this file passes lib[^[:space:]]+
#
# Each match reported by `otool -L` has its "@rpath" prefix replaced with
# ${python.pkgd}/pyarrow. When the matched file name equals the file name of
# ${binname} the result is set with `install_name_tool -id`; otherwise the
# reference is rewritten with `install_name_tool -change`.
proc change_arrow_rpath { binname libname_re } {
    global prefix python.pkgd
    # "|| true" keeps the pipeline's exit status zero when grep finds nothing.
    set list_refs_cmd "otool -L \"${binname}\" | grep -E -o -e '@rpath/${libname_re}' || true"
    set bin_tail [file tail ${binname}]
    foreach rpath_ref [exec bash -c ${list_refs_cmd}] {
        set abs_ref [strsed ${rpath_ref} "s|@rpath|${python.pkgd}/pyarrow|"]
        if {${bin_tail} ne [file tail ${rpath_ref}]} {
            system "install_name_tool -change \"${rpath_ref}\" \"${abs_ref}\" \"${binname}\""
        } else {
            system "install_name_tool -id \"${abs_ref}\" \"${binname}\""
        }
    }
}

# Declare one py<version>-pyarrow subport per supported Python version; each
# subport records its own version number in python.version.
set python_versions {39 310 311 312 313}
foreach py_ver ${python_versions} {
    subport py${py_ver}-pyarrow {
        set python.version  ${py_ver}
    }
}

# Python bindings for supported Python versions
#
# Everything in the first clause applies to the py*-pyarrow subports only.
# The clause configures and builds the Arrow C++ libraries, installs them into
# a private staging directory (${configure_destdir}), builds a pyarrow wheel
# with setup.py, and installs that wheel into ${destroot} with pip.
# The else clause at the end applies to every other (sub)port.
if {[string match "py*" ${subport}]} {
    PortGroup       python 1.0

    python.rootname pyarrow
    revision        0

    # NOTE(review): presumably the python PortGroup has already set
    # categories, which would be why devel is prepended here - confirm.
    categories-prepend \
                    devel

    python.versions {*}${python_versions}

    # Checksums declared for the Python subports (they differ from those of
    # the main port).
    checksums       rmd160  b62c83584820bd6b9f45bd37a666e8c83ed79d53 \
                    sha256  5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc \
                    size    1133487

    depends_build-append \
                    port:bash \
                    port:cctools \
                    port:py${python.version}-pip \
                    port:py${python.version}-setuptools_scm

    depends_lib-append \
                    port:apache-arrow \
                    port:py${python.version}-brotli \
                    port:py${python.version}-cython \
                    port:py${python.version}-pycares \
                    port:py${python.version}-numpy

    depends_test-append \
                    port:py${python.version}-cffi \
                    port:py${python.version}-hypothesis \
                    port:py${python.version}-pandas \
                    port:py${python.version}-pytest \
                    port:py${python.version}-pytest-lazy-fixture \
                    port:py${python.version}-tz

    # the build must link against its own libraries
    cmake.install_rpath

    # NOTE(review): presumably needed because the python PortGroup disables
    # the configure phase by default - confirm.
    use_configure   yes

    # install apache-arrow for this ${python.branch} in ${configure_destdir}
    set configure_destdir ${workpath}/${name}-DESTDIR
    # Prepare the directory skeleton of the staging area from the mtree specs
    # found under ${portsharepath}/install.
    post-extract {
        # see portdestroot::destroot_start
        set mtree [findBinary mtree ${portutil::autoconf::mtree_path}]
        file mkdir "${configure_destdir}"
        if { ${os.platform} eq "darwin" } {
            system -W ${configure_destdir} "${mtree} -e -U -f [file join ${portsharepath} install macosx.mtree]"
            file mkdir "${configure_destdir}${applications_dir}"
            file mkdir "${configure_destdir}${frameworks_dir}"
        }
        file mkdir "${configure_destdir}${prefix}"
        system -W ${configure_destdir}${prefix} "${mtree} -e -U -f [file join ${portsharepath} install prefix.mtree]"
    }
    # Right after configuring, build everything with ninja and install the
    # result into the staging area by exporting DESTDIR.
    post-configure {
        system -W ${configure.dir} \
                    "ninja all -j${build.jobs} -v"
        system -W ${configure.dir} \
                    "export DESTDIR=${configure_destdir}; \
                    ninja install"
    }

    # Let CMake and the linker look in the staging area before ${prefix}.
    cmake.module_path-prepend \
                    ${configure_destdir}${prefix}/lib/cmake

    compiler.library_path \
                    "${configure_destdir}${prefix}/lib:${prefix}/lib"
    configure.ldflags-prepend \
                    -L${configure_destdir}${prefix}/lib

    # Adjust configure.pre_args: drop --prefix and replace the "$CC"/"$CXX"
    # placeholders with the concrete compiler paths. The adjusted list is
    # also passed to the pyarrow build through PYARROW_CMAKE_OPTIONS below.
    configure.pre_args-delete \
                    --prefix=${prefix}
    configure.pre_args-replace \
                    {-DCMAKE_C_COMPILER="$CC"} \
                    -DCMAKE_C_COMPILER:FILEPATH=${configure.cc}
    configure.pre_args-replace \
                    {-DCMAKE_CXX_COMPILER="$CXX"} \
                    -DCMAKE_CXX_COMPILER:FILEPATH=${configure.cxx}
    configure.pre_args-append \
                    {-DARROW_SIMD_LEVEL="SSE4_2"}
    # Only for generators other than Ninja: replace a CMAKE_MAKE_PROGRAM
    # value that carries the setup.py build command with the make program
    # reported by portbuild::build_getmaketype.
    if {${cmake.generator} ne {Ninja}} {
        configure.pre_args-replace \
                    "-DCMAKE_MAKE_PROGRAM=${python.bin} setup.py --no-user-cfg" \
                    -DCMAKE_MAKE_PROGRAM:STRING=[portbuild::build_getmaketype]
    }

    # macOS wheel:
    # https://github.com/apache/arrow/blob/main/ci/scripts/python_wheel_macos_build.sh
    configure.args-replace \
                    -DARROW_PYTHON:BOOL=OFF \
                    -DARROW_PYTHON:BOOL=ON
    # NOTE(review): the shared configure.args in this file append
    # -DARROW_RPATH_ORIGIN:BOOL=ON, so the OFF value searched for here does
    # not appear to be present and this replace looks like a no-op - confirm
    # which default is intended.
    configure.args-replace \
                    -DARROW_RPATH_ORIGIN:BOOL=OFF \
                    -DARROW_RPATH_ORIGIN:BOOL=ON
    configure.args-append \
                    -DPython3_EXECUTABLE:FILEPATH=${python.bin} \
                    -Dre2_DIR:PATH=${prefix}/lib/cmake/grpc/modules

    # Environment for the setup.py wheel build.
    # NOTE(review): ${build.dir} and ${configure.pre_args} are substituted
    # when this statement is parsed, i.e. before build.dir is reassigned
    # further down - confirm that is the intended value.
    # NOTE(review): PYARROW_WITH_PLASMA may no longer be recognised by
    # current pyarrow releases - verify against upstream.
    build.env-append \
                    "_PYTHON_HOST_PLATFORM=macosx-${macosx_deployment_target}-${build_arch}" \
                    "PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build.dir}/install/lib/pkgconfig" \
                    PYARROW_BOOST_USE_SHARED=1 \
                    PYARROW_BUILD_TYPE=${cmake_build_type} \
                    PYARROW_BUNDLE_ARROW_CPP=1 \
                    PYARROW_CMAKE_OPTIONS=[exec echo {*}${configure.pre_args}] \
                    PYARROW_INSTALL_TESTS=1 \
                    PYARROW_CMAKE_GENERATOR=${cmake.generator} \
                    PYARROW_WITH_DATASET=1 \
                    PYARROW_WITH_FLIGHT=1 \
                    PYARROW_WITH_HDFS=1 \
                    PYARROW_WITH_ORC=1 \
                    PYARROW_WITH_PARQUET=1 \
                    PYARROW_WITH_PLASMA=1 \
                    PYARROW_WITH_S3=0 \
                    PYARROW_WITH_TENSORFLOW=0

    # The build phase runs "setup.py --no-user-cfg bdist_wheel" from the
    # source directory; build.post_args is cleared.
    build.dir       ${worksrcpath}
    build.cmd       ${python.bin} setup.py --no-user-cfg
    build.target    bdist_wheel
    build.post_args

    # Add every wheel produced by the build phase to the pip arguments.
    pre-destroot {
        foreach f [glob ${build.dir}/dist/pyarrow-${version}*.whl] {
            destroot.args-append \
                    ${f}
        }
    }

    # The destroot phase runs pip for this Python branch, rooted at ${destroot}.
    destroot.cmd    pip-${python.branch}
    destroot.args   --ignore-installed \
                    --no-cache-dir \
                    --no-dependencies \
                    --root ${destroot}
    destroot.post_args

    # Rewrite @rpath/lib* references in every installed .so/.dylib under the
    # pyarrow package directory (see change_arrow_rpath).
    post-destroot {
        fs-traverse f ${destroot}${python.pkgd}/pyarrow {
            if {[file isfile ${f}]
                && [regexp {\.(so|dylib)$} ${f}]} {
                change_arrow_rpath ${f} {lib[^[:space:]]+}
            }
        }
    }

    # Install README.md as documentation and append -${python.branch} to the
    # name of every file installed in ${prefix}/bin.
    post-destroot {
        set docdir ${prefix}/share/doc/${subport}
        xinstall -d ${destroot}${docdir}
        xinstall -m 0644 -W ${worksrcpath} README.md ${destroot}${docdir}
        foreach f [glob -nocomplain ${destroot}${prefix}/bin/*] {
            move ${f} ${f}-${python.branch}
        }
    }

    # Enable the test phase; the test-data variables point into the source tree.
    test.run        yes
    test.env-append \
                    ARROW_TEST_DATA=${worksrcpath}/testing/data \
                    PARQUET_TEST_DATA=${worksrcpath}/cpp/submodules/parquet-testing/data
    test.target

    # Currently tensorflow is broken see Portfile
    # https://github.com/macports/macports-ports/blob/2fb2ee24cba7359d7c78aa9d88bebaadfa00b66b/python/py-tensorflow/Portfile#L12
    #     variant tensorflow description {Build with tensorflow support enabled} {
    #         require_active_variants apache-arrow tensorflow
    #
    #         build.env-replace \
    #                     PYARROW_WITH_TENSORFLOW=0 \
    #                     PYARROW_WITH_TENSORFLOW=1
    #
    #         # py-tensorflow-macos requires minimum macOS 12.0 and Python 3.8
    #         if {${os.major} >= 21 && ${python.version} >= 38} {
    #             depends_lib-append \
    #                     path:${python.pkgd}/tensorflow:py${python.version}-tensorflow-macos
    #         } else {
    #             depends_lib-append \
    #                     path:${python.pkgd}/tensorflow:py${python.version}-tensorflow
    #         }
    #     }

    # if { ![variant_isset tensorflow] } {
    #     notes-append "\
    # - ${subport} is now built with tensorflow support disabled by default.\
    # To enable it, install the port with '+tensorflow'.\
    # "
    # }

    # No livecheck for the Python subports.
    livecheck.type      none
} else {
    # Every other (sub)port: livecheck regex for the github PortGroup.
    github.livecheck.regex {([0-9.]+)}
}
