diff --git a/Doc/ChangeLog b/Doc/ChangeLog index 80964cba2d..4abd225652 100644 --- a/Doc/ChangeLog +++ b/Doc/ChangeLog @@ -1,3 +1,10 @@ +Jan 21, 2026, version 10.3.1 + + * (67) bug fix: incorrect JIT kernel constructed for R=masker(C,M,Z) + when R is hypersparse. Found by Roi Lipman, FalkorDB. + * (66) bug fix: GB_bitonic did not compile with the MS cl compiler. + Found by Erik Welch, NVIDIA. + Dec 3, 2025, version 10.3.0 * GrB_extract: performance improvement for C=A(I,J) when A is very large diff --git a/Doc/GraphBLAS_UserGuide.pdf b/Doc/GraphBLAS_UserGuide.pdf index cd01ef2d65..14bc183482 100644 Binary files a/Doc/GraphBLAS_UserGuide.pdf and b/Doc/GraphBLAS_UserGuide.pdf differ diff --git a/Doc/GraphBLAS_version.tex b/Doc/GraphBLAS_version.tex index 0baf6d5321..267f6f81f7 100644 --- a/Doc/GraphBLAS_version.tex +++ b/Doc/GraphBLAS_version.tex @@ -1,5 +1,5 @@ % version of SuiteSparse:GraphBLAS \date{VERSION -10.3.0, -Dec 3, 2025} +10.3.1, +Jan 21, 2026} diff --git a/Doc/UserGuide/GrB_release.tex b/Doc/UserGuide/GrB_release.tex index 976d3abbb6..7dcd6c0a1e 100644 --- a/Doc/UserGuide/GrB_release.tex +++ b/Doc/UserGuide/GrB_release.tex @@ -5,6 +5,15 @@ \section{Release Notes} \begin{itemize} +\item Jan 21, 2026: version 10.3.1 + + \begin{itemize} + \item (67) bug fix: incorrect JIT kernel constructed for R=masker(C,M,Z) + when R is hypersparse. Found by Roi Lipman, FalkorDB. + \item (66) bug fix: \verb'GB_bitonic' did not compile with the MS cl + compiler. Found by Erik Welch, NVIDIA. 
+ \end{itemize} + \item Dec 3, 2025: version 10.3.0 \begin{itemize} diff --git a/Include/GraphBLAS.h b/Include/GraphBLAS.h index cacf58ad11..86ab614231 100644 --- a/Include/GraphBLAS.h +++ b/Include/GraphBLAS.h @@ -1,4 +1,4 @@ -// SuiteSparse:GraphBLAS 10.3.0 +// SuiteSparse:GraphBLAS 10.3.1 //------------------------------------------------------------------------------ // GraphBLAS.h: definitions for the GraphBLAS package //------------------------------------------------------------------------------ @@ -286,10 +286,10 @@ // The version of this implementation, and the GraphBLAS API version: #define GxB_IMPLEMENTATION_NAME "SuiteSparse:GraphBLAS" -#define GxB_IMPLEMENTATION_DATE "Dec 3, 2025" +#define GxB_IMPLEMENTATION_DATE "Jan 21, 2026" #define GxB_IMPLEMENTATION_MAJOR 10 #define GxB_IMPLEMENTATION_MINOR 3 -#define GxB_IMPLEMENTATION_SUB 0 +#define GxB_IMPLEMENTATION_SUB 1 #define GxB_SPEC_DATE "Dec 22, 2023" #define GxB_SPEC_MAJOR 2 #define GxB_SPEC_MINOR 1 diff --git a/README.md b/README.md index be849071b3..1d416dd328 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2025, All Rights Reserved. 
SPDX-License-Identifier: Apache-2.0 -VERSION 10.3.0, Dec 3, 2025 +VERSION 10.3.1, Jan 21, 2026 SuiteSparse:GraphBLAS is a complete implementation of the GraphBLAS standard, which defines a set of sparse matrix operations on an extended algebra of diff --git a/Source/global/GB_Global.c b/Source/global/GB_Global.c index c2a010509c..fc7e0a237c 100644 --- a/Source/global/GB_Global.c +++ b/Source/global/GB_Global.c @@ -253,7 +253,7 @@ static GB_Global_struct GB_Global = .gpu_count = 0, // # of GPUs in the system // OpenMP locks - .lock_is_created = {0, 0, 0, 0}, + .lock_is_created = {0, 0, 0, 0, 0, 0, 0, 0}, // of size GB_GLOBAL_NLOCKS } ; //============================================================================== diff --git a/Source/jit_kernels/include/GB_jit_kernel_proto.h b/Source/jit_kernels/include/GB_jit_kernel_proto.h index 5bdcb65789..cad1dc0cd7 100644 --- a/Source/jit_kernels/include/GB_jit_kernel_proto.h +++ b/Source/jit_kernels/include/GB_jit_kernel_proto.h @@ -727,7 +727,12 @@ GrB_Info GB_jit_kernel_kroner \ ( \ GrB_Matrix C, \ const GrB_Matrix A, \ + const bool A_transpose, \ const GrB_Matrix B, \ + const bool B_transpose, \ + const GrB_Matrix Mask, \ + const bool Mask_struct, \ + const bool Mask_comp, \ const int nthreads, \ const void *theta, \ const GB_callback_struct *restrict my_callback \ diff --git a/Source/jit_wrappers/GB_kroner_jit.c b/Source/jit_wrappers/GB_kroner_jit.c index 174ec0e37e..1919b47f7d 100644 --- a/Source/jit_wrappers/GB_kroner_jit.c +++ b/Source/jit_wrappers/GB_kroner_jit.c @@ -20,7 +20,12 @@ GrB_Info GB_kroner_jit const GrB_BinaryOp binaryop, const bool flipij, const GrB_Matrix A, + const bool A_transpose, const GrB_Matrix B, + const bool B_transpose, + const GrB_Matrix Mask, + const bool Mask_struct, + const bool Mask_comp, const int nthreads ) { @@ -41,7 +46,7 @@ GrB_Info GB_kroner_jit GB_JIT_KERNEL_KRONER, /* is_ewisemult: */ false, /* C_iso: */ C->iso, /* C_in_iso: */ false, C_sparsity, C->type, C->p_is_32, C->j_is_32, 
C->i_is_32, - /* M: */ NULL, true, false, binaryop, flipij, false, A, B) ; + /* M: */ Mask, Mask_struct, Mask_comp, binaryop, flipij, false, A, B) ; //-------------------------------------------------------------------------- // get the kernel function pointer, loading or compiling it if needed @@ -60,6 +65,6 @@ GrB_Info GB_kroner_jit #include "include/GB_pedantic_disable.h" GB_jit_dl_function GB_jit_kernel = (GB_jit_dl_function) dl_function ; - return (GB_jit_kernel (C, A, B, nthreads, binaryop->theta, &GB_callback)) ; + return (GB_jit_kernel (C, A, A_transpose, B, B_transpose, Mask, Mask_struct, Mask_comp, nthreads, binaryop->theta, &GB_callback)) ; } diff --git a/Source/jit_wrappers/GB_masker_phase1_jit.c b/Source/jit_wrappers/GB_masker_phase1_jit.c index f43fe6b915..410939a585 100644 --- a/Source/jit_wrappers/GB_masker_phase1_jit.c +++ b/Source/jit_wrappers/GB_masker_phase1_jit.c @@ -29,6 +29,7 @@ GrB_Info GB_masker_phase1_jit // count nnz in each R(:,j) const int64_t *restrict R_to_Z, const bool Rp_is_32, // if true, Rp is 32-bit; else 64-bit const bool Rj_is_32, // if true, Rh is 32-bit; else 64-bit + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -45,7 +46,8 @@ GrB_Info GB_masker_phase1_jit // count nnz in each R(:,j) GB_jit_encoding encoding ; char *suffix ; uint64_t hash = GB_encodify_masker (&encoding, &suffix, - GB_JIT_KERNEL_MASKER_PHASE1, NULL, Rp_is_32, Rj_is_32, false, + GB_JIT_KERNEL_MASKER_PHASE1, R_sparsity, /* rtype: */ NULL, + Rp_is_32, Rj_is_32, /* Ri is not accessed: */ false, M, Mask_struct, Mask_comp, C, Z) ; //-------------------------------------------------------------------------- diff --git a/Source/jit_wrappers/GB_masker_phase2_jit.c b/Source/jit_wrappers/GB_masker_phase2_jit.c index c014467352..7897a49ef9 100644 --- a/Source/jit_wrappers/GB_masker_phase2_jit.c +++ b/Source/jit_wrappers/GB_masker_phase2_jit.c @@ 
-23,6 +23,7 @@ GrB_Info GB_masker_phase2_jit // phase2 for R = masker (C,M,Z) const int64_t *restrict R_to_M, const int64_t *restrict R_to_C, const int64_t *restrict R_to_Z, + const int R_sparsity, // any sparsity format // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -45,7 +46,8 @@ GrB_Info GB_masker_phase2_jit // phase2 for R = masker (C,M,Z) GB_jit_encoding encoding ; char *suffix ; uint64_t hash = GB_encodify_masker (&encoding, &suffix, - GB_JIT_KERNEL_MASKER_PHASE2, R, R->p_is_32, R->j_is_32, R->i_is_32, + GB_JIT_KERNEL_MASKER_PHASE2, R_sparsity, R->type, + R->p_is_32, R->j_is_32, R->i_is_32, M, Mask_struct, Mask_comp, C, Z) ; //-------------------------------------------------------------------------- diff --git a/Source/jitifyer/GB_encodify_masker.c b/Source/jitifyer/GB_encodify_masker.c index f1588eca6c..628f881b48 100644 --- a/Source/jitifyer/GB_encodify_masker.c +++ b/Source/jitifyer/GB_encodify_masker.c @@ -20,7 +20,8 @@ uint64_t GB_encodify_masker // encode a masker problem char **suffix, // suffix for user-defined kernel // input: const GB_jit_kcode kcode, // kernel to encode - const GrB_Matrix R, // may be NULL, for phase1 + const int R_sparsity, // any sparsity format + const GrB_Type rtype, const bool Rp_is_32, // if true, R->p is 32 bit; else 64 bit const bool Rj_is_32, // if true, R->h is 32 bit; else 64 bit const bool Ri_is_32, // if true, R->i is 32 bit; else 64 bit @@ -33,11 +34,10 @@ uint64_t GB_encodify_masker // encode a masker problem { //-------------------------------------------------------------------------- - // check if the R->type is JIT'able + // check if the rtype is JIT'able //-------------------------------------------------------------------------- - GrB_Type rtype = (R == NULL) ? 
NULL : R->type ; - if (R != NULL && rtype->hash == UINT64_MAX) + if (rtype != NULL && rtype->hash == UINT64_MAX) { // cannot JIT this type memset (encoding, 0, sizeof (GB_jit_encoding)) ; @@ -50,7 +50,8 @@ uint64_t GB_encodify_masker // encode a masker problem //-------------------------------------------------------------------------- GB_encodify_kcode (encoding, kcode) ; - GB_enumify_masker (&encoding->code, R, Rp_is_32, Rj_is_32, Ri_is_32, + GB_enumify_masker (&encoding->code, R_sparsity, rtype, + Rp_is_32, Rj_is_32, Ri_is_32, M, Mask_struct, Mask_comp, C, Z) ; //-------------------------------------------------------------------------- diff --git a/Source/jitifyer/GB_enumify_masker.c b/Source/jitifyer/GB_enumify_masker.c index 2f87cce1bd..b6bbff0dca 100644 --- a/Source/jitifyer/GB_enumify_masker.c +++ b/Source/jitifyer/GB_enumify_masker.c @@ -15,7 +15,8 @@ void GB_enumify_masker // enumify a masker problem // output: uint64_t *method_code, // unique encoding of the entire operation // input: - const GrB_Matrix R, // NULL for phase 1 + const int R_sparsity, // any sparsity format + const GrB_Type rtype, // the type of R (NULL for phase1) const bool Rp_is_32, // if true, R->p is 32-bit; else 64-bit const bool Rj_is_32, // if true, R->h is 32-bit; else 64-bit const bool Ri_is_32, // if true, R->i is 32-bit; else 64-bit @@ -28,12 +29,11 @@ void GB_enumify_masker // enumify a masker problem { //-------------------------------------------------------------------------- - // get the types of R, C, and Z + // check inputs //-------------------------------------------------------------------------- - GrB_Type rtype = (R == NULL) ? 
NULL : R->type ; - ASSERT (GB_IMPLIES (R != NULL, rtype == C->type)) ; - ASSERT (GB_IMPLIES (R != NULL, rtype == Z->type)) ; + ASSERT (GB_IMPLIES (rtype != NULL, rtype == C->type)) ; + ASSERT (GB_IMPLIES (rtype != NULL, rtype == Z->type)) ; //-------------------------------------------------------------------------- // enumify the types @@ -55,7 +55,6 @@ void GB_enumify_masker // enumify a masker problem // enumify the sparsity structures of R, C, M, and Z //-------------------------------------------------------------------------- - int R_sparsity = GB_sparsity (R) ; int C_sparsity = GB_sparsity (C) ; int M_sparsity = GB_sparsity (M) ; int Z_sparsity = GB_sparsity (Z) ; diff --git a/Source/jitifyer/GB_stringify.h b/Source/jitifyer/GB_stringify.h index 3fa499df20..5471bbaf24 100644 --- a/Source/jitifyer/GB_stringify.h +++ b/Source/jitifyer/GB_stringify.h @@ -1573,6 +1573,7 @@ GrB_Info GB_masker_phase1_jit // count nnz in each R(:,j) const int64_t *restrict R_to_Z, const bool Rp_is_32, // if true, Rp is 32-bit; else 64-bit const bool Rj_is_32, // if true, Rh is 32-bit; else 64-bit + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -1592,6 +1593,7 @@ GrB_Info GB_masker_phase2_jit // phase2 for R = masker (C,M,Z) const int64_t *restrict R_to_M, const int64_t *restrict R_to_C, const int64_t *restrict R_to_Z, + const int R_sparsity, // any sparsity format // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -1614,7 +1616,8 @@ uint64_t GB_encodify_masker // encode a masker problem char **suffix, // suffix for user-defined kernel // input: const GB_jit_kcode kcode, // kernel to encode - const GrB_Matrix R, // may be NULL, for phase1 + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE + const GrB_Type rtype, const bool Rp_is_32, // if true, R->p is 32 bit; else 64 bit const bool 
Rj_is_32, // if true, R->h is 32 bit; else 64 bit const bool Ri_is_32, // if true, R->i is 32 bit; else 64 bit @@ -1630,7 +1633,8 @@ void GB_enumify_masker // enumify a masker problem // output: uint64_t *method_code, // unique encoding of the entire operation // input: - const GrB_Matrix R, // NULL for phase 1 + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE + const GrB_Type rtype, // the type of R (NULL for phase1) const bool Rp_is_32, // if true, R->p is 32-bit; else 64-bit const bool Rj_is_32, // if true, R->h is 32-bit; else 64-bit const bool Ri_is_32, // if true, R->i is 32-bit; else 64-bit @@ -1833,7 +1837,12 @@ GrB_Info GB_kroner_jit const GrB_BinaryOp binaryop, const bool flipij, const GrB_Matrix A, + const bool A_transpose, const GrB_Matrix B, + const bool B_transpose, + const GrB_Matrix Mask, + const bool Mask_struct, + const bool Mask_comp, const int nthreads ) ; diff --git a/Source/kronecker/GB_kron.c b/Source/kronecker/GB_kron.c index 5c462da512..7e414d7fbe 100644 --- a/Source/kronecker/GB_kron.c +++ b/Source/kronecker/GB_kron.c @@ -23,16 +23,76 @@ GB_Matrix_free (&T) ; \ } +#define GBI(Ai,p,avlen) ((Ai == NULL) ? ((p) % (avlen)) : Ai [p]) + +#define GBB(Ab,p) ((Ab == NULL) ? 1 : Ab [p]) + +#define GBP(Ap,k,avlen) ((Ap == NULL) ? ((k) * (avlen)) : Ap [k]) + +#define GBH(Ah,k) ((Ah == NULL) ? (k) : Ah [k]) + #include "kronecker/GB_kron.h" #include "mxm/GB_mxm.h" #include "transpose/GB_transpose.h" #include "mask/GB_accum_mask.h" +static bool GB_lookup_xoffset ( + GrB_Index *p, + GrB_Matrix A, + GrB_Index row, + GrB_Index col +) +{ + GrB_Index vector = A->is_csc ? col : row ; + GrB_Index coord = A->is_csc ? row : col ; + + if (A->p == NULL) + { + GrB_Index offset = vector * A->vlen + coord ; + if (A->b == NULL || ((int8_t *)A->b)[offset]) + { + *p = A->iso ? 0 : offset ; + return true ; + } + return false ; + } + + int64_t start, end ; + bool res ; + + if (A->h == NULL) + { + start = A->p_is_32 ? 
((uint32_t *)A->p)[vector] : ((uint64_t *)A->p)[vector] ; + end = A->p_is_32 ? ((uint32_t *)A->p)[vector + 1] : ((uint64_t *)A->p)[vector + 1] ; + end-- ; + if (start > end) return false ; + res = GB_binary_search(coord, A->i, A->i_is_32, &start, &end) ; + if (res) { *p = A->iso ? 0 : start ; } + return res ; + } + else + { + start = 0 ; end = A->plen - 1 ; + res = GB_binary_search(vector, A->h, A->j_is_32, &start, &end) ; + if (!res) return false ; + int64_t k = start ; + start = A->p_is_32 ? ((uint32_t *)A->p)[k] : ((uint64_t *)A->p)[k] ; + end = A->p_is_32 ? ((uint32_t *)A->p)[k+1] : ((uint64_t *)A->p)[k+1] ; + end-- ; + if (start > end) return false ; + res = GB_binary_search(coord, A->i, A->i_is_32, &start, &end) ; + if (res) { *p = A->iso ? 0 : start ; } + return res ; + } +} + +#include "emult/GB_emult.h" + GrB_Info GB_kron // C = accum (C, kron(A,B)) ( GrB_Matrix C, // input/output matrix for results const bool C_replace, // if true, clear C before writing to it - const GrB_Matrix M, // optional mask for C, unused if NULL + const GrB_Matrix Mask, // optional mask for C, unused if NULL const bool Mask_comp, // if true, use !M const bool Mask_struct, // if true, use the only structure of M const GrB_BinaryOp accum, // optional accum for Z=accum(C,T) @@ -51,6 +111,8 @@ GrB_Info GB_kron // C = accum (C, kron(A,B)) // C may be aliased with M, A, and/or B + GrB_Matrix M = Mask ; + GrB_Info info ; struct GB_Matrix_opaque T_header, AT_header, BT_header ; GrB_Matrix T = NULL, AT = NULL, BT = NULL ; @@ -104,6 +166,81 @@ GrB_Info GB_kron // C = accum (C, kron(A,B)) // quick return if an empty mask is complemented GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ; + // check if it's possible to apply mask immediately in kron + // TODO: MT should have its own 32/64 bitness controls + + bool Mask_is_applicable = M != NULL && !Mask_comp ; + if (Mask_is_applicable) { + bool MT_hypersparse = (A->h != NULL) || (B->h != NULL) ; + size_t allocated = 0 ; + + 
GrB_Matrix MT = NULL ; struct GB_Matrix_opaque MT_header ; + GB_CLEAR_MATRIX_HEADER (MT, &MT_header) ; + + bool A_is_pattern, B_is_pattern ; + GB_binop_pattern (&A_is_pattern, &B_is_pattern, false, op->opcode) ; + + GrB_Info masked_kroner_info = GB_kroner (MT, C->is_csc, op, false, A, A_is_pattern, A_transpose, B, B_is_pattern, B_transpose, + M, Mask_comp, Mask_struct, Werk) ; + if (masked_kroner_info != GrB_SUCCESS) + { + return masked_kroner_info ; + } + + if (MT->is_csc != C->is_csc) { + GrB_Info MTtranspose = GB_transpose_in_place (MT, true, Werk) ; + if (MTtranspose != GrB_SUCCESS) + { + GB_FREE_WORKSPACE ; + GB_Matrix_free (&MT) ; + return MTtranspose ; + } + } + + if (MT_hypersparse) + { + uint32_t *MTh32 = NULL ; uint64_t *MTh64 = NULL ; + if (MT->j_is_32) + { + MTh32 = GB_malloc_memory (MT->vdim, sizeof(uint32_t), &allocated) ; + } + else + { + MTh64 = GB_malloc_memory (MT->vdim, sizeof(uint64_t), &allocated) ; + } + + if (MTh32 == NULL && MTh64 == NULL) + { + GB_FREE_WORKSPACE ; + GB_Matrix_free (&MT) ; + return GrB_OUT_OF_MEMORY ; + } + + double work = M->vdim ; + int nthreads_max = GB_Context_nthreads_max ( ) ; + double chunk = GB_Context_chunk ( ) ; + int masked_hyper_threads = GB_nthreads (work, chunk, nthreads_max) ; + + #pragma omp parallel for num_threads(masked_hyper_threads) schedule(static) + for (GrB_Index i = 0; i < MT->vdim; i++) + { + if (MT->j_is_32) { MTh32[i] = i ; } else { MTh64[i] = i ; } + } + + MT->h = MTh32 ? 
(void *)MTh32 : (void *)MTh64 ; + + GrB_Info MThyperprune = GB_hyper_prune (MT, Werk) ; + if (MThyperprune != GrB_SUCCESS) + { + GB_FREE_WORKSPACE ; + GB_Matrix_free (&MT) ; + return MThyperprune ; + } + } + + return (GB_accum_mask (C, NULL, NULL, accum, &MT, C_replace, Mask_comp, Mask_struct, Werk)) ; + } + //-------------------------------------------------------------------------- // transpose A and B if requested //-------------------------------------------------------------------------- @@ -152,8 +289,8 @@ GrB_Info GB_kron // C = accum (C, kron(A,B)) GB_CLEAR_MATRIX_HEADER (T, &T_header) ; GB_OK (GB_kroner (T, T_is_csc, op, flipij, - A_transpose ? AT : A, A_is_pattern, - B_transpose ? BT : B, B_is_pattern, Werk)) ; + A_transpose ? AT : A, A_is_pattern, A_transpose, + B_transpose ? BT : B, B_is_pattern, B_transpose, M, Mask_comp, Mask_struct, Werk)) ; GB_FREE_WORKSPACE ; ASSERT_MATRIX_OK (T, "T = kron(A,B)", GB0) ; diff --git a/Source/kronecker/GB_kron.h b/Source/kronecker/GB_kron.h index a6c1a3f3ae..a25b0cfa15 100644 --- a/Source/kronecker/GB_kron.h +++ b/Source/kronecker/GB_kron.h @@ -15,7 +15,7 @@ GrB_Info GB_kron // C = accum (C, kron(A,B)) ( GrB_Matrix C, // input/output matrix for results const bool C_replace, // if true, clear C before writing to it - const GrB_Matrix M, // optional mask for C, unused if NULL + const GrB_Matrix Mask, // optional mask for C, unused if NULL const bool Mask_comp, // if true, use !M const bool Mask_struct, // if true, use the only structure of M const GrB_BinaryOp accum, // optional accum for Z=accum(C,T) @@ -35,8 +35,13 @@ GrB_Info GB_kroner // C = kron (A,B) const bool flipij, // if true, i and j are flipped: z=(x,y,j,i) const GrB_Matrix A, // input matrix bool A_is_pattern, // true if values of A are not used + bool A_transpose, const GrB_Matrix B, // input matrix bool B_is_pattern, // true if values of B are not used + bool B_transpose, + const GrB_Matrix Mask, + const bool Mask_comp, + const bool Mask_struct, GB_Werk 
Werk ) ; diff --git a/Source/kronecker/GB_kroner.c b/Source/kronecker/GB_kroner.c index afbb655ee4..fabfd2ceb2 100644 --- a/Source/kronecker/GB_kroner.c +++ b/Source/kronecker/GB_kroner.c @@ -24,12 +24,187 @@ GB_phybix_free (C) ; \ } +#define GBI(Ai,p,avlen) ((Ai == NULL) ? ((p) % (avlen)) : Ai [p]) +#define GBB(Ab,p) ((Ab == NULL) ? 1 : Ab [p]) +#define GBP(Ap,k,avlen) ((Ap == NULL) ? ((k) * (avlen)) : Ap [k]) +#define GBH(Ah,k) ((Ah == NULL) ? (k) : Ah [k]) + +#include "mxm/GB_mxm.h" #include "kronecker/GB_kron.h" #include "emult/GB_emult.h" #include "slice/include/GB_search_for_vector.h" #include "jitifyer/GB_stringify.h" -GrB_Info GB_kroner // C = kron (A,B) +//------------------------------------------------------------------------------ +// GB_lookup_xoffset: find the offset of (row,col) in a Ax if present +//------------------------------------------------------------------------------ + +static bool GB_lookup_xoffset +( + GrB_Index *p, + GrB_Matrix A, + GrB_Index row, + GrB_Index col +) +{ + GrB_Index vector = A->is_csc ? col : row ; + GrB_Index coord = A->is_csc ? row : col ; + + if (A->p == NULL) + { + GrB_Index offset = vector * A->vlen + coord ; + if (A->b == NULL || ((int8_t *) A->b) [offset]) + { + *p = A->iso ? 0 : offset ; + return true ; + } + return false ; + } + + int64_t start, end ; + bool res ; + + if (A->h == NULL) + { + start = A->p_is_32 ? ((uint32_t *) A->p) [vector] + : ((uint64_t *) A->p) [vector] ; + end = A->p_is_32 ? ((uint32_t *) A->p) [vector + 1] + : ((uint64_t *) A->p) [vector + 1] ; + end-- ; + if (start > end) return false ; + res = GB_binary_search (coord, A->i, A->i_is_32, &start, &end) ; + if (res) { *p = A->iso ? 0 : start ; } + return res ; + } + else + { + start = 0 ; end = A->plen - 1 ; + res = GB_binary_search (vector, A->h, A->j_is_32, &start, &end) ; + if (!res) return false ; + int64_t k = start ; + start = A->p_is_32 ? ((uint32_t *) A->p) [k] + : ((uint64_t *) A->p) [k] ; + end = A->p_is_32 ? 
((uint32_t *) A->p) [k + 1] + : ((uint64_t *) A->p) [k + 1] ; + end-- ; + if (start > end) return false ; + res = GB_binary_search (coord, A->i, A->i_is_32, &start, &end) ; + if (res) { *p = A->iso ? 0 : start ; } + return res ; + } +} + +//------------------------------------------------------------------------------ +// GB_kroner_generic: generic kernel for masked and default paths +//------------------------------------------------------------------------------ + +static GrB_Info GB_kroner_generic +( + GrB_Matrix C, + const GrB_BinaryOp op, + const bool flipij, + const GrB_Matrix A, + const GrB_Matrix B, + bool A_is_pattern, + bool B_is_pattern, + bool A_transpose, + bool B_transpose, + const GrB_Matrix Mask, + const bool Mask_comp, + const bool Mask_struct, + const bool C_is_full, + const int nthreads +) +{ + GrB_Type ctype = op->ztype ; + const size_t csize = ctype->size ; + + GB_Ap_DECLARE (Ap, const) ; GB_Ap_PTR (Ap, A) ; + GB_Ah_DECLARE (Ah, const) ; GB_Ah_PTR (Ah, A) ; + const int64_t avlen = A->vlen ; + + GB_Bp_DECLARE (Bp, const) ; GB_Bp_PTR (Bp, B) ; + GB_Bh_DECLARE (Bh, const) ; GB_Bh_PTR (Bh, B) ; + const int64_t bvlen = B->vlen ; + const int64_t bnvec = B->nvec ; + + GB_Cp_DECLARE (Cp, ) ; GB_Cp_PTR (Cp, C) ; + const int64_t cnvec = C->nvec ; + const int64_t cvlen = C->vlen ; + const int64_t cnz = C->nvals ; + + #define GB_NO_MASK (Mask == NULL) + #define GB_MASK_STRUCT Mask_struct + #define GB_MASK_COMP Mask_comp + #define GB_Cp_IS_32 C->p_is_32 + #define GB_A_TYPE GB_void + #define GB_B_TYPE GB_void + #define GB_C_TYPE GB_void + #define GB_A_ISO A_iso + #define GB_B_ISO B_iso + #define GB_C_ISO C_iso + #define GB_C_IS_FULL C_is_full + + const bool A_iso = A->iso ; + const bool B_iso = B->iso ; + const bool C_iso = C->iso ; + const int64_t asize = A->type->size ; + const int64_t bsize = B->type->size ; + + GxB_binary_function fmult = op->binop_function ; + GxB_index_binary_function fmult_idx = op->idxbinop_function ; + const void *theta = op->theta 
; + + GB_cast_function cast_A = NULL, cast_B = NULL ; + if (!A_is_pattern) + cast_A = GB_cast_factory (op->xtype->code, A->type->code) ; + if (!B_is_pattern) + cast_B = GB_cast_factory (op->ytype->code, B->type->code) ; + + #define GB_DECLAREA(a) GB_void a [GB_VLA(asize)] + #define GB_DECLAREB(b) GB_void b [GB_VLA(bsize)] + + #define GB_GETA(a,Ax,p,iso) \ + { \ + if (!A_is_pattern) \ + cast_A (a, Ax + (p)*asize, asize) ; \ + } + + #define GB_GETB(b,Bx,p,iso) \ + { \ + if (!B_is_pattern) \ + cast_B (b, Bx + (p)*bsize, bsize) ; \ + } + + #define GB_KRONECKER_OP(Cx,pC,a,ix,jx,b,iy,jy) \ + { \ + if (fmult != NULL) \ + { \ + fmult (Cx + (pC)*csize, a, b) ; \ + } \ + else \ + { \ + if (flipij) \ + fmult_idx (Cx + (pC)*csize, \ + a, jx, ix, b, jy, iy, theta) ; \ + else \ + fmult_idx (Cx + (pC)*csize, \ + a, ix, jx, b, iy, jy, theta) ; \ + } \ + } + + #define GB_GENERIC + #include "ewise/include/GB_ewise_shared_definitions.h" + #include "kronecker/template/GB_kroner_template.c" + + return GrB_SUCCESS ; +} + +//------------------------------------------------------------------------------ +// GB_kroner: Kronecker product, C = kron (A,B) +//------------------------------------------------------------------------------ + +GrB_Info GB_kroner ( GrB_Matrix C, // output matrix const bool C_is_csc, // desired format of C @@ -37,8 +212,13 @@ GrB_Info GB_kroner // C = kron (A,B) const bool flipij, // if true, i and j are flipped: z=(x,y,j,i) const GrB_Matrix A_in, // input matrix bool A_is_pattern, // true if values of A are not used + bool A_transpose, const GrB_Matrix B_in, // input matrix bool B_is_pattern, // true if values of B are not used + bool B_transpose, + const GrB_Matrix Mask, + const bool Mask_comp, + const bool Mask_struct, GB_Werk Werk ) { @@ -65,12 +245,260 @@ GrB_Info GB_kroner // C = kron (A,B) GB_MATRIX_WAIT (B_in) ; //-------------------------------------------------------------------------- - // bitmap case: create sparse copies of A and B if they are bitmap + 
// common definitions for masked and default //-------------------------------------------------------------------------- + GrB_Type ctype = op->ztype ; + const size_t csize = ctype->size ; + GB_void cscalar [GB_VLA(csize)] ; + bool C_iso = GB_emult_iso (cscalar, ctype, A_in, B_in, op) ; + GrB_Matrix A = A_in ; + GrB_Matrix B = B_in ; + + //-------------------------------------------------------------------------- + // apply mask immediately if possible + //-------------------------------------------------------------------------- + + if (Mask != NULL && !Mask_comp) + { + GB_MATRIX_WAIT (Mask) ; + + int64_t bnrows = B_transpose ? GB_NCOLS (B) : GB_NROWS (B) ; + int64_t bncols = B_transpose ? GB_NROWS (B) : GB_NCOLS (B) ; + size_t allocated = 0 ; + int64_t centries = 0 ; + uint64_t nvecs = 0 ; + + //---------------------------------------------------------------------- + // allocate Cp + //---------------------------------------------------------------------- + + uint32_t *Cp32 = NULL ; uint64_t *Cp64 = NULL ; + if (Mask->p_is_32) + Cp32 = GB_calloc_memory (Mask->vdim + 1, sizeof (uint32_t), + &allocated) ; + else + Cp64 = GB_calloc_memory (Mask->vdim + 1, sizeof (uint64_t), + &allocated) ; + + if (Cp32 == NULL && Cp64 == NULL) + { + GB_FREE_WORKSPACE ; + return GrB_OUT_OF_MEMORY ; + } + + GB_Mp_DECLARE (Mp, ) ; GB_Mp_PTR (Mp, Mask) ; + GB_Mh_DECLARE (Mh, ) ; GB_Mh_PTR (Mh, Mask) ; + GB_Mi_DECLARE (Mi, ) ; GB_Mi_PTR (Mi, Mask) ; + + GB_cast_function cast_A = GB_cast_factory (op->xtype->code, + A->type->code) ; + GB_cast_function cast_B = GB_cast_factory (op->ytype->code, + B->type->code) ; + + double work = Mask->vdim ; + int nthreads_max = GB_Context_nthreads_max ( ) ; + double chunk = GB_Context_chunk ( ) ; + int nthreads = GB_nthreads (work, chunk, nthreads_max) ; + + int64_t vlen = Mask->vlen ; + + //---------------------------------------------------------------------- + // count entries per vector + 
//---------------------------------------------------------------------- + + #pragma omp parallel num_threads(nthreads) + { + GrB_Index offset ; + + #pragma omp for reduction(+:nvecs) schedule(static) + for (GrB_Index k = 0 ; k < Mask->nvec ; k++) + { + GrB_Index j = Mh32 ? GBH (Mh32, k) : GBH (Mh64, k) ; + + int64_t pM_start = Mp32 ? GBP (Mp32, k, vlen) + : GBP (Mp64, k, vlen) ; + int64_t pM_end = Mp32 ? GBP (Mp32, k+1, vlen) + : GBP (Mp64, k+1, vlen) ; + bool nonempty = false ; + + for (GrB_Index p = pM_start ; p < pM_end ; p++) + { + if (!GBB (Mask->b, p)) continue ; + + int64_t i = Mi32 ? GBI (Mi32, p, vlen) + : GBI (Mi64, p, vlen) ; + GrB_Index Mrow = Mask->is_csc ? i : j ; + GrB_Index Mcol = Mask->is_csc ? j : i ; + + if (Mask_struct || (Mask->iso ? ((bool *) Mask->x) [0] + : ((bool *) Mask->x) [p])) + { + GrB_Index arow = A_transpose ? (Mcol / bncols) + : (Mrow / bnrows) ; + GrB_Index acol = A_transpose ? (Mrow / bnrows) + : (Mcol / bncols) ; + GrB_Index brow = B_transpose ? (Mcol % bncols) + : (Mrow % bnrows) ; + GrB_Index bcol = B_transpose ? (Mrow % bnrows) + : (Mcol % bncols) ; + + if (!GB_lookup_xoffset (&offset, A, arow, acol)) + continue ; + if (!GB_lookup_xoffset (&offset, B, brow, bcol)) + continue ; + + if (Mask->p_is_32) { (Cp32 [j])++ ; } + else { (Cp64 [j])++ ; } + nonempty = true ; + } + } + if (nonempty) nvecs++ ; + } + } + + //---------------------------------------------------------------------- + // prefix sum to get centries + //---------------------------------------------------------------------- + + if (Mask->p_is_32) + GB_cumsum (Cp32, Mask->p_is_32, Mask->vdim, NULL, nthreads, Werk) ; + else + GB_cumsum (Cp64, Mask->p_is_32, Mask->vdim, NULL, nthreads, Werk) ; + + centries = Mask->p_is_32 ? 
(int64_t) Cp32 [Mask->vdim] + : (int64_t) Cp64 [Mask->vdim] ; + + //---------------------------------------------------------------------- + // allocate Ci + //---------------------------------------------------------------------- + + uint32_t *Ci32 = NULL ; uint64_t *Ci64 = NULL ; + if (Mask->i_is_32) + Ci32 = GB_malloc_memory (centries, sizeof (uint32_t), &allocated) ; + else + Ci64 = GB_malloc_memory (centries, sizeof (uint64_t), &allocated) ; + + if (centries > 0 && Ci32 == NULL && Ci64 == NULL) + { + if (Mask->p_is_32) GB_free_memory (&Cp32, + (Mask->vdim + 1) * sizeof (uint32_t)) ; + else GB_free_memory (&Cp64, + (Mask->vdim + 1) * sizeof (uint64_t)) ; + GB_FREE_WORKSPACE ; + return GrB_OUT_OF_MEMORY ; + } + + //---------------------------------------------------------------------- + // allocate Cx + //---------------------------------------------------------------------- + + void *Cx = NULL ; + if (C_iso) + { + Cx = GB_malloc_memory (1, op->ztype->size, &allocated) ; + if (Cx == NULL) + { + if (Mask->i_is_32) GB_free_memory (&Ci32, + centries * sizeof (uint32_t)) ; + else GB_free_memory (&Ci64, + centries * sizeof (uint64_t)) ; + if (Mask->p_is_32) GB_free_memory (&Cp32, + (Mask->vdim + 1) * sizeof (uint32_t)) ; + else GB_free_memory (&Cp64, + (Mask->vdim + 1) * sizeof (uint64_t)) ; + GB_FREE_WORKSPACE ; + return GrB_OUT_OF_MEMORY ; + } + memcpy (Cx, cscalar, csize) ; + } + else + { + Cx = GB_malloc_memory (centries, op->ztype->size, &allocated) ; + if (centries > 0 && Cx == NULL) + { + if (Mask->i_is_32) GB_free_memory (&Ci32, + centries * sizeof (uint32_t)) ; + else GB_free_memory (&Ci64, + centries * sizeof (uint64_t)) ; + if (Mask->p_is_32) GB_free_memory (&Cp32, + (Mask->vdim + 1) * sizeof (uint32_t)) ; + else GB_free_memory (&Cp64, + (Mask->vdim + 1) * sizeof (uint64_t)) ; + GB_FREE_WORKSPACE ; + return GrB_OUT_OF_MEMORY ; + } + } + + //---------------------------------------------------------------------- + // allocate and initialize C matrix header + 
//---------------------------------------------------------------------- + + GrB_Info Calloc = GB_new_bix (&C, op->ztype, vlen, Mask->vdim, + GB_ph_null, Mask->is_csc, GxB_SPARSE, false, Mask->hyper_switch, + Mask->vdim, centries, false, C_iso, + Mask->p_is_32, Mask->j_is_32, Mask->i_is_32) ; + if (Calloc != GrB_SUCCESS) + { + if (C_iso) GB_free_memory (&Cx, op->ztype->size) ; + else GB_free_memory (&Cx, centries * op->ztype->size) ; + if (Mask->i_is_32) GB_free_memory (&Ci32, + centries * sizeof (uint32_t)) ; + else GB_free_memory (&Ci64, + centries * sizeof (uint64_t)) ; + if (Mask->p_is_32) GB_free_memory (&Cp32, + (Mask->vdim + 1) * sizeof (uint32_t)) ; + else GB_free_memory (&Cp64, + (Mask->vdim + 1) * sizeof (uint64_t)) ; + GB_FREE_WORKSPACE ; + return Calloc ; + } + + GB_free_memory (&C->i, C->i_size) ; + GB_free_memory (&C->x, C->x_size) ; + + C->p = Mask->p_is_32 ? (void *) Cp32 : (void *) Cp64 ; + C->i = Mask->i_is_32 ? (void *) Ci32 : (void *) Ci64 ; + C->x = Cx ; + C->p_size = (Mask->p_is_32 ? sizeof (uint32_t) + : sizeof (uint64_t)) + * (Mask->vdim + 1) ; + C->i_size = (Mask->i_is_32 ? sizeof (uint32_t) + : sizeof (uint64_t)) * centries ; + C->x_size = C->iso ? 
op->ztype->size + : op->ztype->size * centries ; + C->magic = GB_MAGIC ; + C->nvals = centries ; + C->nvec_nonempty = (int64_t) nvecs ; + + //---------------------------------------------------------------------- + // evaluate + //---------------------------------------------------------------------- + + info = GB_kroner_jit (C, op, false, A, A_transpose, B, B_transpose, Mask, Mask_struct, Mask_comp, nthreads) ; + + if (info != GrB_SUCCESS) + { + info = GB_kroner_generic (C, op, false, A, B, + A_is_pattern, B_is_pattern, A_transpose, B_transpose, + Mask, Mask_comp, Mask_struct, false, nthreads) ; + } + + GB_FREE_WORKSPACE ; + return info ; + } + + //-------------------------------------------------------------------------- + // default case (no mask, or complemented mask) + //-------------------------------------------------------------------------- + + //-------------------------------------------------------------------------- + // bitmap case: create sparse copies of A and B if they are bitmap + //-------------------------------------------------------------------------- + if (GB_IS_BITMAP (A)) - { + { GBURBLE ("A:") ; GB_CLEAR_MATRIX_HEADER (Awork, &Awork_header) ; GB_OK (GB_dup_worker (&Awork, A->iso, A, true, NULL)) ; @@ -80,9 +508,8 @@ GrB_Info GB_kroner // C = kron (A,B) A = Awork ; } - GrB_Matrix B = B_in ; if (GB_IS_BITMAP (B)) - { + { GBURBLE ("B:") ; GB_CLEAR_MATRIX_HEADER (Bwork, &Bwork_header) ; GB_OK (GB_dup_worker (&Bwork, B->iso, B, true, NULL)) ; @@ -102,7 +529,7 @@ GrB_Info GB_kroner // C = kron (A,B) const int64_t avlen = A->vlen ; const int64_t avdim = A->vdim ; const int64_t anvec = A->nvec ; - const int64_t anz = GB_nnz (A) ; + const int64_t anz = GB_nnz (A) ; GB_Bp_DECLARE (Bp, const) ; GB_Bp_PTR (Bp, B) ; GB_Bh_DECLARE (Bh, const) ; GB_Bh_PTR (Bh, B) ; @@ -110,7 +537,7 @@ GrB_Info GB_kroner // C = kron (A,B) const int64_t bvlen = B->vlen ; const int64_t bvdim = B->vdim ; const int64_t bnvec = B->nvec ; - const int64_t bnz = GB_nnz (B) ; + 
const int64_t bnz = GB_nnz (B) ; //-------------------------------------------------------------------------- // determine the number of threads to use @@ -118,57 +545,40 @@ GrB_Info GB_kroner // C = kron (A,B) double work = ((double) anz) * ((double) bnz) + (((double) anvec) * ((double) bnvec)) ; - int nthreads_max = GB_Context_nthreads_max ( ) ; double chunk = GB_Context_chunk ( ) ; int nthreads = GB_nthreads (work, chunk, nthreads_max) ; - //-------------------------------------------------------------------------- - // check if C is iso and compute its iso value if it is - //-------------------------------------------------------------------------- - - GrB_Type ctype = op->ztype ; - const size_t csize = ctype->size ; - GB_void cscalar [GB_VLA(csize)] ; - bool C_iso = GB_emult_iso (cscalar, ctype, A, B, op) ; - //-------------------------------------------------------------------------- // allocate the output matrix C //-------------------------------------------------------------------------- - // C has the same type as z for the multiply operator, z=op(x,y) - uint64_t cvlen, cvdim, cnzmax, cnvec ; - bool ok = GB_int64_multiply (&cvlen, avlen, bvlen) ; - ok = ok & GB_int64_multiply (&cvdim, avdim, bvdim) ; - ok = ok & GB_int64_multiply (&cnzmax, anz, bnz) ; - ok = ok & GB_int64_multiply (&cnvec, anvec, bnvec) ; + bool ok = GB_int64_multiply (&cvlen, avlen, bvlen) ; + ok = ok & GB_int64_multiply (&cvdim, avdim, bvdim) ; + ok = ok & GB_int64_multiply (&cnzmax, anz, bnz) ; + ok = ok & GB_int64_multiply (&cnvec, anvec, bnvec) ; ASSERT (ok) ; if (C_iso) - { - // the values of A and B are no longer needed if C is iso + { GBURBLE ("(iso kron) ") ; A_is_pattern = true ; B_is_pattern = true ; } - // C is hypersparse if either A or B are hypersparse. It is never bitmap. bool C_is_hyper = (cvdim > 1) && (Ah != NULL || Bh != NULL) ; - bool C_is_full = GB_as_if_full (A) && GB_as_if_full (B) ; - int C_sparsity = C_is_full ? GxB_FULL : - ((C_is_hyper) ? 
GxB_HYPERSPARSE : GxB_SPARSE) ; - - // determine the p_is_32, j_is_32, and i_is_32 settings for the new matrix + bool C_is_full = GB_as_if_full (A) && GB_as_if_full (B) ; + int C_sparsity = C_is_full ? GxB_FULL : + C_is_hyper ? GxB_HYPERSPARSE : GxB_SPARSE ; bool Cp_is_32, Cj_is_32, Ci_is_32 ; GB_determine_pji_is_32 (&Cp_is_32, &Cj_is_32, &Ci_is_32, C_sparsity, cnzmax, (int64_t) cvlen, (int64_t) cvdim, Werk) ; - GB_OK (GB_new_bix (&C, // full, sparse, or hyper; existing header - ctype, (int64_t) cvlen, (int64_t) cvdim, GB_ph_malloc, C_is_csc, - C_sparsity, true, B->hyper_switch, cnvec, cnzmax, true, C_iso, - Cp_is_32, Cj_is_32, Ci_is_32)) ; + GB_OK (GB_new_bix (&C, ctype, (int64_t) cvlen, (int64_t) cvdim, + GB_ph_malloc, C_is_csc, C_sparsity, true, B->hyper_switch, + cnvec, cnzmax, true, C_iso, Cp_is_32, Cj_is_32, Ci_is_32)) ; //-------------------------------------------------------------------------- // compute the column counts of C: Cp and Ch if C is hypersparse @@ -176,34 +586,25 @@ GrB_Info GB_kroner // C = kron (A,B) GB_Cp_DECLARE (Cp, ) ; GB_Cp_PTR (Cp, C) ; GB_Ch_DECLARE (Ch, ) ; GB_Ch_PTR (Ch, C) ; - #define GB_Cp_IS_32 Cp_is_32 + //#define GB_Cp_IS_32 Cp_is_32 if (!C_is_full) - { - // C is sparse or hypersparse + { int64_t kC ; #pragma omp parallel for num_threads(nthreads) schedule(static) - for (kC = 0 ; kC < cnvec ; kC++) + for (kC = 0 ; kC < (int64_t) cnvec ; kC++) { const int64_t kA = kC / bnvec ; const int64_t kB = kC % bnvec ; - // get A(:,jA), the (kA)th vector of A const int64_t jA = GBh_A (Ah, kA) ; const int64_t aknz = (Ap == NULL) ? avlen : (GB_IGET (Ap, kA+1) - GB_IGET (Ap, kA)) ; - // get B(:,jB), the (kB)th vector of B const int64_t jB = GBh_B (Bh, kB) ; const int64_t bknz = (Bp == NULL) ? 
bvlen : (GB_IGET (Bp, kB+1) - GB_IGET (Bp, kB)) ; - // determine # entries in C(:,jC), the (kC)th vector of C - // int64_t kC = kA * bnvec + kB ; - // Cp [kC] = aknz * bknz ; GB_ISET (Cp, kC, aknz * bknz) ; if (C_is_hyper) - { - // Ch [kC] = jA * bvdim + jB ; GB_ISET (Ch, kC, jA * bvdim + jB) ; - } } int64_t nvec_nonempty ; @@ -216,19 +617,17 @@ GrB_Info GB_kroner // C = kron (A,B) C->magic = GB_MAGIC ; //-------------------------------------------------------------------------- - // C = kron (A,B) where C is iso and/or full full + // C = kron (A,B) where C is iso and/or full //-------------------------------------------------------------------------- if (C_iso) - { - // C->x [0] = cscalar = op (A,B) + { memcpy (C->x, cscalar, csize) ; if (C_is_full) - { - // no more work to do if C is iso and full + { ASSERT_MATRIX_OK (C, "C=kron(A,B), iso full", GB0) ; GB_FREE_WORKSPACE ; - return (GrB_SUCCESS) ; + return GrB_SUCCESS ; } } @@ -238,93 +637,22 @@ GrB_Info GB_kroner // C = kron (A,B) int64_t cnz = GB_nnz (C) ; if (cnz == 0) - { + { GB_FREE_WORKSPACE ; - return (GrB_SUCCESS) ; + return GrB_SUCCESS ; } //-------------------------------------------------------------------------- - // C = kron (A,B) + // evaluate: JIT or generic //-------------------------------------------------------------------------- - // via the JIT kernel - info = GB_kroner_jit (C, op, flipij, A, B, nthreads) ; - - if (info == GrB_NO_VALUE) - { - // via the generic kernel - #define GB_A_TYPE GB_void - #define GB_B_TYPE GB_void - #define GB_C_TYPE GB_void - #define GB_A_ISO A_iso - #define GB_B_ISO B_iso - #define GB_C_ISO C_iso - const bool A_iso = A->iso ; - const bool B_iso = B->iso ; - const int64_t asize = A->type->size ; - const int64_t bsize = B->type->size ; - - GxB_binary_function fmult = op->binop_function ; - GxB_index_binary_function fmult_idx = op->idxbinop_function ; - const void *theta = op->theta ; - GB_cast_function cast_A = NULL, cast_B = NULL ; - if (!A_is_pattern) - { - cast_A = 
GB_cast_factory (op->xtype->code, A->type->code) ; - } - if (!B_is_pattern) - { - cast_B = GB_cast_factory (op->ytype->code, B->type->code) ; - } - - #define GB_C_IS_FULL C_is_full - - #define GB_DECLAREA(a) GB_void a [GB_VLA(asize)] - #define GB_DECLAREB(b) GB_void b [GB_VLA(bsize)] - - #define GB_GETA(a,Ax,p,iso) \ - { \ - if (!A_is_pattern) \ - { \ - cast_A (a, Ax + (p)*asize, asize) ; \ - } \ - } - - #define GB_GETB(b,Bx,p,iso) \ - { \ - if (!B_is_pattern) \ - { \ - cast_B (b, Bx + (p)*bsize, bsize) ; \ - } \ - } - - #define GB_KRONECKER_OP(Cx,pC,a,ix,jx,b,iy,jy) \ - { \ - if (fmult != NULL) \ - { \ - /* standard binary operator */ \ - fmult (Cx +(pC)*csize, a, b) ; \ - } \ - else \ - { \ - /* index binary operator */ \ - if (flipij) \ - { \ - fmult_idx (Cx +(pC)*csize, \ - a, jx, ix, b, jy, iy, theta) ; \ - } \ - else \ - { \ - fmult_idx (Cx +(pC)*csize, \ - a, ix, jx, b, iy, jy, theta) ; \ - } \ - } \ - } + info = GB_kroner_jit (C, op, flipij, A, A_transpose, B, B_transpose, Mask, Mask_struct, Mask_comp, nthreads) ; - #define GB_GENERIC - #include "ewise/include/GB_ewise_shared_definitions.h" - #include "kronecker/template/GB_kroner_template.c" - info = GrB_SUCCESS ; + if (info != GrB_SUCCESS) + { + info = GB_kroner_generic (C, op, flipij, A, B, + A_is_pattern, B_is_pattern, A_transpose, B_transpose, + Mask, Mask_comp, Mask_struct, C_is_full, nthreads) ; } //-------------------------------------------------------------------------- @@ -332,16 +660,11 @@ GrB_Info GB_kroner // C = kron (A,B) //-------------------------------------------------------------------------- if (info == GrB_SUCCESS) - { + { GB_OK (GB_hyper_prune (C, Werk)) ; ASSERT_MATRIX_OK (C, "C=kron(A,B)", GB0) ; } - //-------------------------------------------------------------------------- - // return result - //-------------------------------------------------------------------------- - GB_FREE_WORKSPACE ; - return (info) ; + return info ; } - diff --git 
a/Source/kronecker/template/GB_kroner_template.c b/Source/kronecker/template/GB_kroner_template.c index bdc76990c0..c306df0058 100644 --- a/Source/kronecker/template/GB_kroner_template.c +++ b/Source/kronecker/template/GB_kroner_template.c @@ -47,135 +47,334 @@ // C = kron (A,B) //-------------------------------------------------------------------------- - int tid ; - #pragma omp parallel for num_threads(nthreads) schedule(static) - for (tid = 0 ; tid < nthreads ; tid++) + if (!GB_NO_MASK && !GB_MASK_COMP) { + #define GB_GETVECTOR(A, row, col) GrB_Index vector_ = (A)->is_csc ? (col) : (row) + + #define GB_GETCOORD(A, row, col) GrB_Index coord_ = (A)->is_csc ? (row) : (col) + + #define GB_LOOKUP_XOFFSET_BITMAP_FULL(offset, A) \ + { \ + GrB_Index offset_ = vector_ * (A)->vlen + \ + coord_ ; \ + if ((A)->b == NULL || \ + (((int8_t *) (A)->b)[offset_])) \ + { \ + offset = (A)->iso ? 0 : offset_ ; \ + } \ + else \ + { \ + offset = -1 ; \ + } \ + } - //---------------------------------------------------------------------- - // get the iso values of A and B - //---------------------------------------------------------------------- + #define GB_LOOKUP_XOFFSET_SPARSE(offset, A) \ + { \ + start_ = (A)->p_is_32 ? \ + ((int32_t *)(A)->p) [vector_] : \ + ((int64_t *)(A)->p) [vector_] ; \ + end_ = (A)->p_is_32 ? \ + ((int32_t *)(A)->p) [vector_ + 1] : \ + ((int64_t *)(A)->p) [vector_ + 1] ; \ + end_-- ; \ + if (start_ > end_) \ + { \ + offset = -1 ; \ + } \ + else \ + { \ + if (GB_binary_search (coord_, (A)->i, \ + (A)->i_is_32, &start_, &end_)) \ + { \ + offset = (A)->iso ? 
0 : start_ ; \ + } \ + else \ + { \ + offset = - 1 ; \ + } \ + } \ + } - GB_DECLAREA (a) ; - if (GB_A_ISO) - { - GB_GETA (a, Ax, 0, true) ; + #define GB_LOOKUP_XOFFSET_HYPER(offset, A) \ + { \ + start_ = 0 ; \ + end_ = (A)->plen - 1 ; \ + if (!GB_binary_search (vector_, (A)->h, \ + (A)->j_is_32, &start_, &end_)) \ + { \ + offset = - 1; \ + } \ + else \ + { \ + int64_t k_ = start_ ; \ + start_ = (A)->p_is_32 ? \ + ((int32_t *)(A)->p) [k_] : \ + ((int64_t *)(A)->p) [k_] ; \ + end_ = (A)->p_is_32 ? \ + ((int32_t *)(A)->p) [k_ + 1] : \ + ((int64_t *)(A)->p) [k_ + 1] ; \ + end_-- ; \ + if (start_ > end_) \ + { \ + offset = -1 ; \ + } \ + else \ + { \ + if (GB_binary_search (coord_, (A)->i, \ + (A)->i_is_32, &start_, &end_)) \ + { \ + offset = (A)->iso ? 0 : start_ ; \ + } \ + else \ + { \ + offset = -1 ; \ + } \ + } \ + } \ } - GB_DECLAREB (b) ; - if (GB_B_ISO) - { - GB_GETB (b, Bx, 0, true) ; + + #define GB_LOOKUP_XOFFSET(offset, A, row, col) \ + { \ + GB_GETVECTOR(A, row, col) ; \ + GB_GETCOORD(A, row, col) ; \ + if ((A)->p == NULL) \ + { \ + GB_LOOKUP_XOFFSET_BITMAP_FULL (offset, A) ; \ + } \ + else \ + { \ + int64_t start_, end_ ; \ + if ((A)->h == NULL) \ + { \ + GB_LOOKUP_XOFFSET_SPARSE(offset, A) ; \ + } \ + else \ + { \ + GB_LOOKUP_XOFFSET_HYPER(offset, A) ; \ + } \ + } \ } - //---------------------------------------------------------------------- - // construct the task to compute Ci,Cx [pC:pC_end-1] - //---------------------------------------------------------------------- + #define GB_NROWS(A) ((A)->is_csc ? (A)->vlen : (A)->vdim) + #define GB_NCOLS(A) ((A)->is_csc ? 
(A)->vdim : (A)->vlen) + - int64_t pC, pC_end ; - GB_PARTITION (pC, pC_end, cnz, tid, nthreads) ; + GB_Mp_DECLARE(Mp, ) ; + GB_Mp_PTR(Mp, Mask) ; - // find where this task starts in C - int64_t kC_task = GB_search_for_vector (Cp, GB_Cp_IS_32, pC, 0, cnvec, - cvlen) ; - int64_t pC_delta = pC - GBp_C (Cp, kC_task, cvlen) ; + GB_Mh_DECLARE(Mh, ) ; + GB_Mh_PTR(Mh, Mask) ; - //---------------------------------------------------------------------- - // compute C(:,kC) for all vectors kC in this task - //---------------------------------------------------------------------- + GB_Mi_DECLARE(Mi, ) ; + GB_Mi_PTR(Mi, Mask) ; - for (int64_t kC = kC_task ; kC < cnvec && pC < pC_end ; kC++) + + int64_t bnrows = (B_transpose) ? GB_NCOLS (B) : GB_NROWS (B) ; + int64_t bncols = (B_transpose) ? GB_NROWS (B) : GB_NCOLS (B) ; + + #pragma omp parallel num_threads(nthreads) { + GrB_Index offset = -1 ; + GB_DECLAREA (a_elem) ; + GB_DECLAREB (b_elem) ; + int64_t vlen = Mask->vlen ; - //------------------------------------------------------------------ - // get the vectors C(:,jC), A(:,jA), and B(:,jB) - //------------------------------------------------------------------ - - // C(:,jC) = kron (A(:,jA), B(:,jB), the (kC)th vector of C, - // where jC = GBh_C (Ch, kC) - int64_t kA = kC / bnvec ; - int64_t kB = kC % bnvec ; - - // get A(:,jA), the (kA)th vector of A - int64_t jA = GBh_A (Ah, kA) ; - int64_t pA_start = GBp_A (Ap, kA, avlen) ; - int64_t pA_end = GBp_A (Ap, kA+1, avlen) ; - - // get B(:,jB), the (kB)th vector of B - int64_t jB = GBh_B (Bh, kB) ; - int64_t pB_start = GBp_B (Bp, kB, bvlen) ; - int64_t pB_end = GBp_B (Bp, kB+1, bvlen) ; - int64_t bknz = pB_end - pB_start ; - - // shift into the middle of A(:,jA) and B(:,jB) for the first - // vector of C for this task. 
- int64_t pA_delta = 0 ; - int64_t pB_delta = 0 ; - if (kC == kC_task && bknz > 0) + #pragma omp for schedule(static) + for (GrB_Index k = 0 ; k < Mask->nvec ; k++) + { + GrB_Index j = GBh_M (Mh, k) ; + + int64_t pA_start = GBp_M (Mp, k, vlen) ; + int64_t pA_end = GBp_M (Mp, k+1, vlen) ; + GrB_Index pos = Mask->p_is_32 ? ((int32_t *)C->p)[j] : ((int64_t *)C->p)[j] ; + for (GrB_Index p = pA_start ; p < pA_end ; p++) + { + if (!GBb_M (Mask->b, p)) continue ; + + int64_t i = GBi_M (Mi, p, vlen) ; + GrB_Index Mrow = Mask->is_csc ? i : j ; GrB_Index Mcol = Mask->is_csc ? j : i ; + + // extract elements from A and B, + // initialize offset in MTi and MTx, + // get result of op, place it in MTx + + if (GB_MASK_STRUCT || (Mask->iso ? ((bool *)Mask->x)[0] : ((bool *)Mask->x)[p])) + { + GrB_Index arow = A_transpose ? (Mcol / bncols) : (Mrow / bnrows); + GrB_Index acol = A_transpose ? (Mrow / bnrows) : (Mcol / bncols); + + GrB_Index brow = B_transpose ? (Mcol % bncols) : (Mrow % bnrows); + GrB_Index bcol = B_transpose ? (Mrow % bnrows) : (Mcol % bncols); + + GB_LOOKUP_XOFFSET (offset, A, arow, acol) ; + if (offset == -1) + { + continue; + } + // iso of A or of C (?) + GB_GETA (a_elem, Ax, offset, GB_A_ISO) ; + + GB_LOOKUP_XOFFSET (offset, B, brow, bcol) ; + if (offset == -1) + { + continue; + } + GB_GETB (b_elem, Bx, offset, GB_B_ISO) ; + + GrB_Index ix, jx, iy, jy ; + ix = A_transpose ? acol : arow ; + jx = A_transpose ? arow : acol ; + iy = B_transpose ? bcol : brow ; + jy = B_transpose ? 
brow : bcol ; + + if (Mask->i_is_32) + { + ((int32_t *)C->i)[pos] = i ; + } + else + { + ((int64_t *)C->i)[pos] = i ; + } + + GB_KRONECKER_OP (Cx, pos, a_elem, ix, jx, b_elem, iy, jy) ; + + pos++ ; + } + } + } + } + } + else + { + int tid ; + #pragma omp parallel for num_threads(nthreads) schedule(static) + for (tid = 0 ; tid < nthreads ; tid++) + { + + //---------------------------------------------------------------------- + // get the iso values of A and B + //---------------------------------------------------------------------- + + GB_DECLAREA (a) ; + if (GB_A_ISO) + { + GB_GETA (a, Ax, 0, true) ; + } + GB_DECLAREB (b) ; + if (GB_B_ISO) { - pA_delta = pC_delta / bknz ; - pB_delta = pC_delta % bknz ; + GB_GETB (b, Bx, 0, true) ; } - //------------------------------------------------------------------ - // for all entries in A(:,jA), skipping entries for first vector - //------------------------------------------------------------------ + //---------------------------------------------------------------------- + // construct the task to compute Ci,Cx [pC:pC_end-1] + //---------------------------------------------------------------------- - int64_t pA = pA_start + pA_delta ; - pA_delta = 0 ; - for ( ; pA < pA_end && pC < pC_end ; pA++) - { + int64_t pC, pC_end ; + GB_PARTITION (pC, pC_end, cnz, tid, nthreads) ; + + // find where this task starts in C + int64_t kC_task = GB_search_for_vector (Cp, GB_Cp_IS_32, pC, 0, cnvec, + cvlen) ; + int64_t pC_delta = pC - GBp_C (Cp, kC_task, cvlen) ; - //-------------------------------------------------------------- - // a = A(iA,jA), typecasted to op->xtype - //-------------------------------------------------------------- + //---------------------------------------------------------------------- + // compute C(:,kC) for all vectors kC in this task + //---------------------------------------------------------------------- - int64_t iA = GBi_A (Ai, pA, avlen) ; - int64_t iAblock = iA * bvlen ; - if (!GB_A_ISO) + for (int64_t 
kC = kC_task ; kC < cnvec && pC < pC_end ; kC++) + { + + //------------------------------------------------------------------ + // get the vectors C(:,jC), A(:,jA), and B(:,jB) + //------------------------------------------------------------------ + + // C(:,jC) = kron (A(:,jA), B(:,jB), the (kC)th vector of C, + // where jC = GBh_C (Ch, kC) + int64_t kA = kC / bnvec ; + int64_t kB = kC % bnvec ; + + // get A(:,jA), the (kA)th vector of A + int64_t jA = GBh_A (Ah, kA) ; + int64_t pA_start = GBp_A (Ap, kA, avlen) ; + int64_t pA_end = GBp_A (Ap, kA+1, avlen) ; + + // get B(:,jB), the (kB)th vector of B + int64_t jB = GBh_B (Bh, kB) ; + int64_t pB_start = GBp_B (Bp, kB, bvlen) ; + int64_t pB_end = GBp_B (Bp, kB+1, bvlen) ; + int64_t bknz = pB_end - pB_start ; + + // shift into the middle of A(:,jA) and B(:,jB) for the first + // vector of C for this task. + int64_t pA_delta = 0 ; + int64_t pB_delta = 0 ; + if (kC == kC_task && bknz > 0) { - GB_GETA (a, Ax, pA, false) ; + pA_delta = pC_delta / bknz ; + pB_delta = pC_delta % bknz ; } - //-------------------------------------------------------------- - // for all entries in B(:,jB), skipping entries for 1st vector - //-------------------------------------------------------------- + //------------------------------------------------------------------ + // for all entries in A(:,jA), skipping entries for first vector + //------------------------------------------------------------------ - // scan B(:,jB), skipping to the first entry of C if this is - // the first time B is accessed in this task - int64_t pB = pB_start + pB_delta ; - pB_delta = 0 ; - for ( ; pB < pB_end && pC < pC_end ; pB++) - { + int64_t pA = pA_start + pA_delta ; + pA_delta = 0 ; + for ( ; pA < pA_end && pC < pC_end ; pA++) + { - //---------------------------------------------------------- - // b = B(iB,jB), typecasted to op->ytype - //---------------------------------------------------------- + 
//-------------------------------------------------------------- + // a = A(iA,jA), typecasted to op->xtype + //-------------------------------------------------------------- - int64_t iB = GBi_B (Bi, pB, bvlen) ; - if (!GB_B_ISO) + int64_t iA = GBi_A (Ai, pA, avlen) ; + int64_t iAblock = iA * bvlen ; + if (!GB_A_ISO) { - GB_GETB (b, Bx, pB, false) ; + GB_GETA (a, Ax, pA, false) ; } - //---------------------------------------------------------- - // C(iC,jC) = A(iA,jA) * B(iB,jB) - //---------------------------------------------------------- + //-------------------------------------------------------------- + // for all entries in B(:,jB), skipping entries for 1st vector + //-------------------------------------------------------------- - if (!GB_C_IS_FULL) - { - // save the row index iC - // Ci [pC] = iAblock + iB ; - GB_ISET (Ci, pC, iAblock + iB) ; - } - // Cx [pC] = op (a, b) - if (!GB_C_ISO) + // scan B(:,jB), skipping to the first entry of C if this is + // the first time B is accessed in this task + int64_t pB = pB_start + pB_delta ; + pB_delta = 0 ; + for ( ; pB < pB_end && pC < pC_end ; pB++) { - GB_KRONECKER_OP (Cx, pC, a, iA, jA, b, iB, jB) ; + + //---------------------------------------------------------- + // b = B(iB,jB), typecasted to op->ytype + //---------------------------------------------------------- + + int64_t iB = GBi_B (Bi, pB, bvlen) ; + if (!GB_B_ISO) + { + GB_GETB (b, Bx, pB, false) ; + } + + //---------------------------------------------------------- + // C(iC,jC) = A(iA,jA) * B(iB,jB) + //---------------------------------------------------------- + + if (!GB_C_IS_FULL) + { + // save the row index iC + // Ci [pC] = iAblock + iB ; + GB_ISET (Ci, pC, iAblock + iB) ; + } + // Cx [pC] = op (a, b) + if (!GB_C_ISO) + { + GB_KRONECKER_OP (Cx, pC, a, iA, jA, b, iB, jB) ; + } + pC++ ; } - pC++ ; } } } } } - diff --git a/Source/mask/GB_mask.h b/Source/mask/GB_mask.h index a5286fc665..0c2b5b113f 100644 --- a/Source/mask/GB_mask.h +++ 
b/Source/mask/GB_mask.h @@ -54,6 +54,7 @@ GrB_Info GB_masker_phase1 // count nnz in each R(:,j) const int64_t *restrict R_to_Z, const bool Rp_is_32, const bool Rj_is_32, + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -85,7 +86,7 @@ GrB_Info GB_masker_phase2 // phase2 for R = masker (C,M,Z) const bool Rp_is_32, const bool Rj_is_32, const bool Ri_is_32, - const int R_sparsity, + const int R_sparsity, // any sparsity format // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented diff --git a/Source/mask/GB_masker.c b/Source/mask/GB_masker.c index 8fe8cc2d79..eb1eaf546f 100644 --- a/Source/mask/GB_masker.c +++ b/Source/mask/GB_masker.c @@ -208,7 +208,7 @@ GrB_Info GB_masker // R = masker (C, M, Z) // from phase1a: TaskList, R_ntasks, R_nthreads, // from phase0: - Rnvec, Rh, R_to_M, R_to_C, R_to_Z, Rp_is_32, Rj_is_32, + Rnvec, Rh, R_to_M, R_to_C, R_to_Z, Rp_is_32, Rj_is_32, R_sparsity, // original input: M, Mask_comp, Mask_struct, C, Z, Werk) ; if (info != GrB_SUCCESS) diff --git a/Source/mask/GB_masker_phase1.c b/Source/mask/GB_masker_phase1.c index 5e23d999b5..c7c6a0eaf9 100644 --- a/Source/mask/GB_masker_phase1.c +++ b/Source/mask/GB_masker_phase1.c @@ -43,6 +43,7 @@ GrB_Info GB_masker_phase1 // count nnz in each R(:,j) const int64_t *restrict R_to_Z, const bool Rp_is_32, const bool Rj_is_32, + const int R_sparsity, // GxB_SPARSE or GxB_HYPERSPARSE // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -60,6 +61,7 @@ GrB_Info GB_masker_phase1 // count nnz in each R(:,j) ASSERT (Rp_handle != NULL) ; ASSERT (Rp_size_handle != NULL) ; ASSERT (Rnvec_nonempty != NULL) ; + ASSERT (R_sparsity == GxB_SPARSE || R_sparsity == GxB_HYPERSPARSE) ; ASSERT_MATRIX_OK (M, "M for mask phase1", GB0) ; ASSERT (!GB_ZOMBIES (M)) ; @@ -109,7 +111,7 
@@ GrB_Info GB_masker_phase1 // count nnz in each R(:,j) R_ntasks, // # of tasks R_nthreads, // # of threads to use // analysis from phase0: - Rnvec, Rh, R_to_M, R_to_C, R_to_Z, Rp_is_32, Rj_is_32, + Rnvec, Rh, R_to_M, R_to_C, R_to_Z, Rp_is_32, Rj_is_32, R_sparsity, // original input: M, Mask_comp, Mask_struct, C, Z) ; diff --git a/Source/mask/GB_masker_phase2.c b/Source/mask/GB_masker_phase2.c index f7b2e4b93e..52a01aaa6a 100644 --- a/Source/mask/GB_masker_phase2.c +++ b/Source/mask/GB_masker_phase2.c @@ -65,7 +65,7 @@ GrB_Info GB_masker_phase2 // phase2 for R = masker (C,M,Z) const bool Rp_is_32, const bool Rj_is_32, const bool Ri_is_32, - const int R_sparsity, + const int R_sparsity, // any sparsity format // original input: const GrB_Matrix M, // required mask const bool Mask_comp, // if true, then M is complemented @@ -243,7 +243,8 @@ GrB_Info GB_masker_phase2 // phase2 for R = masker (C,M,Z) //---------------------------------------------------------------------- info = GB_masker_phase2_jit (R, TaskList, R_ntasks, R_nthreads, - R_to_M, R_to_C, R_to_Z, M, Mask_comp, Mask_struct, C, Z, + R_to_M, R_to_C, R_to_Z, R_sparsity, + M, Mask_comp, Mask_struct, C, Z, C_ek_slicing, C_ntasks, C_nthreads, M_ek_slicing, M_ntasks, M_nthreads) ; diff --git a/Test/test226.m b/Test/test226.m index 41f77fd3d3..4778425a8a 100644 --- a/Test/test226.m +++ b/Test/test226.m @@ -9,6 +9,8 @@ A.matrix = sprand (5, 10, 0.4) ; B.matrix = ones (3, 2) ; B.iso = true ; +M.matrix = sprandn (15, 20,0.2) ~= 0 ; +MT.matrix = sprandn (9, 4, 0.2) ~= 0 ; mult.opname = 'times' ; mult.optype = 'double' ; @@ -18,14 +20,26 @@ C2 = GB_spec_kron (Cin, [ ], [ ], mult, A, B, [ ]) ; GB_spec_compare (C1, C2) ; +C1 = GB_mex_kron (Cin, M, [ ], mult, A, B, [ ]) ; +C2 = GB_spec_kron (Cin, M, [ ], mult, A, B, [ ]) ; +GB_spec_compare (C1, C2) ; + C1 = GB_mex_kron (Cin, [ ], [ ], mult, B, A, [ ]) ; C2 = GB_spec_kron (Cin, [ ], [ ], mult, B, A, [ ]) ; GB_spec_compare (C1, C2) ; +C1 = GB_mex_kron (Cin, M, [ ], 
mult, B, A, [ ]) ; +C2 = GB_spec_kron (Cin, M, [ ], mult, B, A, [ ]) ; +GB_spec_compare (C1, C2) ; + Cin = sparse (9, 4) ; C1 = GB_mex_kron (Cin, [ ], [ ], mult, B, B, [ ]) ; C2 = GB_spec_kron (Cin, [ ], [ ], mult, B, B, [ ]) ; GB_spec_compare (C1, C2) ; +C1 = GB_mex_kron (Cin, MT, [ ], mult, B, B, [ ]) ; +C2 = GB_spec_kron (Cin, MT, [ ], mult, B, B, [ ]) ; +GB_spec_compare (C1, C2) ; + fprintf ('\ntest226: all tests passed\n') ; diff --git a/Test/test227.m b/Test/test227.m index 98e75da2e2..50dd0153d1 100644 --- a/Test/test227.m +++ b/Test/test227.m @@ -16,6 +16,16 @@ dnt = struct ( 'inp1', 'tran' ) ; dtt = struct ( 'inp0', 'tran', 'inp1', 'tran' ) ; +dnn.mask = 'default' ; +dtn.mask = 'default' ; +dnt.mask = 'default' ; +dtt.mask = 'default' ; + +dnn.outp = 'default' ; +dtn.outp = 'default' ; +dnt.outp = 'default' ; +dtt.outp = 'default' ; + types = { 'int32', 'int64', 'single', 'double' } ; am = 5 ; @@ -23,13 +33,18 @@ bm = 4 ; bn = 2 ; -Ax = sparse (100 * sprandn (am,an, 0.5)) ; -Bx = sparse (100 * sprandn (bm,bn, 0.5)) ; +Ax_temp = 100 * sprandn (am, an, 0.5); +Bx_temp = 100 * sprandn (bm, bn, 0.5); + +Ax = sparse(round(Ax_temp)); +Bx = sparse(round(Bx_temp)); + cm = am * bm ; cn = an * bn ; Cx = sparse (cm,cn) ; -AT = Ax' ; -BT = Bx' ; +Maskmat = sprandn (cm,cn,0.2) ~= 0 ; +ATmat = Ax' ; +BTmat = Bx' ; for k2 = [4 7 45:52 ] for k1 = 1:4 @@ -64,9 +79,19 @@ B.is_csc = B_is_csc ; clear C - C.matrix = Cx ; + C.matrix = sparse (cm,cn) ; C.is_csc = C_is_csc ; + clear AT + AT.matrix = ATmat ; + AT.is_hyper = A_is_hyper ; + AT.is_csc = A.is_csc ; + + clear BT + BT.matrix = BTmat ; + BT.is_hyper = B_is_hyper ; + BT.is_csc = B.is_csc ; + %--------------------------------------- % kron(A,B) %--------------------------------------- @@ -103,6 +128,44 @@ C1 = GB_mex_kron (C, [ ], [ ], op, AT, BT, dtt) ; GB_spec_compare (C0, C1) ; + % tests with Mask + for Mask_is_hyper = 0:1 + for Mask_is_csc = 0:1 + fprintf('*') + + A.is_csc = A_is_csc ; + B.is_csc = B_is_csc ; + 
A.is_hyper = A_is_hyper ; + B.is_hyper = B_is_hyper ; + + clear M + M.matrix = Maskmat ; + M.is_hyper = Mask_is_hyper ; + M.is_csc = Mask_is_csc; + C.is_csc = C_is_csc ; + + % kron(A, B) with Mask + C0 = GB_spec_kron (C, M, [ ], op, A, B, dnn) ; + fprintf('#') ; + C1 = GB_mex_kron (C, M, [ ], op, A, B, dnn) ; + GB_spec_compare(C0, C1) ; + + % kron(A', B) with Mask + C0 = GB_spec_kron (C, M, [ ], op, AT, B, dtn) ; + C1 = GB_mex_kron (C, M, [ ], op, AT, B, dtn) ; + GB_spec_compare (C0, C1) ; + + % kron(A, B') with Mask + C0 = GB_spec_kron (C, M, [ ], op, A, BT, dnt) ; + C1 = GB_mex_kron (C, M, [ ], op, A, BT, dnt) ; + GB_spec_compare (C0, C1) ; + + % kron(A', B') with Mask + C0 = GB_spec_kron (C, M, [ ], op, AT, BT, dtt) ; + C1 = GB_mex_kron (C, M, [ ], op, AT, BT, dtt) ; + GB_spec_compare (C0, C1) ; + end + end end end end diff --git a/cmake_modules/GraphBLAS_version.cmake b/cmake_modules/GraphBLAS_version.cmake index a9208bb857..b2be1d004c 100644 --- a/cmake_modules/GraphBLAS_version.cmake +++ b/cmake_modules/GraphBLAS_version.cmake @@ -8,10 +8,10 @@ #------------------------------------------------------------------------------- # version of SuiteSparse:GraphBLAS -set ( GraphBLAS_DATE "Dec 3, 2025" ) +set ( GraphBLAS_DATE "Jan 21, 2026" ) set ( GraphBLAS_VERSION_MAJOR 10 CACHE STRING "" FORCE ) set ( GraphBLAS_VERSION_MINOR 3 CACHE STRING "" FORCE ) -set ( GraphBLAS_VERSION_SUB 0 CACHE STRING "" FORCE ) +set ( GraphBLAS_VERSION_SUB 1 CACHE STRING "" FORCE ) # GraphBLAS C API Specification version, at graphblas.org set ( GraphBLAS_API_DATE "Dec 22, 2023" )