1be1d678aSKris Buschelman 2d50806bdSBarry Smith /* 32499ec78SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5d50806bdSBarry Smith */ 6d50806bdSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 9c6db04a5SJed Brown #include <petscbt.h> 10af0996ceSBarry Smith #include <petsc/private/isimpl.h> 1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h> 127bab7c10SHong Zhang 139371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C) { 14df97dc6dSFande Kong PetscFunctionBegin; 15dbbe0bcdSBarry Smith if (C->ops->matmultnumeric) PetscCall((*C->ops->matmultnumeric)(A, B, C)); 16dbbe0bcdSBarry Smith else PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A, B, C)); 17df97dc6dSFande Kong PetscFunctionReturn(0); 18df97dc6dSFande Kong } 19df97dc6dSFande Kong 204222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */ 219371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], MatType mtype, Mat mat) { 224222ddf1SHong Zhang PetscInt ii; 234222ddf1SHong Zhang Mat_SeqAIJ *aij; 24cbc6b225SStefano Zampini PetscBool isseqaij, osingle, ofree_a, ofree_ij; 255c66b693SKris Buschelman 265c66b693SKris Buschelman PetscFunctionBegin; 27aed4548fSBarry Smith PetscCheck(m <= 0 || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 289566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, m, n, m, n)); 294222ddf1SHong Zhang 30e4e71118SStefano Zampini if (!mtype) { 319566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQAIJ, &isseqaij)); 329566063dSJacob Faibussowitsch if (!isseqaij) PetscCall(MatSetType(mat, MATSEQAIJ)); 33e4e71118SStefano Zampini } else { 349566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, mtype)); 35e4e71118SStefano Zampini } 36cbc6b225SStefano Zampini 374222ddf1SHong Zhang aij = (Mat_SeqAIJ *)(mat)->data; 38cbc6b225SStefano Zampini osingle = aij->singlemalloc; 39cbc6b225SStefano Zampini ofree_a = aij->free_a; 40cbc6b225SStefano Zampini ofree_ij = aij->free_ij; 41cbc6b225SStefano Zampini /* changes the free flags */ 429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat, MAT_SKIP_ALLOCATION, NULL)); 43cbc6b225SStefano Zampini 449566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->ilen)); 459566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->imax)); 469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &aij->imax)); 479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &aij->ilen)); 48cbc6b225SStefano Zampini for (ii = 0, aij->nonzerorowcnt = 0, aij->rmax = 0; ii < m; ii++) { 49cbc6b225SStefano Zampini const PetscInt rnz = i[ii + 1] - i[ii]; 50cbc6b225SStefano Zampini aij->nonzerorowcnt += !!rnz; 51cbc6b225SStefano Zampini aij->rmax = PetscMax(aij->rmax, rnz); 52cbc6b225SStefano Zampini aij->ilen[ii] = aij->imax[ii] = i[ii + 1] - i[ii]; 53cbc6b225SStefano Zampini } 54cbc6b225SStefano Zampini aij->maxnz = i[m]; 55cbc6b225SStefano Zampini aij->nz = i[m]; 564222ddf1SHong Zhang 57cbc6b225SStefano Zampini if (osingle) { 589566063dSJacob Faibussowitsch PetscCall(PetscFree3(aij->a, aij->j, aij->i)); 59cbc6b225SStefano Zampini } else { 609566063dSJacob Faibussowitsch if (ofree_a) PetscCall(PetscFree(aij->a)); 619566063dSJacob Faibussowitsch if (ofree_ij) PetscCall(PetscFree(aij->j)); 629566063dSJacob Faibussowitsch if (ofree_ij) PetscCall(PetscFree(aij->i)); 63cbc6b225SStefano Zampini } 644222ddf1SHong Zhang aij->i = i; 654222ddf1SHong Zhang aij->j = j; 664222ddf1SHong Zhang aij->a = a; 674222ddf1SHong Zhang aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */ 68cbc6b225SStefano Zampini /* default to not retain ownership */ 69cbc6b225SStefano Zampini aij->singlemalloc = PETSC_FALSE; 704222ddf1SHong Zhang aij->free_a = PETSC_FALSE; 714222ddf1SHong Zhang aij->free_ij = PETSC_FALSE; 729566063dSJacob Faibussowitsch PetscCall(MatCheckCompressedRow(mat, aij->nonzerorowcnt, &aij->compressedrow, aij->i, m, 0.6)); 735c66b693SKris Buschelman PetscFunctionReturn(0); 745c66b693SKris Buschelman } 751c24bd37SHong Zhang 769371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C) { 774222ddf1SHong Zhang Mat_Product *product = C->product; 784222ddf1SHong Zhang MatProductAlgorithm alg; 794222ddf1SHong Zhang PetscBool flg; 804222ddf1SHong Zhang 814222ddf1SHong Zhang PetscFunctionBegin; 824222ddf1SHong Zhang if (product) { 834222ddf1SHong Zhang alg = product->alg; 844222ddf1SHong Zhang } else { 854222ddf1SHong Zhang alg = "sorted"; 864222ddf1SHong Zhang } 874222ddf1SHong Zhang /* sorted */ 889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "sorted", &flg)); 894222ddf1SHong Zhang if (flg) { 909566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A, B, fill, C)); 914222ddf1SHong Zhang PetscFunctionReturn(0); 924222ddf1SHong Zhang } 934222ddf1SHong Zhang 944222ddf1SHong Zhang /* scalable */ 959566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "scalable", &flg)); 964222ddf1SHong Zhang if (flg) { 979566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A, B, fill, C)); 984222ddf1SHong Zhang PetscFunctionReturn(0); 994222ddf1SHong Zhang } 1004222ddf1SHong Zhang 1014222ddf1SHong Zhang /* scalable_fast */ 1029566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "scalable_fast", &flg)); 1034222ddf1SHong Zhang if (flg) { 1049566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A, B, fill, C)); 1054222ddf1SHong Zhang PetscFunctionReturn(0); 1064222ddf1SHong Zhang } 1074222ddf1SHong Zhang 1084222ddf1SHong Zhang /* heap */ 1099566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "heap", &flg)); 1104222ddf1SHong Zhang if (flg) { 1119566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A, B, fill, C)); 1124222ddf1SHong Zhang PetscFunctionReturn(0); 1134222ddf1SHong Zhang } 1144222ddf1SHong Zhang 1154222ddf1SHong Zhang /* btheap */ 1169566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "btheap", &flg)); 1174222ddf1SHong Zhang if (flg) { 1189566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A, B, fill, C)); 1194222ddf1SHong Zhang PetscFunctionReturn(0); 1204222ddf1SHong Zhang } 1214222ddf1SHong Zhang 1224222ddf1SHong Zhang /* llcondensed */ 1239566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "llcondensed", &flg)); 1244222ddf1SHong Zhang if (flg) { 1259566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A, B, fill, C)); 1264222ddf1SHong Zhang PetscFunctionReturn(0); 1274222ddf1SHong Zhang } 1284222ddf1SHong Zhang 1294222ddf1SHong Zhang /* rowmerge */ 1309566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "rowmerge", &flg)); 1314222ddf1SHong Zhang if (flg) { 1329566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A, B, fill, C)); 1334222ddf1SHong Zhang PetscFunctionReturn(0); 1344222ddf1SHong Zhang } 1354222ddf1SHong Zhang 1364222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE) 1379566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg, "hypre", &flg)); 1384222ddf1SHong Zhang if (flg) { 1399566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A, B, fill, C)); 1404222ddf1SHong Zhang PetscFunctionReturn(0); 1414222ddf1SHong Zhang } 1424222ddf1SHong Zhang #endif 1434222ddf1SHong Zhang 1444222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1454222ddf1SHong Zhang } 1464222ddf1SHong Zhang 1479371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A, Mat B, PetscReal fill, Mat C) { 148b561aa9dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 149971236abSHong Zhang PetscInt *ai = a->i, *bi = b->i, *ci, *cj; 150c1ba5575SJed Brown PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 151b561aa9dSHong Zhang PetscReal afill; 152eca6b86aSHong Zhang PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax; 153eca6b86aSHong Zhang PetscTable ta; 154fb908031SHong Zhang PetscBT lnkbt; 1550298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 156b561aa9dSHong Zhang 157b561aa9dSHong Zhang PetscFunctionBegin; 158bd958071SHong Zhang /* Get ci and cj */ 159bd958071SHong Zhang /*---------------*/ 160fb908031SHong Zhang /* Allocate ci array, arrays for fill computation and */ 161fb908031SHong Zhang /* free space for accumulating nonzero column info */ 1629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 2, &ci)); 163fb908031SHong Zhang ci[0] = 0; 164fb908031SHong Zhang 165fb908031SHong Zhang /* create and initialize a linked list */ 1669566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn, bn, &ta)); 167c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b, bm, ta); 1689566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta, &Crmax)); 1699566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 170eca6b86aSHong Zhang 1719566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate(Crmax, bn, &lnk, &lnkbt)); 172fb908031SHong Zhang 173fb908031SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1749566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space)); 1752205254eSKarl Rupp 176fb908031SHong Zhang current_space = free_space; 177fb908031SHong Zhang 178fb908031SHong Zhang /* Determine ci and cj */ 179fb908031SHong Zhang for (i = 0; i < am; i++) { 180fb908031SHong Zhang anzi = ai[i + 1] - ai[i]; 181fb908031SHong Zhang aj = a->j + ai[i]; 182fb908031SHong Zhang for (j = 0; j < anzi; j++) { 183fb908031SHong Zhang brow = aj[j]; 184fb908031SHong Zhang bnzj = bi[brow + 1] - bi[brow]; 185fb908031SHong Zhang bj = b->j + bi[brow]; 186fb908031SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 1879566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted(bnzj, bj, lnk, lnkbt)); 188fb908031SHong Zhang } 1898c78258cSHong Zhang /* add possible missing diagonal entry */ 190*48a46eb9SPierre Jolivet if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted(1, &i, lnk, lnkbt)); 191fb908031SHong Zhang cnzi = lnk[0]; 192fb908031SHong Zhang 193fb908031SHong Zhang /* If free space is not available, make more free space */ 194fb908031SHong Zhang /* Double the amount of total space in the list */ 195fb908031SHong Zhang if (current_space->local_remaining < cnzi) { 1969566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 197fb908031SHong Zhang ndouble++; 198fb908031SHong Zhang } 199fb908031SHong Zhang 200fb908031SHong Zhang /* Copy data into free space, then initialize lnk */ 2019566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean(bn, cnzi, current_space->array, lnk, lnkbt)); 2022205254eSKarl Rupp 203fb908031SHong Zhang current_space->array += cnzi; 204fb908031SHong Zhang current_space->local_used += cnzi; 205fb908031SHong Zhang current_space->local_remaining -= cnzi; 2062205254eSKarl Rupp 207fb908031SHong Zhang ci[i + 1] = ci[i] + cnzi; 208fb908031SHong Zhang } 209fb908031SHong Zhang 210fb908031SHong Zhang /* Column indices are in the list of free space */ 211fb908031SHong Zhang /* Allocate space for cj, initialize cj, and */ 212fb908031SHong Zhang /* destroy list of free space and other temporary array(s) */ 2139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am] + 1, &cj)); 2149566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 2159566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy(lnk, lnkbt)); 216b561aa9dSHong Zhang 21726be0446SHong Zhang /* put together the new symbolic matrix */ 2189566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C)); 2199566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 22058c24d83SHong Zhang 22158c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22258c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 2234222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 224aa1aec99SHong Zhang c->free_a = PETSC_FALSE; 225e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 22658c24d83SHong Zhang c->nonew = 0; 2274222ddf1SHong Zhang 2284222ddf1SHong Zhang /* fast, needs non-scalable O(bn) array 'abdense' */ 2294222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 2300b7e3e3dSHong Zhang 2317212da7cSHong Zhang /* set MatInfo */ 2327212da7cSHong Zhang afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 233f2b054eeSHong Zhang if (afill < 1.0) afill = 1.0; 2344222ddf1SHong Zhang C->info.mallocs = ndouble; 2354222ddf1SHong Zhang C->info.fill_ratio_given = fill; 2364222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 2377212da7cSHong Zhang 2387212da7cSHong Zhang #if defined(PETSC_USE_INFO) 2397212da7cSHong Zhang if (ci[am]) { 2409566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 2419566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 242f2b054eeSHong Zhang } else { 2439566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 244be0fcf8dSHong Zhang } 245f2b054eeSHong Zhang #endif 24658c24d83SHong Zhang PetscFunctionReturn(0); 24758c24d83SHong Zhang } 248d50806bdSBarry Smith 2499371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, Mat C) { 250d13dce4bSSatish Balay PetscLogDouble flops = 0.0; 251d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 252d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 253d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 25438baddfdSBarry Smith PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j; 255c58ca2e3SHong Zhang PetscInt am = A->rmap->n, cm = C->rmap->n; 256519eb980SHong Zhang PetscInt i, j, k, anzi, bnzi, cnzi, brow; 2572e5835c6SStefano Zampini PetscScalar *ca, valtmp; 258aa1aec99SHong Zhang PetscScalar *ab_dense; 2596718818eSStefano Zampini PetscContainer cab_dense; 2602e5835c6SStefano Zampini const PetscScalar *aa, *ba, *baj; 261d50806bdSBarry Smith 262d50806bdSBarry Smith PetscFunctionBegin; 2639566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 2649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 265aa1aec99SHong Zhang if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 2669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cm] + 1, &ca)); 267aa1aec99SHong Zhang c->a = ca; 268aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 2696718818eSStefano Zampini } else ca = c->a; 2706718818eSStefano Zampini 2716718818eSStefano Zampini /* TODO this should be done in the symbolic phase */ 2726718818eSStefano Zampini /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 2736718818eSStefano Zampini that is hard to eradicate) */ 2749566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)C, "__PETSc__ab_dense", (PetscObject *)&cab_dense)); 2756718818eSStefano Zampini if (!cab_dense) { 2769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->cmap->N, &ab_dense)); 2779566063dSJacob Faibussowitsch PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &cab_dense)); 2789566063dSJacob Faibussowitsch PetscCall(PetscContainerSetPointer(cab_dense, ab_dense)); 2799566063dSJacob Faibussowitsch PetscCall(PetscContainerSetUserDestroy(cab_dense, PetscContainerUserDestroyDefault)); 2809566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)C, "__PETSc__ab_dense", (PetscObject)cab_dense)); 2819566063dSJacob Faibussowitsch PetscCall(PetscObjectDereference((PetscObject)cab_dense)); 282d90d86d0SMatthew G. Knepley } 2839566063dSJacob Faibussowitsch PetscCall(PetscContainerGetPointer(cab_dense, (void **)&ab_dense)); 2849566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ab_dense, B->cmap->N)); 285aa1aec99SHong Zhang 286c124e916SHong Zhang /* clean old values in C */ 2879566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 288d50806bdSBarry Smith /* Traverse A row-wise. */ 289d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 290d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 291d50806bdSBarry Smith for (i = 0; i < am; i++) { 292d50806bdSBarry Smith anzi = ai[i + 1] - ai[i]; 293d50806bdSBarry Smith for (j = 0; j < anzi; j++) { 294519eb980SHong Zhang brow = aj[j]; 295d50806bdSBarry Smith bnzi = bi[brow + 1] - bi[brow]; 296d50806bdSBarry Smith bjj = bj + bi[brow]; 297d50806bdSBarry Smith baj = ba + bi[brow]; 298519eb980SHong Zhang /* perform dense axpy */ 29936ec6d2dSHong Zhang valtmp = aa[j]; 3009371c9d4SSatish Balay for (k = 0; k < bnzi; k++) { ab_dense[bjj[k]] += valtmp * baj[k]; } 301519eb980SHong Zhang flops += 2 * bnzi; 302519eb980SHong Zhang } 3039371c9d4SSatish Balay aj += anzi; 3049371c9d4SSatish Balay aa += anzi; 305c58ca2e3SHong Zhang 306c58ca2e3SHong Zhang cnzi = ci[i + 1] - ci[i]; 307519eb980SHong Zhang for (k = 0; k < cnzi; k++) { 308519eb980SHong Zhang ca[k] += ab_dense[cj[k]]; 309519eb980SHong Zhang ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 310519eb980SHong Zhang } 311519eb980SHong Zhang flops += cnzi; 3129371c9d4SSatish Balay cj += cnzi; 3139371c9d4SSatish Balay ca += cnzi; 314519eb980SHong Zhang } 3152e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 3162e5835c6SStefano Zampini if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 3172e5835c6SStefano Zampini #endif 3189566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3199566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3209566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 3219566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 3229566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 323c58ca2e3SHong Zhang PetscFunctionReturn(0); 324c58ca2e3SHong Zhang } 325c58ca2e3SHong Zhang 3269371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, Mat C) { 327c58ca2e3SHong Zhang PetscLogDouble flops = 0.0; 328c58ca2e3SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 329c58ca2e3SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 330c58ca2e3SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 331c58ca2e3SHong Zhang PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j; 332c58ca2e3SHong Zhang PetscInt am = A->rmap->N, cm = C->rmap->N; 333c58ca2e3SHong Zhang PetscInt i, j, k, anzi, bnzi, cnzi, brow; 3342e5835c6SStefano Zampini PetscScalar *ca = c->a, valtmp; 3352e5835c6SStefano Zampini const PetscScalar *aa, *ba, *baj; 336c58ca2e3SHong Zhang PetscInt nextb; 337c58ca2e3SHong Zhang 338c58ca2e3SHong Zhang PetscFunctionBegin; 3399566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &aa)); 3409566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 341cf742fccSHong Zhang if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 3429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cm] + 1, &ca)); 343cf742fccSHong Zhang c->a = ca; 344cf742fccSHong Zhang c->free_a = PETSC_TRUE; 345cf742fccSHong Zhang } 346cf742fccSHong Zhang 347c58ca2e3SHong Zhang /* clean old values in C */ 3489566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 349c58ca2e3SHong Zhang /* Traverse A row-wise. */ 350c58ca2e3SHong Zhang /* Build the ith row in C by summing over nonzero columns in A, */ 351c58ca2e3SHong Zhang /* the rows of B corresponding to nonzeros of A. */ 352519eb980SHong Zhang for (i = 0; i < am; i++) { 353519eb980SHong Zhang anzi = ai[i + 1] - ai[i]; 354519eb980SHong Zhang cnzi = ci[i + 1] - ci[i]; 355519eb980SHong Zhang for (j = 0; j < anzi; j++) { 356519eb980SHong Zhang brow = aj[j]; 357519eb980SHong Zhang bnzi = bi[brow + 1] - bi[brow]; 358519eb980SHong Zhang bjj = bj + bi[brow]; 359519eb980SHong Zhang baj = ba + bi[brow]; 360519eb980SHong Zhang /* perform sparse axpy */ 36136ec6d2dSHong Zhang valtmp = aa[j]; 362c124e916SHong Zhang nextb = 0; 363c124e916SHong Zhang for (k = 0; nextb < bnzi; k++) { 364c124e916SHong Zhang if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 36536ec6d2dSHong Zhang ca[k] += valtmp * baj[nextb++]; 366c124e916SHong Zhang } 367d50806bdSBarry Smith } 368d50806bdSBarry Smith flops += 2 * bnzi; 369d50806bdSBarry Smith } 3709371c9d4SSatish Balay aj += anzi; 3719371c9d4SSatish Balay aa += anzi; 3729371c9d4SSatish Balay cj += cnzi; 3739371c9d4SSatish Balay ca += cnzi; 374d50806bdSBarry Smith } 3752e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 3762e5835c6SStefano Zampini if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 3772e5835c6SStefano Zampini #endif 3789566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3799566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3809566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 3819566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &aa)); 3829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 383d50806bdSBarry Smith PetscFunctionReturn(0); 384d50806bdSBarry Smith } 385bc011b1eSHong Zhang 3869371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A, Mat B, PetscReal fill, Mat C) { 38725296bd5SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 38825296bd5SBarry Smith PetscInt *ai = a->i, *bi = b->i, *ci, *cj; 3893c50cad2SHong Zhang PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 39025296bd5SBarry Smith MatScalar *ca; 39125296bd5SBarry Smith PetscReal afill; 392eca6b86aSHong Zhang PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax; 393eca6b86aSHong Zhang PetscTable ta; 3940298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 39525296bd5SBarry Smith 39625296bd5SBarry Smith PetscFunctionBegin; 3973c50cad2SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 3983c50cad2SHong Zhang /*-----------------------------------------------------------------------------------------*/ 3993c50cad2SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 4009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 2, &ci)); 4013c50cad2SHong Zhang ci[0] = 0; 4023c50cad2SHong Zhang 4033c50cad2SHong Zhang /* create and initialize a linked list */ 4049566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn, bn, &ta)); 405c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b, bm, ta); 4069566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta, &Crmax)); 4079566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 408eca6b86aSHong Zhang 4099566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate_fast(Crmax, &lnk)); 4103c50cad2SHong Zhang 4113c50cad2SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 4129566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space)); 4133c50cad2SHong Zhang current_space = free_space; 4143c50cad2SHong Zhang 4153c50cad2SHong Zhang /* Determine ci and cj */ 4163c50cad2SHong Zhang for (i = 0; i < am; i++) { 4173c50cad2SHong Zhang anzi = ai[i + 1] - ai[i]; 4183c50cad2SHong Zhang aj = a->j + ai[i]; 4193c50cad2SHong Zhang for (j = 0; j < anzi; j++) { 4203c50cad2SHong Zhang brow = aj[j]; 4213c50cad2SHong Zhang bnzj = bi[brow + 1] - bi[brow]; 4223c50cad2SHong Zhang bj = b->j + bi[brow]; 4233c50cad2SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 4249566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_fast(bnzj, bj, lnk)); 4253c50cad2SHong Zhang } 4268c78258cSHong Zhang /* add possible missing diagonal entry */ 427*48a46eb9SPierre Jolivet if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted_fast(1, &i, lnk)); 4283c50cad2SHong Zhang cnzi = lnk[1]; 4293c50cad2SHong Zhang 4303c50cad2SHong Zhang /* If free space is not available, make more free space */ 4313c50cad2SHong Zhang /* Double the amount of total space in the list */ 4323c50cad2SHong Zhang if (current_space->local_remaining < cnzi) { 4339566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 4343c50cad2SHong Zhang ndouble++; 4353c50cad2SHong Zhang } 4363c50cad2SHong Zhang 4373c50cad2SHong Zhang /* Copy data into free space, then initialize lnk */ 4389566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean_fast(cnzi, current_space->array, lnk)); 4392205254eSKarl Rupp 4403c50cad2SHong Zhang current_space->array += cnzi; 4413c50cad2SHong Zhang current_space->local_used += cnzi; 4423c50cad2SHong Zhang current_space->local_remaining -= cnzi; 4432205254eSKarl Rupp 4443c50cad2SHong Zhang ci[i + 1] = ci[i] + cnzi; 4453c50cad2SHong Zhang } 4463c50cad2SHong Zhang 4473c50cad2SHong Zhang /* Column indices are in the list of free space */ 4483c50cad2SHong Zhang /* Allocate space for cj, initialize cj, and */ 4493c50cad2SHong Zhang /* destroy list of free space and other temporary array(s) */ 4509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am] + 1, &cj)); 4519566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 4529566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy_fast(lnk)); 45325296bd5SBarry Smith 45425296bd5SBarry Smith /* Allocate space for ca */ 4559566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[am] + 1, &ca)); 45625296bd5SBarry Smith 45725296bd5SBarry Smith /* put together the new symbolic matrix */ 4589566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C)); 4599566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 46025296bd5SBarry Smith 46125296bd5SBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 46225296bd5SBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 4634222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 46425296bd5SBarry Smith c->free_a = PETSC_TRUE; 46525296bd5SBarry Smith c->free_ij = PETSC_TRUE; 46625296bd5SBarry Smith c->nonew = 0; 4672205254eSKarl Rupp 4684222ddf1SHong Zhang /* slower, less memory */ 4694222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 47025296bd5SBarry Smith 47125296bd5SBarry Smith /* set MatInfo */ 47225296bd5SBarry Smith afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 47325296bd5SBarry Smith if (afill < 1.0) afill = 1.0; 4744222ddf1SHong Zhang C->info.mallocs = ndouble; 4754222ddf1SHong Zhang C->info.fill_ratio_given = fill; 4764222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 47725296bd5SBarry Smith 47825296bd5SBarry Smith #if defined(PETSC_USE_INFO) 47925296bd5SBarry Smith if (ci[am]) { 4809566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 4819566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 48225296bd5SBarry Smith } else { 4839566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 48425296bd5SBarry Smith } 48525296bd5SBarry Smith #endif 48625296bd5SBarry Smith PetscFunctionReturn(0); 48725296bd5SBarry Smith } 48825296bd5SBarry Smith 4899371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, PetscReal fill, Mat C) { 490e9e4536cSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 491bf9555e6SHong Zhang PetscInt *ai = a->i, *bi = b->i, *ci, *cj; 49225c41797SHong Zhang PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 493e9e4536cSHong Zhang MatScalar *ca; 494e9e4536cSHong Zhang PetscReal afill; 495eca6b86aSHong Zhang PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax; 496eca6b86aSHong Zhang PetscTable ta; 4970298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 498e9e4536cSHong Zhang 499e9e4536cSHong Zhang PetscFunctionBegin; 500bd958071SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 501bd958071SHong Zhang /*---------------------------------------------------------------------------------------------*/ 502bd958071SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 5039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 2, &ci)); 504bd958071SHong Zhang ci[0] = 0; 505bd958071SHong Zhang 506bd958071SHong Zhang /* create and initialize a linked list */ 5079566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn, bn, &ta)); 508c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b, bm, ta); 5099566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta, &Crmax)); 5109566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 5119566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate_Scalable(Crmax, &lnk)); 512bd958071SHong Zhang 513bd958071SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 5149566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space)); 515bd958071SHong Zhang current_space = free_space; 516bd958071SHong Zhang 517bd958071SHong Zhang /* Determine ci and cj */ 518bd958071SHong Zhang for (i = 0; i < am; i++) { 519bd958071SHong Zhang anzi = ai[i + 1] - ai[i]; 520bd958071SHong Zhang aj = a->j + ai[i]; 521bd958071SHong Zhang for (j = 0; j < anzi; j++) { 522bd958071SHong Zhang brow = aj[j]; 523bd958071SHong Zhang bnzj = bi[brow + 1] - bi[brow]; 524bd958071SHong Zhang bj = b->j + bi[brow]; 525bd958071SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 5269566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj, bj, lnk)); 527bd958071SHong Zhang } 5288c78258cSHong Zhang /* add possible missing diagonal entry */ 529*48a46eb9SPierre Jolivet if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted_Scalable(1, &i, lnk)); 5308c78258cSHong Zhang 531bd958071SHong Zhang cnzi = lnk[0]; 532bd958071SHong Zhang 533bd958071SHong Zhang /* If free space is not available, make more free space */ 534bd958071SHong Zhang /* Double the amount of total space in the list */ 535bd958071SHong Zhang if (current_space->local_remaining < cnzi) { 5369566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 537bd958071SHong Zhang ndouble++; 538bd958071SHong Zhang } 539bd958071SHong Zhang 540bd958071SHong Zhang /* Copy data into free space, then initialize lnk */ 5419566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean_Scalable(cnzi, current_space->array, lnk)); 5422205254eSKarl Rupp 543bd958071SHong Zhang current_space->array += cnzi; 544bd958071SHong Zhang current_space->local_used += cnzi; 545bd958071SHong Zhang current_space->local_remaining -= cnzi; 5462205254eSKarl Rupp 547bd958071SHong Zhang ci[i + 1] = ci[i] + cnzi; 548bd958071SHong Zhang } 549bd958071SHong Zhang 550bd958071SHong Zhang /* Column indices are in the list of free space */ 551bd958071SHong Zhang /* Allocate space for cj, initialize cj, and */ 552bd958071SHong Zhang /* destroy list of free space and other temporary array(s) */ 5539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am] + 1, &cj)); 5549566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 5559566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy_Scalable(lnk)); 556e9e4536cSHong Zhang 557e9e4536cSHong Zhang /* Allocate space for ca */ 558bd958071SHong Zhang /*-----------------------*/ 5599566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[am] + 1, &ca)); 560e9e4536cSHong Zhang 561e9e4536cSHong Zhang /* put together the new symbolic matrix */ 5629566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C)); 5639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 564e9e4536cSHong Zhang 565e9e4536cSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 566e9e4536cSHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 5674222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 568e9e4536cSHong Zhang c->free_a = PETSC_TRUE; 569e9e4536cSHong Zhang c->free_ij = PETSC_TRUE; 570e9e4536cSHong Zhang c->nonew = 0; 5712205254eSKarl Rupp 5724222ddf1SHong Zhang /* slower, less memory */ 5734222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 574e9e4536cSHong Zhang 575e9e4536cSHong Zhang /* set MatInfo */ 576e9e4536cSHong Zhang afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 577e9e4536cSHong Zhang if (afill < 1.0) afill = 1.0; 5784222ddf1SHong Zhang C->info.mallocs = ndouble; 5794222ddf1SHong Zhang C->info.fill_ratio_given = fill; 5804222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 581e9e4536cSHong Zhang 582e9e4536cSHong Zhang #if defined(PETSC_USE_INFO) 583e9e4536cSHong Zhang if (ci[am]) { 5849566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 5859566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 586e9e4536cSHong Zhang } else { 5879566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 588e9e4536cSHong Zhang } 589e9e4536cSHong Zhang #endif 590e9e4536cSHong Zhang PetscFunctionReturn(0); 591e9e4536cSHong Zhang } 592e9e4536cSHong Zhang 5939371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A, Mat B, PetscReal fill, Mat C) { 5940ced3a2bSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 5950ced3a2bSJed Brown const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j; 5960ced3a2bSJed Brown PetscInt *ci, *cj, *bb; 5970ced3a2bSJed Brown PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 5980ced3a2bSJed Brown PetscReal afill; 5990ced3a2bSJed Brown PetscInt i, j, col, ndouble = 0; 6000298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 6010ced3a2bSJed Brown PetscHeap h; 6020ced3a2bSJed Brown 6030ced3a2bSJed Brown PetscFunctionBegin; 604cd093f1dSJed Brown /* Get ci and cj - by merging sorted rows using a heap */ 6050ced3a2bSJed Brown /*---------------------------------------------------------------------------------------------*/ 6060ced3a2bSJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 6079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 2, &ci)); 6080ced3a2bSJed Brown ci[0] = 0; 6090ced3a2bSJed Brown 6100ced3a2bSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 6119566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space)); 6120ced3a2bSJed Brown current_space = free_space; 6130ced3a2bSJed Brown 6149566063dSJacob Faibussowitsch PetscCall(PetscHeapCreate(a->rmax, &h)); 6159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->rmax, &bb)); 6160ced3a2bSJed Brown 6170ced3a2bSJed Brown /* Determine ci and cj */ 6180ced3a2bSJed Brown for (i = 0; i < am; i++) { 6190ced3a2bSJed Brown const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 6200ced3a2bSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 6210ced3a2bSJed Brown ci[i + 1] = ci[i]; 6220ced3a2bSJed Brown /* Populate the min heap */ 6230ced3a2bSJed Brown for (j = 0; j < anzi; j++) { 6240ced3a2bSJed Brown bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 6250ced3a2bSJed Brown if (bb[j] < bi[acol[j] + 1]) { /* Add if row is nonempty */ 6269566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h, j, bj[bb[j]++])); 6270ced3a2bSJed Brown } 6280ced3a2bSJed Brown } 6290ced3a2bSJed Brown /* Pick off the min element, adding it to free space */ 6309566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h, &j, &col)); 6310ced3a2bSJed Brown while (j >= 0) { 6320ced3a2bSJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 6339566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space)); 6340ced3a2bSJed Brown ndouble++; 6350ced3a2bSJed Brown } 6360ced3a2bSJed Brown *(current_space->array++) = col; 6370ced3a2bSJed Brown current_space->local_used++; 6380ced3a2bSJed Brown current_space->local_remaining--; 6390ced3a2bSJed Brown ci[i + 1]++; 6400ced3a2bSJed Brown 6410ced3a2bSJed Brown /* stash if anything else remains in this row of B */ 6429566063dSJacob Faibussowitsch if (bb[j] < bi[acol[j] + 1]) PetscCall(PetscHeapStash(h, j, bj[bb[j]++])); 6430ced3a2bSJed Brown while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 6440ced3a2bSJed Brown PetscInt j2, col2; 6459566063dSJacob Faibussowitsch PetscCall(PetscHeapPeek(h, &j2, &col2)); 6460ced3a2bSJed Brown if (col2 != col) break; 6479566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h, &j2, &col2)); 6489566063dSJacob Faibussowitsch if (bb[j2] < bi[acol[j2] + 1]) PetscCall(PetscHeapStash(h, j2, bj[bb[j2]++])); 6490ced3a2bSJed Brown } 6500ced3a2bSJed Brown /* Put any stashed elements back into the min heap */ 6519566063dSJacob Faibussowitsch PetscCall(PetscHeapUnstash(h)); 6529566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h, &j, &col)); 6530ced3a2bSJed Brown } 6540ced3a2bSJed Brown } 6559566063dSJacob Faibussowitsch PetscCall(PetscFree(bb)); 6569566063dSJacob Faibussowitsch PetscCall(PetscHeapDestroy(&h)); 6570ced3a2bSJed Brown 6580ced3a2bSJed Brown /* Column indices are in the list of free space */ 6590ced3a2bSJed Brown /* Allocate space for cj, initialize cj, and */ 6600ced3a2bSJed Brown /* destroy list of free space and other temporary array(s) */ 6619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am], &cj)); 6629566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 6630ced3a2bSJed Brown 6640ced3a2bSJed Brown /* put together the new symbolic matrix */ 6659566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C)); 6669566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6670ced3a2bSJed Brown 6680ced3a2bSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6690ced3a2bSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 6704222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 6710ced3a2bSJed Brown c->free_a = PETSC_TRUE; 6720ced3a2bSJed Brown c->free_ij = PETSC_TRUE; 6730ced3a2bSJed Brown c->nonew = 0; 67426fbe8dcSKarl Rupp 6754222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 6760ced3a2bSJed Brown 6770ced3a2bSJed Brown /* set MatInfo */ 6780ced3a2bSJed Brown afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 6790ced3a2bSJed Brown if (afill < 1.0) afill = 1.0; 6804222ddf1SHong Zhang C->info.mallocs = ndouble; 6814222ddf1SHong Zhang C->info.fill_ratio_given = fill; 6824222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 6830ced3a2bSJed Brown 6840ced3a2bSJed Brown #if defined(PETSC_USE_INFO) 6850ced3a2bSJed Brown if (ci[am]) { 6869566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 6879566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 6880ced3a2bSJed Brown } else { 6899566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 6900ced3a2bSJed Brown } 6910ced3a2bSJed Brown #endif 6920ced3a2bSJed Brown PetscFunctionReturn(0); 6930ced3a2bSJed Brown } 694e9e4536cSHong Zhang 6959371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A, Mat B, PetscReal fill, Mat C) { 6968a07c6f1SJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 6978a07c6f1SJed Brown const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j; 6988a07c6f1SJed Brown PetscInt *ci, *cj, *bb; 6998a07c6f1SJed Brown PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 7008a07c6f1SJed Brown PetscReal afill; 7018a07c6f1SJed Brown PetscInt i, j, col, ndouble = 0; 7020298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 7038a07c6f1SJed Brown PetscHeap h; 7048a07c6f1SJed Brown PetscBT bt; 7058a07c6f1SJed Brown 7068a07c6f1SJed Brown PetscFunctionBegin; 707cd093f1dSJed Brown /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 7088a07c6f1SJed Brown /*---------------------------------------------------------------------------------------------*/ 7098a07c6f1SJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 7109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 2, &ci)); 7118a07c6f1SJed Brown ci[0] = 0; 7128a07c6f1SJed Brown 7138a07c6f1SJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 7149566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space)); 7152205254eSKarl Rupp 7168a07c6f1SJed Brown current_space = free_space; 7178a07c6f1SJed Brown 7189566063dSJacob Faibussowitsch PetscCall(PetscHeapCreate(a->rmax, &h)); 7199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->rmax, &bb)); 7209566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(bn, &bt)); 7218a07c6f1SJed Brown 7228a07c6f1SJed Brown /* Determine ci and cj */ 7238a07c6f1SJed Brown for (i = 0; i < am; i++) { 7248a07c6f1SJed Brown const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 7258a07c6f1SJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 7268a07c6f1SJed Brown const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 7278a07c6f1SJed Brown ci[i + 1] = ci[i]; 7288a07c6f1SJed Brown /* Populate the min heap */ 7298a07c6f1SJed Brown for (j = 0; j < anzi; j++) { 7308a07c6f1SJed Brown PetscInt brow = acol[j]; 7318a07c6f1SJed Brown for (bb[j] = bi[brow]; bb[j] < bi[brow + 1]; bb[j]++) { 7328a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7338a07c6f1SJed Brown if (!PetscBTLookupSet(bt, bcol)) { /* new entry */ 7349566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h, j, bcol)); 7358a07c6f1SJed Brown bb[j]++; 7368a07c6f1SJed Brown break; 7378a07c6f1SJed Brown } 7388a07c6f1SJed Brown } 7398a07c6f1SJed Brown } 7408a07c6f1SJed Brown /* Pick off the min element, adding it to free space */ 7419566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h, &j, &col)); 7428a07c6f1SJed Brown while (j >= 0) { 7438a07c6f1SJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 7440298fd71SBarry Smith fptr = NULL; /* need PetscBTMemzero */ 7459566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space)); 7468a07c6f1SJed Brown ndouble++; 7478a07c6f1SJed Brown } 7488a07c6f1SJed Brown *(current_space->array++) = col; 7498a07c6f1SJed Brown current_space->local_used++; 7508a07c6f1SJed Brown current_space->local_remaining--; 7518a07c6f1SJed Brown ci[i + 1]++; 7528a07c6f1SJed Brown 7538a07c6f1SJed Brown /* stash if anything else remains in this row of B */ 7548a07c6f1SJed Brown for (; bb[j] < bi[acol[j] + 1]; bb[j]++) { 7558a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7568a07c6f1SJed Brown if (!PetscBTLookupSet(bt, bcol)) { /* new entry */ 7579566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h, j, bcol)); 7588a07c6f1SJed Brown bb[j]++; 7598a07c6f1SJed Brown break; 7608a07c6f1SJed Brown } 7618a07c6f1SJed Brown } 7629566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h, &j, &col)); 7638a07c6f1SJed Brown } 7648a07c6f1SJed Brown if (fptr) { /* Clear the bits for this row */ 7659566063dSJacob Faibussowitsch for (; fptr < current_space->array; fptr++) PetscCall(PetscBTClear(bt, *fptr)); 7668a07c6f1SJed Brown } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 7679566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(bn, bt)); 7688a07c6f1SJed Brown } 7698a07c6f1SJed Brown } 7709566063dSJacob Faibussowitsch PetscCall(PetscFree(bb)); 7719566063dSJacob Faibussowitsch PetscCall(PetscHeapDestroy(&h)); 7729566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 7738a07c6f1SJed Brown 7748a07c6f1SJed Brown /* Column indices are in the list of free space */ 7758a07c6f1SJed Brown /* Allocate space for cj, initialize cj, and */ 7768a07c6f1SJed Brown /* destroy list of free space and other temporary array(s) */ 7779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am], &cj)); 7789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 7798a07c6f1SJed Brown 7808a07c6f1SJed Brown /* put together the new symbolic matrix */ 7819566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C)); 7829566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 7838a07c6f1SJed Brown 7848a07c6f1SJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7858a07c6f1SJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 7864222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 7878a07c6f1SJed Brown c->free_a = PETSC_TRUE; 7888a07c6f1SJed Brown c->free_ij = PETSC_TRUE; 7898a07c6f1SJed Brown c->nonew = 0; 79026fbe8dcSKarl Rupp 7914222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 7928a07c6f1SJed Brown 7938a07c6f1SJed Brown /* set MatInfo */ 7948a07c6f1SJed Brown afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 7958a07c6f1SJed Brown if (afill < 1.0) afill = 1.0; 7964222ddf1SHong Zhang C->info.mallocs = ndouble; 7974222ddf1SHong Zhang C->info.fill_ratio_given = fill; 7984222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 7998a07c6f1SJed Brown 8008a07c6f1SJed Brown #if defined(PETSC_USE_INFO) 8018a07c6f1SJed Brown if (ci[am]) { 8029566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 8039566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 8048a07c6f1SJed Brown } else { 8059566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 8068a07c6f1SJed Brown } 8078a07c6f1SJed Brown #endif 8088a07c6f1SJed Brown PetscFunctionReturn(0); 8098a07c6f1SJed Brown } 8108a07c6f1SJed Brown 8119371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A, Mat B, PetscReal fill, Mat C) { 812d7ed1a4dSandi selinger Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 813d7ed1a4dSandi selinger const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j, *inputi, *inputj, *inputcol, *inputcol_L1; 814d7ed1a4dSandi selinger PetscInt *ci, *cj, *outputj, worki_L1[9], worki_L2[9]; 815d7ed1a4dSandi selinger PetscInt c_maxmem, a_maxrownnz = 0, a_rownnz; 816d7ed1a4dSandi selinger const PetscInt workcol[8] = {0, 1, 2, 3, 4, 5, 6, 7}; 817d7ed1a4dSandi selinger const PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 818d7ed1a4dSandi selinger const PetscInt *brow_ptr[8], *brow_end[8]; 819d7ed1a4dSandi selinger PetscInt window[8]; 820d7ed1a4dSandi selinger PetscInt window_min, old_window_min, ci_nnz, outputi_nnz = 0, L1_nrows, L2_nrows; 821d7ed1a4dSandi selinger PetscInt i, k, ndouble = 0, L1_rowsleft, rowsleft; 822d7ed1a4dSandi selinger PetscReal afill; 823f83700f2Sandi selinger PetscInt *workj_L1, *workj_L2, *workj_L3; 8247660c4dbSandi selinger PetscInt L1_nnz, L2_nnz; 825d7ed1a4dSandi selinger 826d7ed1a4dSandi selinger /* Step 1: Get upper bound on memory required for allocation. 827d7ed1a4dSandi selinger Because of the way virtual memory works, 828d7ed1a4dSandi selinger only the memory pages that are actually needed will be physically allocated. */ 829d7ed1a4dSandi selinger PetscFunctionBegin; 8309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ci)); 831d7ed1a4dSandi selinger for (i = 0; i < am; i++) { 832d7ed1a4dSandi selinger const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 833d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 834d7ed1a4dSandi selinger a_rownnz = 0; 835d7ed1a4dSandi selinger for (k = 0; k < anzi; ++k) { 836d7ed1a4dSandi selinger a_rownnz += bi[acol[k] + 1] - bi[acol[k]]; 837d7ed1a4dSandi selinger if (a_rownnz > bn) { 838d7ed1a4dSandi selinger a_rownnz = bn; 839d7ed1a4dSandi selinger break; 840d7ed1a4dSandi selinger } 841d7ed1a4dSandi selinger } 842d7ed1a4dSandi selinger a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 843d7ed1a4dSandi selinger } 844d7ed1a4dSandi selinger /* temporary work areas for merging rows */ 8459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz * 8, &workj_L1)); 8469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz * 8, &workj_L2)); 8479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz, &workj_L3)); 848d7ed1a4dSandi selinger 8497660c4dbSandi selinger /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 8507660c4dbSandi selinger c_maxmem = 8 * (ai[am] + bi[bm]); 851d7ed1a4dSandi selinger /* Step 2: Populate pattern for C */ 8529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(c_maxmem, &cj)); 853d7ed1a4dSandi selinger 854d7ed1a4dSandi selinger ci_nnz = 0; 855d7ed1a4dSandi selinger ci[0] = 0; 856d7ed1a4dSandi selinger worki_L1[0] = 0; 857d7ed1a4dSandi selinger worki_L2[0] = 0; 858d7ed1a4dSandi selinger for (i = 0; i < am; i++) { 859d7ed1a4dSandi selinger const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 860d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 861d7ed1a4dSandi selinger rowsleft = anzi; 862d7ed1a4dSandi selinger inputcol_L1 = acol; 863d7ed1a4dSandi selinger L2_nnz = 0; 8647660c4dbSandi selinger L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 8657660c4dbSandi selinger worki_L2[1] = 0; 86608fe1b3cSKarl Rupp outputi_nnz = 0; 867d7ed1a4dSandi selinger 868d7ed1a4dSandi selinger /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 869d7ed1a4dSandi selinger while (ci_nnz + a_maxrownnz > c_maxmem) { 870d7ed1a4dSandi selinger c_maxmem *= 2; 871d7ed1a4dSandi selinger ndouble++; 8729566063dSJacob Faibussowitsch PetscCall(PetscRealloc(sizeof(PetscInt) * c_maxmem, &cj)); 873d7ed1a4dSandi selinger } 874d7ed1a4dSandi selinger 875d7ed1a4dSandi selinger while (rowsleft) { 876d7ed1a4dSandi selinger L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 877d7ed1a4dSandi selinger L1_nrows = 0; 8787660c4dbSandi selinger L1_nnz = 0; 879d7ed1a4dSandi selinger inputcol = inputcol_L1; 880d7ed1a4dSandi selinger inputi = bi; 881d7ed1a4dSandi selinger inputj = bj; 882d7ed1a4dSandi selinger 883d7ed1a4dSandi selinger /* The following macro is used to specialize for small rows in A. 884d7ed1a4dSandi selinger This helps with compiler unrolling, improving performance substantially. 885f83700f2Sandi selinger Input: inputj inputi inputcol bn 886d7ed1a4dSandi selinger Output: outputj outputi_nnz */ 887d7ed1a4dSandi selinger #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 888d7ed1a4dSandi selinger window_min = bn; \ 8897660c4dbSandi selinger outputi_nnz = 0; \ 8907660c4dbSandi selinger for (k = 0; k < ANNZ; ++k) { \ 891d7ed1a4dSandi selinger brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 892d7ed1a4dSandi selinger brow_end[k] = inputj + inputi[inputcol[k] + 1]; \ 893d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 894d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 895d7ed1a4dSandi selinger } \ 896d7ed1a4dSandi selinger while (window_min < bn) { \ 897d7ed1a4dSandi selinger outputj[outputi_nnz++] = window_min; \ 898d7ed1a4dSandi selinger /* advance front and compute new minimum */ \ 899d7ed1a4dSandi selinger old_window_min = window_min; \ 900d7ed1a4dSandi selinger window_min = bn; \ 901d7ed1a4dSandi selinger for (k = 0; k < ANNZ; ++k) { \ 902d7ed1a4dSandi selinger if (window[k] == old_window_min) { \ 903d7ed1a4dSandi selinger brow_ptr[k]++; \ 904d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 905d7ed1a4dSandi selinger } \ 906d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 907d7ed1a4dSandi selinger } \ 908d7ed1a4dSandi selinger } 909d7ed1a4dSandi selinger 910d7ed1a4dSandi selinger /************** L E V E L 1 ***************/ 911d7ed1a4dSandi selinger /* Merge up to 8 rows of B to L1 work array*/ 912d7ed1a4dSandi selinger while (L1_rowsleft) { 9137660c4dbSandi selinger outputi_nnz = 0; 9147660c4dbSandi selinger if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 9157660c4dbSandi selinger else outputj = cj + ci_nnz; /* Merge directly to C */ 9167660c4dbSandi selinger 917d7ed1a4dSandi selinger switch (L1_rowsleft) { 9189371c9d4SSatish Balay case 1: 9199371c9d4SSatish Balay brow_ptr[0] = inputj + inputi[inputcol[0]]; 920d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0] + 1]; 921d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 922d7ed1a4dSandi selinger inputcol += L1_rowsleft; 923d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 924d7ed1a4dSandi selinger L1_rowsleft = 0; 925d7ed1a4dSandi selinger break; 9269371c9d4SSatish Balay case 2: 9279371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(2); 928d7ed1a4dSandi selinger inputcol += L1_rowsleft; 929d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 930d7ed1a4dSandi selinger L1_rowsleft = 0; 931d7ed1a4dSandi selinger break; 9329371c9d4SSatish Balay case 3: 9339371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(3); 934d7ed1a4dSandi selinger inputcol += L1_rowsleft; 935d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 936d7ed1a4dSandi selinger L1_rowsleft = 0; 937d7ed1a4dSandi selinger break; 9389371c9d4SSatish Balay case 4: 9399371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(4); 940d7ed1a4dSandi selinger inputcol += L1_rowsleft; 941d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 942d7ed1a4dSandi selinger L1_rowsleft = 0; 943d7ed1a4dSandi selinger break; 9449371c9d4SSatish Balay case 5: 9459371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(5); 946d7ed1a4dSandi selinger inputcol += L1_rowsleft; 947d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 948d7ed1a4dSandi selinger L1_rowsleft = 0; 949d7ed1a4dSandi selinger break; 9509371c9d4SSatish Balay case 6: 9519371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(6); 952d7ed1a4dSandi selinger inputcol += L1_rowsleft; 953d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 954d7ed1a4dSandi selinger L1_rowsleft = 0; 955d7ed1a4dSandi selinger break; 9569371c9d4SSatish Balay case 7: 9579371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(7); 958d7ed1a4dSandi selinger inputcol += L1_rowsleft; 959d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 960d7ed1a4dSandi selinger L1_rowsleft = 0; 961d7ed1a4dSandi selinger break; 9629371c9d4SSatish Balay default: 9639371c9d4SSatish Balay MatMatMultSymbolic_RowMergeMacro(8); 964d7ed1a4dSandi selinger inputcol += 8; 965d7ed1a4dSandi selinger rowsleft -= 8; 966d7ed1a4dSandi selinger L1_rowsleft -= 8; 967d7ed1a4dSandi selinger break; 968d7ed1a4dSandi selinger } 969d7ed1a4dSandi selinger inputcol_L1 = inputcol; 9707660c4dbSandi selinger L1_nnz += outputi_nnz; 9717660c4dbSandi selinger worki_L1[++L1_nrows] = L1_nnz; 972d7ed1a4dSandi selinger } 973d7ed1a4dSandi selinger 974d7ed1a4dSandi selinger /********************** L E V E L 2 ************************/ 975d7ed1a4dSandi selinger /* Merge from L1 work array to either C or to L2 work array */ 976d7ed1a4dSandi selinger if (anzi > 8) { 977d7ed1a4dSandi selinger inputi = worki_L1; 978d7ed1a4dSandi selinger inputj = workj_L1; 979d7ed1a4dSandi selinger inputcol = workcol; 980d7ed1a4dSandi selinger outputi_nnz = 0; 981d7ed1a4dSandi selinger 982d7ed1a4dSandi selinger if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 983d7ed1a4dSandi selinger else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 984d7ed1a4dSandi selinger 985d7ed1a4dSandi selinger switch (L1_nrows) { 9869371c9d4SSatish Balay case 1: 9879371c9d4SSatish Balay brow_ptr[0] = inputj + inputi[inputcol[0]]; 988d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0] + 1]; 989d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 990d7ed1a4dSandi selinger break; 991d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 992d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 993d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 994d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 995d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 996d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 997d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 998d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 999d7ed1a4dSandi selinger } 1000d7ed1a4dSandi selinger L2_nnz += outputi_nnz; 1001d7ed1a4dSandi selinger worki_L2[++L2_nrows] = L2_nnz; 1002d7ed1a4dSandi selinger 10037660c4dbSandi selinger /************************ L E V E L 3 **********************/ 10047660c4dbSandi selinger /* Merge from L2 work array to either C or to L2 work array */ 1005d7ed1a4dSandi selinger if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1006d7ed1a4dSandi selinger inputi = worki_L2; 1007d7ed1a4dSandi selinger inputj = workj_L2; 1008d7ed1a4dSandi selinger inputcol = workcol; 1009d7ed1a4dSandi selinger outputi_nnz = 0; 1010f83700f2Sandi selinger if (rowsleft) outputj = workj_L3; 1011d7ed1a4dSandi selinger else outputj = cj + ci_nnz; 1012d7ed1a4dSandi selinger switch (L2_nrows) { 10139371c9d4SSatish Balay case 1: 10149371c9d4SSatish Balay brow_ptr[0] = inputj + inputi[inputcol[0]]; 1015d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0] + 1]; 1016d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1017d7ed1a4dSandi selinger break; 1018d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1019d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1020d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1021d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1022d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1023d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1024d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1025d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1026d7ed1a4dSandi selinger } 1027d7ed1a4dSandi selinger L2_nrows = 1; 10287660c4dbSandi selinger L2_nnz = outputi_nnz; 10297660c4dbSandi selinger worki_L2[1] = outputi_nnz; 10307660c4dbSandi selinger /* Copy to workj_L2 */ 1031d7ed1a4dSandi selinger if (rowsleft) { 10327660c4dbSandi selinger for (k = 0; k < outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1033d7ed1a4dSandi selinger } 1034d7ed1a4dSandi selinger } 1035d7ed1a4dSandi selinger } 1036d7ed1a4dSandi selinger } /* while (rowsleft) */ 1037d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro 1038d7ed1a4dSandi selinger 1039d7ed1a4dSandi selinger /* terminate current row */ 1040d7ed1a4dSandi selinger ci_nnz += outputi_nnz; 1041d7ed1a4dSandi selinger ci[i + 1] = ci_nnz; 1042d7ed1a4dSandi selinger } 1043d7ed1a4dSandi selinger 1044d7ed1a4dSandi selinger /* Step 3: Create the new symbolic matrix */ 10459566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C)); 10469566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 1047d7ed1a4dSandi selinger 1048d7ed1a4dSandi selinger /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1049d7ed1a4dSandi selinger /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 10504222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 1051d7ed1a4dSandi selinger c->free_a = PETSC_TRUE; 1052d7ed1a4dSandi selinger c->free_ij = PETSC_TRUE; 1053d7ed1a4dSandi selinger c->nonew = 0; 1054d7ed1a4dSandi selinger 10554222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1056d7ed1a4dSandi selinger 1057d7ed1a4dSandi selinger /* set MatInfo */ 1058d7ed1a4dSandi selinger afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5; 1059d7ed1a4dSandi selinger if (afill < 1.0) afill = 1.0; 10604222ddf1SHong Zhang C->info.mallocs = ndouble; 10614222ddf1SHong Zhang C->info.fill_ratio_given = fill; 10624222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1063d7ed1a4dSandi selinger 1064d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO) 1065d7ed1a4dSandi selinger if (ci[am]) { 10669566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 10679566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 1068d7ed1a4dSandi selinger } else { 10699566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 1070d7ed1a4dSandi selinger } 1071d7ed1a4dSandi selinger #endif 1072d7ed1a4dSandi selinger 1073d7ed1a4dSandi selinger /* Step 4: Free temporary work areas */ 10749566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L1)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L2)); 10769566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L3)); 1077d7ed1a4dSandi selinger PetscFunctionReturn(0); 1078d7ed1a4dSandi selinger } 1079d7ed1a4dSandi selinger 1080cd093f1dSJed Brown /* concatenate unique entries and then sort */ 10819371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, PetscReal fill, Mat C) { 1082cd093f1dSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c; 1083cd093f1dSJed Brown const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j; 10848c78258cSHong Zhang PetscInt *ci, *cj, bcol; 1085cd093f1dSJed Brown PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N; 1086cd093f1dSJed Brown PetscReal afill; 1087cd093f1dSJed Brown PetscInt i, j, ndouble = 0; 1088cd093f1dSJed Brown PetscSegBuffer seg, segrow; 1089cd093f1dSJed Brown char *seen; 1090cd093f1dSJed Brown 1091cd093f1dSJed Brown PetscFunctionBegin; 10929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ci)); 1093cd093f1dSJed Brown ci[0] = 0; 1094cd093f1dSJed Brown 1095cd093f1dSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 10969566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscInt), (PetscInt)(fill * (ai[am] + bi[bm])), &seg)); 10979566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 100, &segrow)); 10989566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bn, &seen)); 1099cd093f1dSJed Brown 1100cd093f1dSJed Brown /* Determine ci and cj */ 1101cd093f1dSJed Brown for (i = 0; i < am; i++) { 1102cd093f1dSJed Brown const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1103cd093f1dSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1104cd093f1dSJed Brown PetscInt packlen = 0, *PETSC_RESTRICT crow; 11058c78258cSHong Zhang 1106cd093f1dSJed Brown /* Pack segrow */ 1107cd093f1dSJed Brown for (j = 0; j < anzi; j++) { 1108cd093f1dSJed Brown PetscInt brow = acol[j], bjstart = bi[brow], bjend = bi[brow + 1], k; 1109cd093f1dSJed Brown for (k = bjstart; k < bjend; k++) { 11108c78258cSHong Zhang bcol = bj[k]; 1111cd093f1dSJed Brown if (!seen[bcol]) { /* new entry */ 1112cd093f1dSJed Brown PetscInt *PETSC_RESTRICT slot; 11139566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(segrow, 1, &slot)); 1114cd093f1dSJed Brown *slot = bcol; 1115cd093f1dSJed Brown seen[bcol] = 1; 1116cd093f1dSJed Brown packlen++; 1117cd093f1dSJed Brown } 1118cd093f1dSJed Brown } 1119cd093f1dSJed Brown } 11208c78258cSHong Zhang 11218c78258cSHong Zhang /* Check i-th diagonal entry */ 11228c78258cSHong Zhang if (C->force_diagonals && !seen[i]) { 11238c78258cSHong Zhang PetscInt *PETSC_RESTRICT slot; 11249566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(segrow, 1, &slot)); 11258c78258cSHong Zhang *slot = i; 11268c78258cSHong Zhang seen[i] = 1; 11278c78258cSHong Zhang packlen++; 11288c78258cSHong Zhang } 11298c78258cSHong Zhang 11309566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(seg, packlen, &crow)); 11319566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractTo(segrow, crow)); 11329566063dSJacob Faibussowitsch PetscCall(PetscSortInt(packlen, crow)); 1133cd093f1dSJed Brown ci[i + 1] = ci[i] + packlen; 1134cd093f1dSJed Brown for (j = 0; j < packlen; j++) seen[crow[j]] = 0; 1135cd093f1dSJed Brown } 11369566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segrow)); 11379566063dSJacob Faibussowitsch PetscCall(PetscFree(seen)); 1138cd093f1dSJed Brown 1139cd093f1dSJed Brown /* Column indices are in the segmented buffer */ 11409566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(seg, &cj)); 11419566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&seg)); 1142cd093f1dSJed Brown 1143cd093f1dSJed Brown /* put together the new symbolic matrix */ 11449566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C)); 11459566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C, A, B)); 1146cd093f1dSJed Brown 1147cd093f1dSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1148cd093f1dSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 11494222ddf1SHong Zhang c = (Mat_SeqAIJ *)(C->data); 1150cd093f1dSJed Brown c->free_a = PETSC_TRUE; 1151cd093f1dSJed Brown c->free_ij = PETSC_TRUE; 1152cd093f1dSJed Brown c->nonew = 0; 1153cd093f1dSJed Brown 11544222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1155cd093f1dSJed Brown 1156cd093f1dSJed Brown /* set MatInfo */ 11572a09556fSStefano Zampini afill = (PetscReal)ci[am] / PetscMax(ai[am] + bi[bm], 1) + 1.e-5; 1158cd093f1dSJed Brown if (afill < 1.0) afill = 1.0; 11594222ddf1SHong Zhang C->info.mallocs = ndouble; 11604222ddf1SHong Zhang C->info.fill_ratio_given = fill; 11614222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1162cd093f1dSJed Brown 1163cd093f1dSJed Brown #if defined(PETSC_USE_INFO) 1164cd093f1dSJed Brown if (ci[am]) { 11659566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill)); 11669566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill)); 1167cd093f1dSJed Brown } else { 11689566063dSJacob Faibussowitsch PetscCall(PetscInfo(C, "Empty matrix product\n")); 1169cd093f1dSJed Brown } 1170cd093f1dSJed Brown #endif 1171cd093f1dSJed Brown PetscFunctionReturn(0); 1172cd093f1dSJed Brown } 1173cd093f1dSJed Brown 11749371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) { 11756718818eSStefano Zampini Mat_MatMatTransMult *abt = (Mat_MatMatTransMult *)data; 11762128a86cSHong Zhang 11772128a86cSHong Zhang PetscFunctionBegin; 11789566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringDestroy(&abt->matcoloring)); 11799566063dSJacob Faibussowitsch PetscCall(MatDestroy(&abt->Bt_den)); 11809566063dSJacob Faibussowitsch PetscCall(MatDestroy(&abt->ABt_den)); 11819566063dSJacob Faibussowitsch PetscCall(PetscFree(abt)); 11822128a86cSHong Zhang PetscFunctionReturn(0); 11832128a86cSHong Zhang } 11842128a86cSHong Zhang 11859371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C) { 118681d82fe4SHong Zhang Mat Bt; 118740192850SHong Zhang Mat_MatMatTransMult *abt; 11884222ddf1SHong Zhang Mat_Product *product = C->product; 11896718818eSStefano Zampini char *alg; 1190d2853540SHong Zhang 119181d82fe4SHong Zhang PetscFunctionBegin; 119228b400f6SJacob Faibussowitsch PetscCheck(product, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct"); 119328b400f6SJacob Faibussowitsch PetscCheck(!product->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Extra product struct not empty"); 11946718818eSStefano Zampini 119581d82fe4SHong Zhang /* create symbolic Bt */ 11967fb60732SBarry Smith PetscCall(MatTransposeSymbolic(B, &Bt)); 11979566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(Bt, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs))); 11989566063dSJacob Faibussowitsch PetscCall(MatSetType(Bt, ((PetscObject)A)->type_name)); 119981d82fe4SHong Zhang 120081d82fe4SHong Zhang /* get symbolic C=A*Bt */ 12019566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(product->alg, &alg)); 12029566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "sorted")); /* set algorithm for C = A*Bt */ 12039566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A, Bt, fill, C)); 12049566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, alg)); /* resume original algorithm for ABt product */ 12059566063dSJacob Faibussowitsch PetscCall(PetscFree(alg)); 120681d82fe4SHong Zhang 1207a5b23f4aSJose E. Roman /* create a supporting struct for reuse intermediate dense matrices with matcoloring */ 12089566063dSJacob Faibussowitsch PetscCall(PetscNew(&abt)); 12092128a86cSHong Zhang 12106718818eSStefano Zampini product->data = abt; 12116718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 12126718818eSStefano Zampini 12134222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 12142128a86cSHong Zhang 12154222ddf1SHong Zhang abt->usecoloring = PETSC_FALSE; 12169566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "color", &abt->usecoloring)); 121740192850SHong Zhang if (abt->usecoloring) { 1218b9af6bddSHong Zhang /* Create MatTransposeColoring from symbolic C=A*B^T */ 1219b9af6bddSHong Zhang MatTransposeColoring matcoloring; 1220335efc43SPeter Brune MatColoring coloring; 12212128a86cSHong Zhang ISColoring iscoloring; 12222128a86cSHong Zhang Mat Bt_dense, C_dense; 1223e8354b3eSHong Zhang 12244222ddf1SHong Zhang /* inode causes memory problem */ 12259566063dSJacob Faibussowitsch PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE)); 12264222ddf1SHong Zhang 12279566063dSJacob Faibussowitsch PetscCall(MatColoringCreate(C, &coloring)); 12289566063dSJacob Faibussowitsch PetscCall(MatColoringSetDistance(coloring, 2)); 12299566063dSJacob Faibussowitsch PetscCall(MatColoringSetType(coloring, MATCOLORINGSL)); 12309566063dSJacob Faibussowitsch PetscCall(MatColoringSetFromOptions(coloring)); 12319566063dSJacob Faibussowitsch PetscCall(MatColoringApply(coloring, &iscoloring)); 12329566063dSJacob Faibussowitsch PetscCall(MatColoringDestroy(&coloring)); 12339566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring)); 12342205254eSKarl Rupp 123540192850SHong Zhang abt->matcoloring = matcoloring; 12362205254eSKarl Rupp 12379566063dSJacob Faibussowitsch PetscCall(ISColoringDestroy(&iscoloring)); 12382128a86cSHong Zhang 12392128a86cSHong Zhang /* Create Bt_dense and C_dense = A*Bt_dense */ 12409566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bt_dense)); 12419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors)); 12429566063dSJacob Faibussowitsch PetscCall(MatSetType(Bt_dense, MATSEQDENSE)); 12439566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(Bt_dense, NULL)); 12442205254eSKarl Rupp 12452128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 124640192850SHong Zhang abt->Bt_den = Bt_dense; 12472128a86cSHong Zhang 12489566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &C_dense)); 12499566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C_dense, A->rmap->n, matcoloring->ncolors, A->rmap->n, matcoloring->ncolors)); 12509566063dSJacob Faibussowitsch PetscCall(MatSetType(C_dense, MATSEQDENSE)); 12519566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(C_dense, NULL)); 12522205254eSKarl Rupp 12532128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 125440192850SHong Zhang abt->ABt_den = C_dense; 1255f94ccd6cSHong Zhang 1256f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO) 12571ce71dffSSatish Balay { 12584222ddf1SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 12599371c9d4SSatish Balay PetscCall(PetscInfo(C, "Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n", B->cmap->n, B->rmap->n, Bt_dense->rmap->n, 12609371c9d4SSatish Balay Bt_dense->cmap->n, c->nz, A->rmap->n * matcoloring->ncolors, (double)(((PetscReal)(c->nz)) / ((PetscReal)(A->rmap->n * matcoloring->ncolors))))); 12611ce71dffSSatish Balay } 1262f94ccd6cSHong Zhang #endif 12632128a86cSHong Zhang } 1264e99be685SHong Zhang /* clean up */ 12659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bt)); 12665df89d91SHong Zhang PetscFunctionReturn(0); 12675df89d91SHong Zhang } 12685df89d91SHong Zhang 12699371c9d4SSatish Balay PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C) { 12705df89d91SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data; 1271e2cac8caSJed Brown PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, anzi, bnzj, nexta, nextb, *acol, *bcol, brow; 127281d82fe4SHong Zhang PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, i, j, cnzi, *ccol; 12735df89d91SHong Zhang PetscLogDouble flops = 0.0; 1274aa1aec99SHong Zhang MatScalar *aa = a->a, *aval, *ba = b->a, *bval, *ca, *cval; 12756718818eSStefano Zampini Mat_MatMatTransMult *abt; 12766718818eSStefano Zampini Mat_Product *product = C->product; 12775df89d91SHong Zhang 12785df89d91SHong Zhang PetscFunctionBegin; 127928b400f6SJacob Faibussowitsch PetscCheck(product, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct"); 12806718818eSStefano Zampini abt = (Mat_MatMatTransMult *)product->data; 128128b400f6SJacob Faibussowitsch PetscCheck(abt, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct"); 128258ed3ceaSHong Zhang /* clear old values in C */ 128358ed3ceaSHong Zhang if (!c->a) { 12849566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cm] + 1, &ca)); 128558ed3ceaSHong Zhang c->a = ca; 128658ed3ceaSHong Zhang c->free_a = PETSC_TRUE; 128758ed3ceaSHong Zhang } else { 128858ed3ceaSHong Zhang ca = c->a; 12899566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm] + 1)); 129058ed3ceaSHong Zhang } 129158ed3ceaSHong Zhang 129240192850SHong Zhang if (abt->usecoloring) { 129340192850SHong Zhang MatTransposeColoring matcoloring = abt->matcoloring; 129440192850SHong Zhang Mat Bt_dense, C_dense = abt->ABt_den; 1295c8db22f6SHong Zhang 1296b9af6bddSHong Zhang /* Get Bt_dense by Apply MatTransposeColoring to B */ 129740192850SHong Zhang Bt_dense = abt->Bt_den; 12989566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplySpToDen(matcoloring, B, Bt_dense)); 1299c8db22f6SHong Zhang 1300c8db22f6SHong Zhang /* C_dense = A*Bt_dense */ 13019566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, Bt_dense, C_dense)); 1302c8db22f6SHong Zhang 13032128a86cSHong Zhang /* Recover C from C_dense */ 13049566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplyDenToSp(matcoloring, C_dense, C)); 1305c8db22f6SHong Zhang PetscFunctionReturn(0); 1306c8db22f6SHong Zhang } 1307c8db22f6SHong Zhang 13081fa1209cSHong Zhang for (i = 0; i < cm; i++) { 130981d82fe4SHong Zhang anzi = ai[i + 1] - ai[i]; 131081d82fe4SHong Zhang acol = aj + ai[i]; 13116973769bSHong Zhang aval = aa + ai[i]; 13121fa1209cSHong Zhang cnzi = ci[i + 1] - ci[i]; 13131fa1209cSHong Zhang ccol = cj + ci[i]; 13146973769bSHong Zhang cval = ca + ci[i]; 13151fa1209cSHong Zhang for (j = 0; j < cnzi; j++) { 131681d82fe4SHong Zhang brow = ccol[j]; 131781d82fe4SHong Zhang bnzj = bi[brow + 1] - bi[brow]; 131881d82fe4SHong Zhang bcol = bj + bi[brow]; 13196973769bSHong Zhang bval = ba + bi[brow]; 13206973769bSHong Zhang 132181d82fe4SHong Zhang /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 13229371c9d4SSatish Balay nexta = 0; 13239371c9d4SSatish Balay nextb = 0; 132481d82fe4SHong Zhang while (nexta < anzi && nextb < bnzj) { 13257b6d5e96SMark Adams while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 132681d82fe4SHong Zhang if (nexta == anzi) break; 13277b6d5e96SMark Adams while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 132881d82fe4SHong Zhang if (nextb == bnzj) break; 132981d82fe4SHong Zhang if (acol[nexta] == bcol[nextb]) { 13306973769bSHong Zhang cval[j] += aval[nexta] * bval[nextb]; 13319371c9d4SSatish Balay nexta++; 13329371c9d4SSatish Balay nextb++; 133381d82fe4SHong Zhang flops += 2; 13341fa1209cSHong Zhang } 13351fa1209cSHong Zhang } 133681d82fe4SHong Zhang } 133781d82fe4SHong Zhang } 13389566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 13399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 13409566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 13415df89d91SHong Zhang PetscFunctionReturn(0); 13425df89d91SHong Zhang } 13435df89d91SHong Zhang 13449371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) { 13456718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)data; 13466d373c3eSHong Zhang 13476d373c3eSHong Zhang PetscFunctionBegin; 13489566063dSJacob Faibussowitsch PetscCall(MatDestroy(&atb->At)); 13491baa6e33SBarry Smith if (atb->destroy) PetscCall((*atb->destroy)(atb->data)); 13509566063dSJacob Faibussowitsch PetscCall(PetscFree(atb)); 13516d373c3eSHong Zhang PetscFunctionReturn(0); 13526d373c3eSHong Zhang } 13536d373c3eSHong Zhang 13549371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C) { 1355089a957eSStefano Zampini Mat At = NULL; 13564222ddf1SHong Zhang Mat_Product *product = C->product; 1357089a957eSStefano Zampini PetscBool flg, def, square; 1358bc011b1eSHong Zhang 1359bc011b1eSHong Zhang PetscFunctionBegin; 1360089a957eSStefano Zampini MatCheckProduct(C, 4); 1361b94d7dedSBarry Smith square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE); 13624222ddf1SHong Zhang /* outerproduct */ 13639566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "outerproduct", &flg)); 13644222ddf1SHong Zhang if (flg) { 1365bc011b1eSHong Zhang /* create symbolic At */ 1366089a957eSStefano Zampini if (!square) { 13677fb60732SBarry Smith PetscCall(MatTransposeSymbolic(A, &At)); 13689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(At, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs))); 13699566063dSJacob Faibussowitsch PetscCall(MatSetType(At, ((PetscObject)A)->type_name)); 1370089a957eSStefano Zampini } 1371bc011b1eSHong Zhang /* get symbolic C=At*B */ 13729566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "sorted")); 13739566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C)); 1374bc011b1eSHong Zhang 1375bc011b1eSHong Zhang /* clean up */ 1376*48a46eb9SPierre Jolivet if (!square) PetscCall(MatDestroy(&At)); 13776d373c3eSHong Zhang 13784222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 13799566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "outerproduct")); 13804222ddf1SHong Zhang PetscFunctionReturn(0); 13814222ddf1SHong Zhang } 13824222ddf1SHong Zhang 13834222ddf1SHong Zhang /* matmatmult */ 13849566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &def)); 13859566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "at*b", &flg)); 1386089a957eSStefano Zampini if (flg || def) { 13874222ddf1SHong Zhang Mat_MatTransMatMult *atb; 13884222ddf1SHong Zhang 138928b400f6SJacob Faibussowitsch PetscCheck(!product->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Extra product struct not empty"); 13909566063dSJacob Faibussowitsch PetscCall(PetscNew(&atb)); 1391*48a46eb9SPierre Jolivet if (!square) PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 13929566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "sorted")); 13939566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C)); 13949566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, "at*b")); 13956718818eSStefano Zampini product->data = atb; 13966718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 13974222ddf1SHong Zhang atb->At = At; 13984222ddf1SHong Zhang 13994222ddf1SHong Zhang C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 14004222ddf1SHong Zhang PetscFunctionReturn(0); 14014222ddf1SHong Zhang } 14024222ddf1SHong Zhang 14034222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 1404bc011b1eSHong Zhang } 1405bc011b1eSHong Zhang 14069371c9d4SSatish Balay PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C) { 14070fbc74f4SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data; 1408d0f46423SBarry Smith PetscInt am = A->rmap->n, anzi, *ai = a->i, *aj = a->j, *bi = b->i, *bj, bnzi, nextb; 1409d0f46423SBarry Smith PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, crow, *cjj, i, j, k; 1410d13dce4bSSatish Balay PetscLogDouble flops = 0.0; 1411aa1aec99SHong Zhang MatScalar *aa = a->a, *ba, *ca, *caj; 1412bc011b1eSHong Zhang 1413bc011b1eSHong Zhang PetscFunctionBegin; 1414aa1aec99SHong Zhang if (!c->a) { 14159566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cm] + 1, &ca)); 14162205254eSKarl Rupp 1417aa1aec99SHong Zhang c->a = ca; 1418aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 1419aa1aec99SHong Zhang } else { 1420aa1aec99SHong Zhang ca = c->a; 14219566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca, ci[cm])); 1422aa1aec99SHong Zhang } 1423bc011b1eSHong Zhang 1424bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1425bc011b1eSHong Zhang for (i = 0; i < am; i++) { 1426bc011b1eSHong Zhang bj = b->j + bi[i]; 1427bc011b1eSHong Zhang ba = b->a + bi[i]; 1428bc011b1eSHong Zhang bnzi = bi[i + 1] - bi[i]; 1429bc011b1eSHong Zhang anzi = ai[i + 1] - ai[i]; 1430bc011b1eSHong Zhang for (j = 0; j < anzi; j++) { 1431bc011b1eSHong Zhang nextb = 0; 14320fbc74f4SHong Zhang crow = *aj++; 1433bc011b1eSHong Zhang cjj = cj + ci[crow]; 1434bc011b1eSHong Zhang caj = ca + ci[crow]; 1435bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 1436bc011b1eSHong Zhang for (k = 0; nextb < bnzi; k++) { 14370fbc74f4SHong Zhang if (cjj[k] == *(bj + nextb)) { /* ccol == bcol */ 14380fbc74f4SHong Zhang caj[k] += (*aa) * (*(ba + nextb)); 1439bc011b1eSHong Zhang nextb++; 1440bc011b1eSHong Zhang } 1441bc011b1eSHong Zhang } 1442bc011b1eSHong Zhang flops += 2 * bnzi; 14430fbc74f4SHong Zhang aa++; 1444bc011b1eSHong Zhang } 1445bc011b1eSHong Zhang } 1446bc011b1eSHong Zhang 1447bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 14489566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 14499566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 14509566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 1451bc011b1eSHong Zhang PetscFunctionReturn(0); 1452bc011b1eSHong Zhang } 14539123193aSHong Zhang 14549371c9d4SSatish Balay PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A, Mat B, PetscReal fill, Mat C) { 14559123193aSHong Zhang PetscFunctionBegin; 14569566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A, B, 0.0, C)); 14574222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 14589123193aSHong Zhang PetscFunctionReturn(0); 14599123193aSHong Zhang } 14609123193aSHong Zhang 14619371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A, Mat B, Mat C, const PetscBool add) { 1462f32d5d43SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 14631ca9667aSStefano Zampini PetscScalar *c, r1, r2, r3, r4, *c1, *c2, *c3, *c4; 1464a4af7ca8SStefano Zampini const PetscScalar *aa, *b, *b1, *b2, *b3, *b4, *av; 1465daf57211SHong Zhang const PetscInt *aj; 146675f6d85dSStefano Zampini PetscInt cm = C->rmap->n, cn = B->cmap->n, bm, am = A->rmap->n; 146775f6d85dSStefano Zampini PetscInt clda; 146875f6d85dSStefano Zampini PetscInt am4, bm4, col, i, j, n; 14699123193aSHong Zhang 14709123193aSHong Zhang PetscFunctionBegin; 1471f32d5d43SBarry Smith if (!cm || !cn) PetscFunctionReturn(0); 14729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A, &av)); 147393aa15f2SStefano Zampini if (add) { 14749566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(C, &c)); 147593aa15f2SStefano Zampini } else { 14769566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(C, &c)); 147793aa15f2SStefano Zampini } 14789566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B, &b)); 14799566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B, &bm)); 14809566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(C, &clda)); 148175f6d85dSStefano Zampini am4 = 4 * clda; 148275f6d85dSStefano Zampini bm4 = 4 * bm; 14839371c9d4SSatish Balay b1 = b; 14849371c9d4SSatish Balay b2 = b1 + bm; 14859371c9d4SSatish Balay b3 = b2 + bm; 14869371c9d4SSatish Balay b4 = b3 + bm; 14879371c9d4SSatish Balay c1 = c; 14889371c9d4SSatish Balay c2 = c1 + clda; 14899371c9d4SSatish Balay c3 = c2 + clda; 14909371c9d4SSatish Balay c4 = c3 + clda; 14911ca9667aSStefano Zampini for (col = 0; col < (cn / 4) * 4; col += 4) { /* over columns of C */ 14921ca9667aSStefano Zampini for (i = 0; i < am; i++) { /* over rows of A in those columns */ 1493f32d5d43SBarry Smith r1 = r2 = r3 = r4 = 0.0; 1494f32d5d43SBarry Smith n = a->i[i + 1] - a->i[i]; 1495f32d5d43SBarry Smith aj = a->j + a->i[i]; 1496a4af7ca8SStefano Zampini aa = av + a->i[i]; 1497f32d5d43SBarry Smith for (j = 0; j < n; j++) { 14981ca9667aSStefano Zampini const PetscScalar aatmp = aa[j]; 14991ca9667aSStefano Zampini const PetscInt ajtmp = aj[j]; 1500730858b9SHong Zhang r1 += aatmp * b1[ajtmp]; 1501730858b9SHong Zhang r2 += aatmp * b2[ajtmp]; 1502730858b9SHong Zhang r3 += aatmp * b3[ajtmp]; 1503730858b9SHong Zhang r4 += aatmp * b4[ajtmp]; 15049123193aSHong Zhang } 150593aa15f2SStefano Zampini if (add) { 150687753ddeSHong Zhang c1[i] += r1; 150787753ddeSHong Zhang c2[i] += r2; 150887753ddeSHong Zhang c3[i] += r3; 150987753ddeSHong Zhang c4[i] += r4; 151093aa15f2SStefano Zampini } else { 151193aa15f2SStefano Zampini c1[i] = r1; 151293aa15f2SStefano Zampini c2[i] = r2; 151393aa15f2SStefano Zampini c3[i] = r3; 151493aa15f2SStefano Zampini c4[i] = r4; 151593aa15f2SStefano Zampini } 1516f32d5d43SBarry Smith } 15179371c9d4SSatish Balay b1 += bm4; 15189371c9d4SSatish Balay b2 += bm4; 15199371c9d4SSatish Balay b3 += bm4; 15209371c9d4SSatish Balay b4 += bm4; 15219371c9d4SSatish Balay c1 += am4; 15229371c9d4SSatish Balay c2 += am4; 15239371c9d4SSatish Balay c3 += am4; 15249371c9d4SSatish Balay c4 += am4; 1525f32d5d43SBarry Smith } 152693aa15f2SStefano Zampini /* process remaining columns */ 152793aa15f2SStefano Zampini if (col != cn) { 152893aa15f2SStefano Zampini PetscInt rc = cn - col; 152993aa15f2SStefano Zampini 153093aa15f2SStefano Zampini if (rc == 1) { 153193aa15f2SStefano Zampini for (i = 0; i < am; i++) { 1532f32d5d43SBarry Smith r1 = 0.0; 1533f32d5d43SBarry Smith n = a->i[i + 1] - a->i[i]; 1534f32d5d43SBarry Smith aj = a->j + a->i[i]; 1535a4af7ca8SStefano Zampini aa = av + a->i[i]; 153693aa15f2SStefano Zampini for (j = 0; j < n; j++) r1 += aa[j] * b1[aj[j]]; 153793aa15f2SStefano Zampini if (add) c1[i] += r1; 153893aa15f2SStefano Zampini else c1[i] = r1; 153993aa15f2SStefano Zampini } 154093aa15f2SStefano Zampini } else if (rc == 2) { 154193aa15f2SStefano Zampini for (i = 0; i < am; i++) { 154293aa15f2SStefano Zampini r1 = r2 = 0.0; 154393aa15f2SStefano Zampini n = a->i[i + 1] - a->i[i]; 154493aa15f2SStefano Zampini aj = a->j + a->i[i]; 154593aa15f2SStefano Zampini aa = av + a->i[i]; 1546f32d5d43SBarry Smith for (j = 0; j < n; j++) { 154793aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 154893aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 154993aa15f2SStefano Zampini r1 += aatmp * b1[ajtmp]; 155093aa15f2SStefano Zampini r2 += aatmp * b2[ajtmp]; 1551f32d5d43SBarry Smith } 155293aa15f2SStefano Zampini if (add) { 155387753ddeSHong Zhang c1[i] += r1; 155493aa15f2SStefano Zampini c2[i] += r2; 155593aa15f2SStefano Zampini } else { 155693aa15f2SStefano Zampini c1[i] = r1; 155793aa15f2SStefano Zampini c2[i] = r2; 1558f32d5d43SBarry Smith } 155993aa15f2SStefano Zampini } 156093aa15f2SStefano Zampini } else { 156193aa15f2SStefano Zampini for (i = 0; i < am; i++) { 156293aa15f2SStefano Zampini r1 = r2 = r3 = 0.0; 156393aa15f2SStefano Zampini n = a->i[i + 1] - a->i[i]; 156493aa15f2SStefano Zampini aj = a->j + a->i[i]; 156593aa15f2SStefano Zampini aa = av + a->i[i]; 156693aa15f2SStefano Zampini for (j = 0; j < n; j++) { 156793aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 156893aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 156993aa15f2SStefano Zampini r1 += aatmp * b1[ajtmp]; 157093aa15f2SStefano Zampini r2 += aatmp * b2[ajtmp]; 157193aa15f2SStefano Zampini r3 += aatmp * b3[ajtmp]; 157293aa15f2SStefano Zampini } 157393aa15f2SStefano Zampini if (add) { 157493aa15f2SStefano Zampini c1[i] += r1; 157593aa15f2SStefano Zampini c2[i] += r2; 157693aa15f2SStefano Zampini c3[i] += r3; 157793aa15f2SStefano Zampini } else { 157893aa15f2SStefano Zampini c1[i] = r1; 157993aa15f2SStefano Zampini c2[i] = r2; 158093aa15f2SStefano Zampini c3[i] = r3; 158193aa15f2SStefano Zampini } 158293aa15f2SStefano Zampini } 158393aa15f2SStefano Zampini } 1584f32d5d43SBarry Smith } 15859566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(cn * (2.0 * a->nz))); 158693aa15f2SStefano Zampini if (add) { 15879566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(C, &c)); 158893aa15f2SStefano Zampini } else { 15899566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(C, &c)); 159093aa15f2SStefano Zampini } 15919566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B, &b)); 15929566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1593f32d5d43SBarry Smith PetscFunctionReturn(0); 1594f32d5d43SBarry Smith } 1595f32d5d43SBarry Smith 15969371c9d4SSatish Balay PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A, Mat B, Mat C) { 1597f32d5d43SBarry Smith PetscFunctionBegin; 159808401ef6SPierre Jolivet PetscCheck(B->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, B->rmap->n); 159908401ef6SPierre Jolivet PetscCheck(A->rmap->n == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, C->rmap->n, A->rmap->n); 160008401ef6SPierre Jolivet PetscCheck(B->cmap->n == C->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT, B->cmap->n, C->cmap->n); 16014324174eSBarry Smith 16029566063dSJacob Faibussowitsch PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A, B, C, PETSC_FALSE)); 16039123193aSHong Zhang PetscFunctionReturn(0); 16049123193aSHong Zhang } 1605b1683b59SHong Zhang 16064222ddf1SHong Zhang /* ------------------------------------------------------- */ 16079371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) { 16084222ddf1SHong Zhang PetscFunctionBegin; 16094222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 16104222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16114222ddf1SHong Zhang PetscFunctionReturn(0); 16124222ddf1SHong Zhang } 16134222ddf1SHong Zhang 16146718818eSStefano Zampini PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat, Mat, PetscReal, Mat); 16156718818eSStefano Zampini 16169371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) { 16174222ddf1SHong Zhang PetscFunctionBegin; 16186718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16194222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB; 16206718818eSStefano Zampini PetscFunctionReturn(0); 16216718818eSStefano Zampini } 16226718818eSStefano Zampini 16239371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) { 16246718818eSStefano Zampini PetscFunctionBegin; 16256718818eSStefano Zampini C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16266718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_ABt; 16274222ddf1SHong Zhang PetscFunctionReturn(0); 16284222ddf1SHong Zhang } 16294222ddf1SHong Zhang 16309371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) { 16314222ddf1SHong Zhang Mat_Product *product = C->product; 16324222ddf1SHong Zhang 16334222ddf1SHong Zhang PetscFunctionBegin; 16344222ddf1SHong Zhang switch (product->type) { 16359371c9d4SSatish Balay case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C)); break; 16369371c9d4SSatish Balay case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C)); break; 16379371c9d4SSatish Balay case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C)); break; 16389371c9d4SSatish Balay default: break; 16394222ddf1SHong Zhang } 16404222ddf1SHong Zhang PetscFunctionReturn(0); 16414222ddf1SHong Zhang } 16424222ddf1SHong Zhang /* ------------------------------------------------------- */ 16439371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) { 16444222ddf1SHong Zhang Mat_Product *product = C->product; 16454222ddf1SHong Zhang Mat A = product->A; 16464222ddf1SHong Zhang PetscBool baij; 16474222ddf1SHong Zhang 16484222ddf1SHong Zhang PetscFunctionBegin; 16499566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &baij)); 16504222ddf1SHong Zhang if (!baij) { /* A is seqsbaij */ 16514222ddf1SHong Zhang PetscBool sbaij; 16529566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &sbaij)); 165328b400f6SJacob Faibussowitsch PetscCheck(sbaij, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "Mat must be either seqbaij or seqsbaij format"); 16544222ddf1SHong Zhang 16554222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 16564222ddf1SHong Zhang } else { /* A is seqbaij */ 16574222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 16584222ddf1SHong Zhang } 16594222ddf1SHong Zhang 16604222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16614222ddf1SHong Zhang PetscFunctionReturn(0); 16624222ddf1SHong Zhang } 16634222ddf1SHong Zhang 16649371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) { 16654222ddf1SHong Zhang Mat_Product *product = C->product; 16664222ddf1SHong Zhang 16674222ddf1SHong Zhang PetscFunctionBegin; 16686718818eSStefano Zampini MatCheckProduct(C, 1); 166928b400f6SJacob Faibussowitsch PetscCheck(product->A, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing A"); 1670b94d7dedSBarry Smith if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C)); 16714222ddf1SHong Zhang PetscFunctionReturn(0); 16724222ddf1SHong Zhang } 16736718818eSStefano Zampini 16744222ddf1SHong Zhang /* ------------------------------------------------------- */ 16759371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) { 16764222ddf1SHong Zhang PetscFunctionBegin; 16774222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 16784222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16794222ddf1SHong Zhang PetscFunctionReturn(0); 16804222ddf1SHong Zhang } 16814222ddf1SHong Zhang 16829371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) { 16834222ddf1SHong Zhang Mat_Product *product = C->product; 16844222ddf1SHong Zhang 16854222ddf1SHong Zhang PetscFunctionBegin; 1686*48a46eb9SPierre Jolivet if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C)); 16874222ddf1SHong Zhang PetscFunctionReturn(0); 16884222ddf1SHong Zhang } 16894222ddf1SHong Zhang /* ------------------------------------------------------- */ 16904222ddf1SHong Zhang 16919371c9d4SSatish Balay PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring, Mat B, Mat Btdense) { 16922f5f1f90SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 16932f5f1f90SHong Zhang Mat_SeqDense *btdense = (Mat_SeqDense *)Btdense->data; 16942f5f1f90SHong Zhang PetscInt *bi = b->i, *bj = b->j; 16952f5f1f90SHong Zhang PetscInt m = Btdense->rmap->n, n = Btdense->cmap->n, j, k, l, col, anz, *btcol, brow, ncolumns; 16962f5f1f90SHong Zhang MatScalar *btval, *btval_den, *ba = b->a; 16972f5f1f90SHong Zhang PetscInt *columns = coloring->columns, *colorforcol = coloring->colorforcol, ncolors = coloring->ncolors; 1698c8db22f6SHong Zhang 1699c8db22f6SHong Zhang PetscFunctionBegin; 17002f5f1f90SHong Zhang btval_den = btdense->v; 17019566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(btval_den, m * n)); 17022f5f1f90SHong Zhang for (k = 0; k < ncolors; k++) { 17032f5f1f90SHong Zhang ncolumns = coloring->ncolumns[k]; 17042f5f1f90SHong Zhang for (l = 0; l < ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1705d2853540SHong Zhang col = *(columns + colorforcol[k] + l); 17062f5f1f90SHong Zhang btcol = bj + bi[col]; 17072f5f1f90SHong Zhang btval = ba + bi[col]; 17082f5f1f90SHong Zhang anz = bi[col + 1] - bi[col]; 1709d2853540SHong Zhang for (j = 0; j < anz; j++) { 17102f5f1f90SHong Zhang brow = btcol[j]; 17112f5f1f90SHong Zhang btval_den[brow] = btval[j]; 1712c8db22f6SHong Zhang } 1713c8db22f6SHong Zhang } 17142f5f1f90SHong Zhang btval_den += m; 1715c8db22f6SHong Zhang } 1716c8db22f6SHong Zhang PetscFunctionReturn(0); 1717c8db22f6SHong Zhang } 1718c8db22f6SHong Zhang 17199371c9d4SSatish Balay PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring, Mat Cden, Mat Csp) { 1720b2d2b04fSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ *)Csp->data; 17211683a169SBarry Smith const PetscScalar *ca_den, *ca_den_ptr; 17221683a169SBarry Smith PetscScalar *ca = csp->a; 1723f99a636bSHong Zhang PetscInt k, l, m = Cden->rmap->n, ncolors = matcoloring->ncolors; 1724e88777f2SHong Zhang PetscInt brows = matcoloring->brows, *den2sp = matcoloring->den2sp; 1725077f23c2SHong Zhang PetscInt nrows, *row, *idx; 1726077f23c2SHong Zhang PetscInt *rows = matcoloring->rows, *colorforrow = matcoloring->colorforrow; 17278972f759SHong Zhang 17288972f759SHong Zhang PetscFunctionBegin; 17299566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(Cden, &ca_den)); 1730f99a636bSHong Zhang 1731077f23c2SHong Zhang if (brows > 0) { 1732077f23c2SHong Zhang PetscInt *lstart, row_end, row_start; 1733980ae229SHong Zhang lstart = matcoloring->lstart; 17349566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(lstart, ncolors)); 1735eeb4fd02SHong Zhang 1736077f23c2SHong Zhang row_end = brows; 1737eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1738077f23c2SHong Zhang for (row_start = 0; row_start < m; row_start += brows) { /* loop over row blocks of Csp */ 1739077f23c2SHong Zhang ca_den_ptr = ca_den; 1740980ae229SHong Zhang for (k = 0; k < ncolors; k++) { /* loop over colors (columns of Cden) */ 1741eeb4fd02SHong Zhang nrows = matcoloring->nrows[k]; 1742eeb4fd02SHong Zhang row = rows + colorforrow[k]; 1743eeb4fd02SHong Zhang idx = den2sp + colorforrow[k]; 1744eeb4fd02SHong Zhang for (l = lstart[k]; l < nrows; l++) { 1745eeb4fd02SHong Zhang if (row[l] >= row_end) { 1746eeb4fd02SHong Zhang lstart[k] = l; 1747eeb4fd02SHong Zhang break; 1748eeb4fd02SHong Zhang } else { 1749077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1750eeb4fd02SHong Zhang } 1751eeb4fd02SHong Zhang } 1752077f23c2SHong Zhang ca_den_ptr += m; 1753eeb4fd02SHong Zhang } 1754077f23c2SHong Zhang row_end += brows; 1755eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1756eeb4fd02SHong Zhang } 1757077f23c2SHong Zhang } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1758077f23c2SHong Zhang ca_den_ptr = ca_den; 1759077f23c2SHong Zhang for (k = 0; k < ncolors; k++) { 1760077f23c2SHong Zhang nrows = matcoloring->nrows[k]; 1761077f23c2SHong Zhang row = rows + colorforrow[k]; 1762077f23c2SHong Zhang idx = den2sp + colorforrow[k]; 17639371c9d4SSatish Balay for (l = 0; l < nrows; l++) { ca[idx[l]] = ca_den_ptr[row[l]]; } 1764077f23c2SHong Zhang ca_den_ptr += m; 1765077f23c2SHong Zhang } 1766f99a636bSHong Zhang } 1767f99a636bSHong Zhang 17689566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(Cden, &ca_den)); 1769f99a636bSHong Zhang #if defined(PETSC_USE_INFO) 1770077f23c2SHong Zhang if (matcoloring->brows > 0) { 17719566063dSJacob Faibussowitsch PetscCall(PetscInfo(Csp, "Loop over %" PetscInt_FMT " row blocks for den2sp\n", brows)); 1772e88777f2SHong Zhang } else { 17739566063dSJacob Faibussowitsch PetscCall(PetscInfo(Csp, "Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")); 1774e88777f2SHong Zhang } 1775f99a636bSHong Zhang #endif 17768972f759SHong Zhang PetscFunctionReturn(0); 17778972f759SHong Zhang } 17788972f759SHong Zhang 17799371c9d4SSatish Balay PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat, ISColoring iscoloring, MatTransposeColoring c) { 1780e88777f2SHong Zhang PetscInt i, n, nrows, Nbs, j, k, m, ncols, col, cm; 17811a83f524SJed Brown const PetscInt *is, *ci, *cj, *row_idx; 1782b28d80bdSHong Zhang PetscInt nis = iscoloring->n, *rowhit, bs = 1; 1783b1683b59SHong Zhang IS *isa; 1784b28d80bdSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ *)mat->data; 1785e88777f2SHong Zhang PetscInt *colorforrow, *rows, *rows_i, *idxhit, *spidx, *den2sp, *den2sp_i; 1786e88777f2SHong Zhang PetscInt *colorforcol, *columns, *columns_i, brows; 1787e88777f2SHong Zhang PetscBool flg; 1788b1683b59SHong Zhang 1789b1683b59SHong Zhang PetscFunctionBegin; 17909566063dSJacob Faibussowitsch PetscCall(ISColoringGetIS(iscoloring, PETSC_USE_POINTER, PETSC_IGNORE, &isa)); 1791e99be685SHong Zhang 17927ecccc15SHong Zhang /* bs >1 is not being tested yet! */ 1793e88777f2SHong Zhang Nbs = mat->cmap->N / bs; 1794b1683b59SHong Zhang c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */ 1795e88777f2SHong Zhang c->N = Nbs; 1796e88777f2SHong Zhang c->m = c->M; 1797b1683b59SHong Zhang c->rstart = 0; 1798e88777f2SHong Zhang c->brows = 100; 1799b1683b59SHong Zhang 1800b1683b59SHong Zhang c->ncolors = nis; 18019566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nis, &c->ncolumns, nis, &c->nrows, nis + 1, &colorforrow)); 18029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(csp->nz + 1, &rows)); 18039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(csp->nz + 1, &den2sp)); 1804e88777f2SHong Zhang 1805e88777f2SHong Zhang brows = c->brows; 18069566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-matden2sp_brows", &brows, &flg)); 1807e88777f2SHong Zhang if (flg) c->brows = brows; 1808*48a46eb9SPierre Jolivet if (brows > 0) PetscCall(PetscMalloc1(nis + 1, &c->lstart)); 18092205254eSKarl Rupp 1810d2853540SHong Zhang colorforrow[0] = 0; 1811d2853540SHong Zhang rows_i = rows; 1812f99a636bSHong Zhang den2sp_i = den2sp; 1813b1683b59SHong Zhang 18149566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nis + 1, &colorforcol)); 18159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Nbs + 1, &columns)); 18162205254eSKarl Rupp 1817d2853540SHong Zhang colorforcol[0] = 0; 1818d2853540SHong Zhang columns_i = columns; 1819d2853540SHong Zhang 1820077f23c2SHong Zhang /* get column-wise storage of mat */ 18219566063dSJacob Faibussowitsch PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 1822b1683b59SHong Zhang 1823b28d80bdSHong Zhang cm = c->m; 18249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cm + 1, &rowhit)); 18259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cm + 1, &idxhit)); 1826f99a636bSHong Zhang for (i = 0; i < nis; i++) { /* loop over color */ 18279566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isa[i], &n)); 18289566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isa[i], &is)); 18292205254eSKarl Rupp 1830b1683b59SHong Zhang c->ncolumns[i] = n; 18311baa6e33SBarry Smith if (n) PetscCall(PetscArraycpy(columns_i, is, n)); 1832d2853540SHong Zhang colorforcol[i + 1] = colorforcol[i] + n; 1833d2853540SHong Zhang columns_i += n; 1834b1683b59SHong Zhang 1835b1683b59SHong Zhang /* fast, crude version requires O(N*N) work */ 18369566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rowhit, cm)); 1837e99be685SHong Zhang 1838b7caf3d6SHong Zhang for (j = 0; j < n; j++) { /* loop over columns*/ 1839b1683b59SHong Zhang col = is[j]; 1840d2853540SHong Zhang row_idx = cj + ci[col]; 1841b1683b59SHong Zhang m = ci[col + 1] - ci[col]; 1842b7caf3d6SHong Zhang for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */ 1843e99be685SHong Zhang idxhit[*row_idx] = spidx[ci[col] + k]; 1844d2853540SHong Zhang rowhit[*row_idx++] = col + 1; 1845b1683b59SHong Zhang } 1846b1683b59SHong Zhang } 1847b1683b59SHong Zhang /* count the number of hits */ 1848b1683b59SHong Zhang nrows = 0; 1849e8354b3eSHong Zhang for (j = 0; j < cm; j++) { 1850b1683b59SHong Zhang if (rowhit[j]) nrows++; 1851b1683b59SHong Zhang } 1852b1683b59SHong Zhang c->nrows[i] = nrows; 1853d2853540SHong Zhang colorforrow[i + 1] = colorforrow[i] + nrows; 1854d2853540SHong Zhang 1855b1683b59SHong Zhang nrows = 0; 1856b7caf3d6SHong Zhang for (j = 0; j < cm; j++) { /* loop over rows */ 1857b1683b59SHong Zhang if (rowhit[j]) { 1858d2853540SHong Zhang rows_i[nrows] = j; 185912b89a43SHong Zhang den2sp_i[nrows] = idxhit[j]; 1860b1683b59SHong Zhang nrows++; 1861b1683b59SHong Zhang } 1862b1683b59SHong Zhang } 1863e88777f2SHong Zhang den2sp_i += nrows; 1864077f23c2SHong Zhang 18659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isa[i], &is)); 1866f99a636bSHong Zhang rows_i += nrows; 1867b1683b59SHong Zhang } 18689566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 18699566063dSJacob Faibussowitsch PetscCall(PetscFree(rowhit)); 18709566063dSJacob Faibussowitsch PetscCall(ISColoringRestoreIS(iscoloring, PETSC_USE_POINTER, &isa)); 18712c71b3e2SJacob Faibussowitsch PetscCheck(csp->nz == colorforrow[nis], PETSC_COMM_SELF, PETSC_ERR_PLIB, "csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT, csp->nz, colorforrow[nis]); 1872b28d80bdSHong Zhang 1873d2853540SHong Zhang c->colorforrow = colorforrow; 1874d2853540SHong Zhang c->rows = rows; 1875f99a636bSHong Zhang c->den2sp = den2sp; 1876d2853540SHong Zhang c->colorforcol = colorforcol; 1877d2853540SHong Zhang c->columns = columns; 18782205254eSKarl Rupp 18799566063dSJacob Faibussowitsch PetscCall(PetscFree(idxhit)); 1880b1683b59SHong Zhang PetscFunctionReturn(0); 1881b1683b59SHong Zhang } 1882d013fc79Sandi selinger 18834222ddf1SHong Zhang /* --------------------------------------------------------------- */ 18849371c9d4SSatish Balay static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) { 18854222ddf1SHong Zhang Mat_Product *product = C->product; 18864222ddf1SHong Zhang Mat A = product->A, B = product->B; 18874222ddf1SHong Zhang 1888df97dc6dSFande Kong PetscFunctionBegin; 18894222ddf1SHong Zhang if (C->ops->mattransposemultnumeric) { 18904222ddf1SHong Zhang /* Alg: "outerproduct" */ 18919566063dSJacob Faibussowitsch PetscCall((*C->ops->mattransposemultnumeric)(A, B, C)); 18924222ddf1SHong Zhang } else { 18934222ddf1SHong Zhang /* Alg: "matmatmult" -- C = At*B */ 18946718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 18954222ddf1SHong Zhang 189628b400f6SJacob Faibussowitsch PetscCheck(atb, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct"); 18971cdffd5eSHong Zhang if (atb->At) { 18981cdffd5eSHong Zhang /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(); 18991cdffd5eSHong Zhang user may have called MatProductReplaceMats() to get this A=product->A */ 19001cdffd5eSHong Zhang PetscCall(MatTransposeSetPrecursor(A, atb->At)); 19017fb60732SBarry Smith PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &atb->At)); 19024222ddf1SHong Zhang } 19037fb60732SBarry Smith PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A, B, C)); 19044222ddf1SHong Zhang } 1905df97dc6dSFande Kong PetscFunctionReturn(0); 1906df97dc6dSFande Kong } 1907df97dc6dSFande Kong 19089371c9d4SSatish Balay static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) { 19094222ddf1SHong Zhang Mat_Product *product = C->product; 19104222ddf1SHong Zhang Mat A = product->A, B = product->B; 19114222ddf1SHong Zhang PetscReal fill = product->fill; 1912d013fc79Sandi selinger 1913d013fc79Sandi selinger PetscFunctionBegin; 19149566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A, B, fill, C)); 19152869b61bSandi selinger 19164222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 19174222ddf1SHong Zhang PetscFunctionReturn(0); 19182869b61bSandi selinger } 1919d013fc79Sandi selinger 19204222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19219371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) { 19224222ddf1SHong Zhang Mat_Product *product = C->product; 19234222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19244222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 19254222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 19264222ddf1SHong Zhang const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"}; 19274222ddf1SHong Zhang PetscInt nalg = 7; 19284222ddf1SHong Zhang #else 19294222ddf1SHong Zhang const char *algTypes[8] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge", "hypre"}; 19304222ddf1SHong Zhang PetscInt nalg = 8; 19314222ddf1SHong Zhang #endif 19324222ddf1SHong Zhang 19334222ddf1SHong Zhang PetscFunctionBegin; 19344222ddf1SHong Zhang /* Set default algorithm */ 19359566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 1936*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 1937d013fc79Sandi selinger 19384222ddf1SHong Zhang /* Get runtime option */ 19394222ddf1SHong Zhang if (product->api_user) { 1940d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 19419566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmatmult_via", "Algorithmic approach", "MatMatMult", algTypes, nalg, algTypes[0], &alg, &flg)); 1942d0609cedSBarry Smith PetscOptionsEnd(); 19434222ddf1SHong Zhang } else { 1944d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 19459566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[0], &alg, &flg)); 1946d0609cedSBarry Smith PetscOptionsEnd(); 1947d013fc79Sandi selinger } 1948*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 1949d013fc79Sandi selinger 19504222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 19514222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 19524222ddf1SHong Zhang PetscFunctionReturn(0); 19534222ddf1SHong Zhang } 1954d013fc79Sandi selinger 19559371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) { 19564222ddf1SHong Zhang Mat_Product *product = C->product; 19574222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19584222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 1959089a957eSStefano Zampini const char *algTypes[3] = {"default", "at*b", "outerproduct"}; 1960089a957eSStefano Zampini PetscInt nalg = 3; 1961d013fc79Sandi selinger 19624222ddf1SHong Zhang PetscFunctionBegin; 19634222ddf1SHong Zhang /* Get runtime option */ 19644222ddf1SHong Zhang if (product->api_user) { 1965d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatTransposeMatMult", "Mat"); 19669566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mattransposematmult_via", "Algorithmic approach", "MatTransposeMatMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 1967d0609cedSBarry Smith PetscOptionsEnd(); 19684222ddf1SHong Zhang } else { 1969d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AtB", "Mat"); 19709566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AtB", algTypes, nalg, algTypes[alg], &alg, &flg)); 1971d0609cedSBarry Smith PetscOptionsEnd(); 19724222ddf1SHong Zhang } 1973*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 1974d013fc79Sandi selinger 19754222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 19764222ddf1SHong Zhang PetscFunctionReturn(0); 19774222ddf1SHong Zhang } 19784222ddf1SHong Zhang 19799371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) { 19804222ddf1SHong Zhang Mat_Product *product = C->product; 19814222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19824222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 19834222ddf1SHong Zhang const char *algTypes[2] = {"default", "color"}; 19844222ddf1SHong Zhang PetscInt nalg = 2; 19854222ddf1SHong Zhang 19864222ddf1SHong Zhang PetscFunctionBegin; 19874222ddf1SHong Zhang /* Set default algorithm */ 19889566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 19894222ddf1SHong Zhang if (!flg) { 19904222ddf1SHong Zhang alg = 1; 19919566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 19924222ddf1SHong Zhang } 19934222ddf1SHong Zhang 19944222ddf1SHong Zhang /* Get runtime option */ 19954222ddf1SHong Zhang if (product->api_user) { 1996d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat"); 19979566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmattransmult_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 1998d0609cedSBarry Smith PetscOptionsEnd(); 19994222ddf1SHong Zhang } else { 2000d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat"); 20019566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg)); 2002d0609cedSBarry Smith PetscOptionsEnd(); 20034222ddf1SHong Zhang } 2004*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20054222ddf1SHong Zhang 20064222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 20074222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 20084222ddf1SHong Zhang PetscFunctionReturn(0); 20094222ddf1SHong Zhang } 20104222ddf1SHong Zhang 20119371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) { 20124222ddf1SHong Zhang Mat_Product *product = C->product; 20134222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20144222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 20154222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 20164222ddf1SHong Zhang const char *algTypes[2] = {"scalable", "rap"}; 20174222ddf1SHong Zhang PetscInt nalg = 2; 20184222ddf1SHong Zhang #else 20194222ddf1SHong Zhang const char *algTypes[3] = {"scalable", "rap", "hypre"}; 20204222ddf1SHong Zhang PetscInt nalg = 3; 20214222ddf1SHong Zhang #endif 20224222ddf1SHong Zhang 20234222ddf1SHong Zhang PetscFunctionBegin; 20244222ddf1SHong Zhang /* Set default algorithm */ 20259566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2026*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20274222ddf1SHong Zhang 20284222ddf1SHong Zhang /* Get runtime option */ 20294222ddf1SHong Zhang if (product->api_user) { 2030d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 20319566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matptap_via", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[0], &alg, &flg)); 2032d0609cedSBarry Smith PetscOptionsEnd(); 20334222ddf1SHong Zhang } else { 2034d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 20359566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_PtAP", algTypes, nalg, algTypes[0], &alg, &flg)); 2036d0609cedSBarry Smith PetscOptionsEnd(); 20374222ddf1SHong Zhang } 2038*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20394222ddf1SHong Zhang 20404222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 20414222ddf1SHong Zhang PetscFunctionReturn(0); 20424222ddf1SHong Zhang } 20434222ddf1SHong Zhang 20449371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) { 20454222ddf1SHong Zhang Mat_Product *product = C->product; 20464222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20474222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20484222ddf1SHong Zhang const char *algTypes[3] = {"r*a*rt", "r*art", "coloring_rart"}; 20494222ddf1SHong Zhang PetscInt nalg = 3; 20504222ddf1SHong Zhang 20514222ddf1SHong Zhang PetscFunctionBegin; 20524222ddf1SHong Zhang /* Set default algorithm */ 20539566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2054*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20554222ddf1SHong Zhang 20564222ddf1SHong Zhang /* Get runtime option */ 20574222ddf1SHong Zhang if (product->api_user) { 2058d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatRARt", "Mat"); 20599566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, nalg, algTypes[0], &alg, &flg)); 2060d0609cedSBarry Smith PetscOptionsEnd(); 20614222ddf1SHong Zhang } else { 2062d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_RARt", "Mat"); 20639566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_RARt", algTypes, nalg, algTypes[0], &alg, &flg)); 2064d0609cedSBarry Smith PetscOptionsEnd(); 20654222ddf1SHong Zhang } 2066*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20674222ddf1SHong Zhang 20684222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 20694222ddf1SHong Zhang PetscFunctionReturn(0); 20704222ddf1SHong Zhang } 20714222ddf1SHong Zhang 20724222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 20739371c9d4SSatish Balay static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) { 20744222ddf1SHong Zhang Mat_Product *product = C->product; 20754222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20764222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20774222ddf1SHong Zhang const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"}; 20784222ddf1SHong Zhang PetscInt nalg = 7; 20794222ddf1SHong Zhang 20804222ddf1SHong Zhang PetscFunctionBegin; 20814222ddf1SHong Zhang /* Set default algorithm */ 20829566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg, "default", &flg)); 2083*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20844222ddf1SHong Zhang 20854222ddf1SHong Zhang /* Get runtime option */ 20864222ddf1SHong Zhang if (product->api_user) { 2087d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMatMult", "Mat"); 20889566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmatmatmult_via", "Algorithmic approach", "MatMatMatMult", algTypes, nalg, algTypes[alg], &alg, &flg)); 2089d0609cedSBarry Smith PetscOptionsEnd(); 20904222ddf1SHong Zhang } else { 2091d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABC", "Mat"); 20929566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABC", algTypes, nalg, algTypes[alg], &alg, &flg)); 2093d0609cedSBarry Smith PetscOptionsEnd(); 20944222ddf1SHong Zhang } 2095*48a46eb9SPierre Jolivet if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 20964222ddf1SHong Zhang 20974222ddf1SHong Zhang C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 20984222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABC; 20994222ddf1SHong Zhang PetscFunctionReturn(0); 21004222ddf1SHong Zhang } 21014222ddf1SHong Zhang 21029371c9d4SSatish Balay PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) { 21034222ddf1SHong Zhang Mat_Product *product = C->product; 21044222ddf1SHong Zhang 21054222ddf1SHong Zhang PetscFunctionBegin; 21064222ddf1SHong Zhang switch (product->type) { 21079371c9d4SSatish Balay case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C)); break; 21089371c9d4SSatish Balay case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C)); break; 21099371c9d4SSatish Balay case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C)); break; 21109371c9d4SSatish Balay case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C)); break; 21119371c9d4SSatish Balay case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C)); break; 21129371c9d4SSatish Balay case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C)); break; 21139371c9d4SSatish Balay default: break; 21144222ddf1SHong Zhang } 2115d013fc79Sandi selinger PetscFunctionReturn(0); 2116d013fc79Sandi selinger } 2117