1be1d678aSKris Buschelman 2d50806bdSBarry Smith /* 32499ec78SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5d50806bdSBarry Smith */ 6d50806bdSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 9c6db04a5SJed Brown #include <petscbt.h> 10af0996ceSBarry Smith #include <petsc/private/isimpl.h> 1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h> 127bab7c10SHong Zhang 13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14df97dc6dSFande Kong { 15df97dc6dSFande Kong PetscFunctionBegin; 16*dbbe0bcdSBarry Smith if (C->ops->matmultnumeric) PetscCall((*C->ops->matmultnumeric)(A,B,C)); 17*dbbe0bcdSBarry Smith else PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C)); 18df97dc6dSFande Kong PetscFunctionReturn(0); 19df97dc6dSFande Kong } 20df97dc6dSFande Kong 214222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */ 22e4e71118SStefano Zampini PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 23df97dc6dSFande Kong { 244222ddf1SHong Zhang PetscInt ii; 254222ddf1SHong Zhang Mat_SeqAIJ *aij; 26cbc6b225SStefano Zampini PetscBool isseqaij, osingle, ofree_a, ofree_ij; 275c66b693SKris Buschelman 285c66b693SKris Buschelman PetscFunctionBegin; 29aed4548fSBarry Smith PetscCheck(m <= 0 || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 309566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat,m,n,m,n)); 314222ddf1SHong Zhang 32e4e71118SStefano Zampini if (!mtype) { 339566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij)); 349566063dSJacob Faibussowitsch if (!isseqaij) PetscCall(MatSetType(mat,MATSEQAIJ)); 35e4e71118SStefano Zampini } else { 369566063dSJacob Faibussowitsch PetscCall(MatSetType(mat,mtype)); 37e4e71118SStefano Zampini } 38cbc6b225SStefano Zampini 394222ddf1SHong Zhang aij = (Mat_SeqAIJ*)(mat)->data; 40cbc6b225SStefano Zampini osingle = aij->singlemalloc; 41cbc6b225SStefano Zampini ofree_a = aij->free_a; 42cbc6b225SStefano Zampini ofree_ij = aij->free_ij; 43cbc6b225SStefano Zampini /* changes the free flags */ 449566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL)); 45cbc6b225SStefano Zampini 469566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->ilen)); 479566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->imax)); 489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m,&aij->imax)); 499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m,&aij->ilen)); 50cbc6b225SStefano Zampini for (ii=0,aij->nonzerorowcnt=0,aij->rmax = 0; ii<m; ii++) { 51cbc6b225SStefano Zampini const PetscInt rnz = i[ii+1] - i[ii]; 52cbc6b225SStefano Zampini aij->nonzerorowcnt += !!rnz; 53cbc6b225SStefano Zampini aij->rmax = PetscMax(aij->rmax,rnz); 54cbc6b225SStefano Zampini aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 55cbc6b225SStefano Zampini } 56cbc6b225SStefano Zampini aij->maxnz = i[m]; 57cbc6b225SStefano Zampini aij->nz = i[m]; 584222ddf1SHong Zhang 59cbc6b225SStefano Zampini if (osingle) { 609566063dSJacob Faibussowitsch PetscCall(PetscFree3(aij->a,aij->j,aij->i)); 61cbc6b225SStefano Zampini } else { 629566063dSJacob Faibussowitsch if (ofree_a) PetscCall(PetscFree(aij->a)); 639566063dSJacob Faibussowitsch if (ofree_ij) PetscCall(PetscFree(aij->j)); 649566063dSJacob Faibussowitsch if (ofree_ij) PetscCall(PetscFree(aij->i)); 65cbc6b225SStefano Zampini } 664222ddf1SHong Zhang aij->i = i; 674222ddf1SHong Zhang aij->j = j; 684222ddf1SHong Zhang aij->a = a; 694222ddf1SHong Zhang aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */ 70cbc6b225SStefano Zampini /* default to not retain ownership */ 71cbc6b225SStefano Zampini aij->singlemalloc = PETSC_FALSE; 724222ddf1SHong Zhang aij->free_a = PETSC_FALSE; 734222ddf1SHong Zhang aij->free_ij = PETSC_FALSE; 749566063dSJacob Faibussowitsch PetscCall(MatCheckCompressedRow(mat,aij->nonzerorowcnt,&aij->compressedrow,aij->i,m,0.6)); 755c66b693SKris Buschelman PetscFunctionReturn(0); 765c66b693SKris Buschelman } 771c24bd37SHong Zhang 784222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 794222ddf1SHong Zhang { 804222ddf1SHong Zhang Mat_Product *product = C->product; 814222ddf1SHong Zhang MatProductAlgorithm alg; 824222ddf1SHong Zhang PetscBool flg; 834222ddf1SHong Zhang 844222ddf1SHong Zhang PetscFunctionBegin; 854222ddf1SHong Zhang if (product) { 864222ddf1SHong Zhang alg = product->alg; 874222ddf1SHong Zhang } else { 884222ddf1SHong Zhang alg = "sorted"; 894222ddf1SHong Zhang } 904222ddf1SHong Zhang /* sorted */ 919566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"sorted",&flg)); 924222ddf1SHong Zhang if (flg) { 939566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C)); 944222ddf1SHong Zhang PetscFunctionReturn(0); 954222ddf1SHong Zhang } 964222ddf1SHong Zhang 974222ddf1SHong Zhang /* scalable */ 989566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"scalable",&flg)); 994222ddf1SHong Zhang if (flg) { 1009566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C)); 1014222ddf1SHong Zhang PetscFunctionReturn(0); 1024222ddf1SHong Zhang } 1034222ddf1SHong Zhang 1044222ddf1SHong Zhang /* scalable_fast */ 1059566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"scalable_fast",&flg)); 1064222ddf1SHong Zhang if (flg) { 1079566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C)); 1084222ddf1SHong Zhang PetscFunctionReturn(0); 1094222ddf1SHong Zhang } 1104222ddf1SHong Zhang 1114222ddf1SHong Zhang /* heap */ 1129566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"heap",&flg)); 1134222ddf1SHong Zhang if (flg) { 1149566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C)); 1154222ddf1SHong Zhang PetscFunctionReturn(0); 1164222ddf1SHong Zhang } 1174222ddf1SHong Zhang 1184222ddf1SHong Zhang /* btheap */ 1199566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"btheap",&flg)); 1204222ddf1SHong Zhang if (flg) { 1219566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C)); 1224222ddf1SHong Zhang PetscFunctionReturn(0); 1234222ddf1SHong Zhang } 1244222ddf1SHong Zhang 1254222ddf1SHong Zhang /* llcondensed */ 1269566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"llcondensed",&flg)); 1274222ddf1SHong Zhang if (flg) { 1289566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C)); 1294222ddf1SHong Zhang PetscFunctionReturn(0); 1304222ddf1SHong Zhang } 1314222ddf1SHong Zhang 1324222ddf1SHong Zhang /* rowmerge */ 1339566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"rowmerge",&flg)); 1344222ddf1SHong Zhang if (flg) { 1359566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C)); 1364222ddf1SHong Zhang PetscFunctionReturn(0); 1374222ddf1SHong Zhang } 1384222ddf1SHong Zhang 1394222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE) 1409566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(alg,"hypre",&flg)); 1414222ddf1SHong Zhang if (flg) { 1429566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C)); 1434222ddf1SHong Zhang PetscFunctionReturn(0); 1444222ddf1SHong Zhang } 1454222ddf1SHong Zhang #endif 1464222ddf1SHong Zhang 1474222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1484222ddf1SHong Zhang } 1494222ddf1SHong Zhang 1504222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 151b561aa9dSHong Zhang { 152b561aa9dSHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 153971236abSHong Zhang PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 154c1ba5575SJed Brown PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 155b561aa9dSHong Zhang PetscReal afill; 156eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 157eca6b86aSHong Zhang PetscTable ta; 158fb908031SHong Zhang PetscBT lnkbt; 1590298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 160b561aa9dSHong Zhang 161b561aa9dSHong Zhang PetscFunctionBegin; 162bd958071SHong Zhang /* Get ci and cj */ 163bd958071SHong Zhang /*---------------*/ 164fb908031SHong Zhang /* Allocate ci array, arrays for fill computation and */ 165fb908031SHong Zhang /* free space for accumulating nonzero column info */ 1669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+2,&ci)); 167fb908031SHong Zhang ci[0] = 0; 168fb908031SHong Zhang 169fb908031SHong Zhang /* create and initialize a linked list */ 1709566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn,bn,&ta)); 171c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 1729566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta,&Crmax)); 1739566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 174eca6b86aSHong Zhang 1759566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt)); 176fb908031SHong Zhang 177fb908031SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1789566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 1792205254eSKarl Rupp 180fb908031SHong Zhang current_space = free_space; 181fb908031SHong Zhang 182fb908031SHong Zhang /* Determine ci and cj */ 183fb908031SHong Zhang for (i=0; i<am; i++) { 184fb908031SHong Zhang anzi = ai[i+1] - ai[i]; 185fb908031SHong Zhang aj = a->j + ai[i]; 186fb908031SHong Zhang for (j=0; j<anzi; j++) { 187fb908031SHong Zhang brow = aj[j]; 188fb908031SHong Zhang bnzj = bi[brow+1] - bi[brow]; 189fb908031SHong Zhang bj = b->j + bi[brow]; 190fb908031SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 1919566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt)); 192fb908031SHong Zhang } 1938c78258cSHong Zhang /* add possible missing diagonal entry */ 1948c78258cSHong Zhang if (C->force_diagonals) { 1959566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted(1,&i,lnk,lnkbt)); 1968c78258cSHong Zhang } 197fb908031SHong Zhang cnzi = lnk[0]; 198fb908031SHong Zhang 199fb908031SHong Zhang /* If free space is not available, make more free space */ 200fb908031SHong Zhang /* Double the amount of total space in the list */ 201fb908031SHong Zhang if (current_space->local_remaining<cnzi) { 2029566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 203fb908031SHong Zhang ndouble++; 204fb908031SHong Zhang } 205fb908031SHong Zhang 206fb908031SHong Zhang /* Copy data into free space, then initialize lnk */ 2079566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt)); 2082205254eSKarl Rupp 209fb908031SHong Zhang current_space->array += cnzi; 210fb908031SHong Zhang current_space->local_used += cnzi; 211fb908031SHong Zhang current_space->local_remaining -= cnzi; 2122205254eSKarl Rupp 213fb908031SHong Zhang ci[i+1] = ci[i] + cnzi; 214fb908031SHong Zhang } 215fb908031SHong Zhang 216fb908031SHong Zhang /* Column indices are in the list of free space */ 217fb908031SHong Zhang /* Allocate space for cj, initialize cj, and */ 218fb908031SHong Zhang /* destroy list of free space and other temporary array(s) */ 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am]+1,&cj)); 2209566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 2219566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy(lnk,lnkbt)); 222b561aa9dSHong Zhang 22326be0446SHong Zhang /* put together the new symbolic matrix */ 2249566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 2259566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 22658c24d83SHong Zhang 22758c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 22858c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 2294222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 230aa1aec99SHong Zhang c->free_a = PETSC_FALSE; 231e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 23258c24d83SHong Zhang c->nonew = 0; 2334222ddf1SHong Zhang 2344222ddf1SHong Zhang /* fast, needs non-scalable O(bn) array 'abdense' */ 2354222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 2360b7e3e3dSHong Zhang 2377212da7cSHong Zhang /* set MatInfo */ 2387212da7cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 239f2b054eeSHong Zhang if (afill < 1.0) afill = 1.0; 2404222ddf1SHong Zhang C->info.mallocs = ndouble; 2414222ddf1SHong Zhang C->info.fill_ratio_given = fill; 2424222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 2437212da7cSHong Zhang 2447212da7cSHong Zhang #if defined(PETSC_USE_INFO) 2457212da7cSHong Zhang if (ci[am]) { 2469566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 2479566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 248f2b054eeSHong Zhang } else { 2499566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 250be0fcf8dSHong Zhang } 251f2b054eeSHong Zhang #endif 25258c24d83SHong Zhang PetscFunctionReturn(0); 25358c24d83SHong Zhang } 254d50806bdSBarry Smith 255df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 256d50806bdSBarry Smith { 257d13dce4bSSatish Balay PetscLogDouble flops=0.0; 258d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 259d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 260d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 26138baddfdSBarry Smith PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 262c58ca2e3SHong Zhang PetscInt am =A->rmap->n,cm=C->rmap->n; 263519eb980SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 2642e5835c6SStefano Zampini PetscScalar *ca,valtmp; 265aa1aec99SHong Zhang PetscScalar *ab_dense; 2666718818eSStefano Zampini PetscContainer cab_dense; 2672e5835c6SStefano Zampini const PetscScalar *aa,*ba,*baj; 268d50806bdSBarry Smith 269d50806bdSBarry Smith PetscFunctionBegin; 2709566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 2719566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 272aa1aec99SHong Zhang if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 2739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 274aa1aec99SHong Zhang c->a = ca; 275aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 2766718818eSStefano Zampini } else ca = c->a; 2776718818eSStefano Zampini 2786718818eSStefano Zampini /* TODO this should be done in the symbolic phase */ 2796718818eSStefano Zampini /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 2806718818eSStefano Zampini that is hard to eradicate) */ 2819566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense)); 2826718818eSStefano Zampini if (!cab_dense) { 2839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(B->cmap->N,&ab_dense)); 2849566063dSJacob Faibussowitsch PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&cab_dense)); 2859566063dSJacob Faibussowitsch PetscCall(PetscContainerSetPointer(cab_dense,ab_dense)); 2869566063dSJacob Faibussowitsch PetscCall(PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault)); 2879566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense)); 2889566063dSJacob Faibussowitsch PetscCall(PetscObjectDereference((PetscObject)cab_dense)); 289d90d86d0SMatthew G. Knepley } 2909566063dSJacob Faibussowitsch PetscCall(PetscContainerGetPointer(cab_dense,(void**)&ab_dense)); 2919566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ab_dense,B->cmap->N)); 292aa1aec99SHong Zhang 293c124e916SHong Zhang /* clean old values in C */ 2949566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm])); 295d50806bdSBarry Smith /* Traverse A row-wise. */ 296d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 297d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 298d50806bdSBarry Smith for (i=0; i<am; i++) { 299d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 300d50806bdSBarry Smith for (j=0; j<anzi; j++) { 301519eb980SHong Zhang brow = aj[j]; 302d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 303d50806bdSBarry Smith bjj = bj + bi[brow]; 304d50806bdSBarry Smith baj = ba + bi[brow]; 305519eb980SHong Zhang /* perform dense axpy */ 30636ec6d2dSHong Zhang valtmp = aa[j]; 307519eb980SHong Zhang for (k=0; k<bnzi; k++) { 30836ec6d2dSHong Zhang ab_dense[bjj[k]] += valtmp*baj[k]; 309519eb980SHong Zhang } 310519eb980SHong Zhang flops += 2*bnzi; 311519eb980SHong Zhang } 312c58ca2e3SHong Zhang aj += anzi; aa += anzi; 313c58ca2e3SHong Zhang 314c58ca2e3SHong Zhang cnzi = ci[i+1] - ci[i]; 315519eb980SHong Zhang for (k=0; k<cnzi; k++) { 316519eb980SHong Zhang ca[k] += ab_dense[cj[k]]; 317519eb980SHong Zhang ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 318519eb980SHong Zhang } 319519eb980SHong Zhang flops += cnzi; 320519eb980SHong Zhang cj += cnzi; ca += cnzi; 321519eb980SHong Zhang } 3222e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 3232e5835c6SStefano Zampini if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 3242e5835c6SStefano Zampini #endif 3259566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 3269566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 3279566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 3289566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 3299566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 330c58ca2e3SHong Zhang PetscFunctionReturn(0); 331c58ca2e3SHong Zhang } 332c58ca2e3SHong Zhang 33325023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 334c58ca2e3SHong Zhang { 335c58ca2e3SHong Zhang PetscLogDouble flops=0.0; 336c58ca2e3SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 337c58ca2e3SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 338c58ca2e3SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 339c58ca2e3SHong Zhang PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 340c58ca2e3SHong Zhang PetscInt am = A->rmap->N,cm=C->rmap->N; 341c58ca2e3SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 3422e5835c6SStefano Zampini PetscScalar *ca=c->a,valtmp; 3432e5835c6SStefano Zampini const PetscScalar *aa,*ba,*baj; 344c58ca2e3SHong Zhang PetscInt nextb; 345c58ca2e3SHong Zhang 346c58ca2e3SHong Zhang PetscFunctionBegin; 3479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 3489566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 349cf742fccSHong Zhang if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 3509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 351cf742fccSHong Zhang c->a = ca; 352cf742fccSHong Zhang c->free_a = PETSC_TRUE; 353cf742fccSHong Zhang } 354cf742fccSHong Zhang 355c58ca2e3SHong Zhang /* clean old values in C */ 3569566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm])); 357c58ca2e3SHong Zhang /* Traverse A row-wise. */ 358c58ca2e3SHong Zhang /* Build the ith row in C by summing over nonzero columns in A, */ 359c58ca2e3SHong Zhang /* the rows of B corresponding to nonzeros of A. */ 360519eb980SHong Zhang for (i=0; i<am; i++) { 361519eb980SHong Zhang anzi = ai[i+1] - ai[i]; 362519eb980SHong Zhang cnzi = ci[i+1] - ci[i]; 363519eb980SHong Zhang for (j=0; j<anzi; j++) { 364519eb980SHong Zhang brow = aj[j]; 365519eb980SHong Zhang bnzi = bi[brow+1] - bi[brow]; 366519eb980SHong Zhang bjj = bj + bi[brow]; 367519eb980SHong Zhang baj = ba + bi[brow]; 368519eb980SHong Zhang /* perform sparse axpy */ 36936ec6d2dSHong Zhang valtmp = aa[j]; 370c124e916SHong Zhang nextb = 0; 371c124e916SHong Zhang for (k=0; nextb<bnzi; k++) { 372c124e916SHong Zhang if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 37336ec6d2dSHong Zhang ca[k] += valtmp*baj[nextb++]; 374c124e916SHong Zhang } 375d50806bdSBarry Smith } 376d50806bdSBarry Smith flops += 2*bnzi; 377d50806bdSBarry Smith } 378519eb980SHong Zhang aj += anzi; aa += anzi; 379519eb980SHong Zhang cj += cnzi; ca += cnzi; 380d50806bdSBarry Smith } 3812e5835c6SStefano Zampini #if defined(PETSC_HAVE_DEVICE) 3822e5835c6SStefano Zampini if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 3832e5835c6SStefano Zampini #endif 3849566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 3859566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 3869566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 3879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 3889566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 389d50806bdSBarry Smith PetscFunctionReturn(0); 390d50806bdSBarry Smith } 391bc011b1eSHong Zhang 3924222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 39325296bd5SBarry Smith { 39425296bd5SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 39525296bd5SBarry Smith PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 3963c50cad2SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 39725296bd5SBarry Smith MatScalar *ca; 39825296bd5SBarry Smith PetscReal afill; 399eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 400eca6b86aSHong Zhang PetscTable ta; 4010298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 40225296bd5SBarry Smith 40325296bd5SBarry Smith PetscFunctionBegin; 4043c50cad2SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 4053c50cad2SHong Zhang /*-----------------------------------------------------------------------------------------*/ 4063c50cad2SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 4079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+2,&ci)); 4083c50cad2SHong Zhang ci[0] = 0; 4093c50cad2SHong Zhang 4103c50cad2SHong Zhang /* create and initialize a linked list */ 4119566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn,bn,&ta)); 412c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 4139566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta,&Crmax)); 4149566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 415eca6b86aSHong Zhang 4169566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate_fast(Crmax,&lnk)); 4173c50cad2SHong Zhang 4183c50cad2SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 4199566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 4203c50cad2SHong Zhang current_space = free_space; 4213c50cad2SHong Zhang 4223c50cad2SHong Zhang /* Determine ci and cj */ 4233c50cad2SHong Zhang for (i=0; i<am; i++) { 4243c50cad2SHong Zhang anzi = ai[i+1] - ai[i]; 4253c50cad2SHong Zhang aj = a->j + ai[i]; 4263c50cad2SHong Zhang for (j=0; j<anzi; j++) { 4273c50cad2SHong Zhang brow = aj[j]; 4283c50cad2SHong Zhang bnzj = bi[brow+1] - bi[brow]; 4293c50cad2SHong Zhang bj = b->j + bi[brow]; 4303c50cad2SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 4319566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_fast(bnzj,bj,lnk)); 4323c50cad2SHong Zhang } 4338c78258cSHong Zhang /* add possible missing diagonal entry */ 4348c78258cSHong Zhang if (C->force_diagonals) { 4359566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_fast(1,&i,lnk)); 4368c78258cSHong Zhang } 4373c50cad2SHong Zhang cnzi = lnk[1]; 4383c50cad2SHong Zhang 4393c50cad2SHong Zhang /* If free space is not available, make more free space */ 4403c50cad2SHong Zhang /* Double the amount of total space in the list */ 4413c50cad2SHong Zhang if (current_space->local_remaining<cnzi) { 4429566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 4433c50cad2SHong Zhang ndouble++; 4443c50cad2SHong Zhang } 4453c50cad2SHong Zhang 4463c50cad2SHong Zhang /* Copy data into free space, then initialize lnk */ 4479566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean_fast(cnzi,current_space->array,lnk)); 4482205254eSKarl Rupp 4493c50cad2SHong Zhang current_space->array += cnzi; 4503c50cad2SHong Zhang current_space->local_used += cnzi; 4513c50cad2SHong Zhang current_space->local_remaining -= cnzi; 4522205254eSKarl Rupp 4533c50cad2SHong Zhang ci[i+1] = ci[i] + cnzi; 4543c50cad2SHong Zhang } 4553c50cad2SHong Zhang 4563c50cad2SHong Zhang /* Column indices are in the list of free space */ 4573c50cad2SHong Zhang /* Allocate space for cj, initialize cj, and */ 4583c50cad2SHong Zhang /* destroy list of free space and other temporary array(s) */ 4599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am]+1,&cj)); 4609566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 4619566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy_fast(lnk)); 46225296bd5SBarry Smith 46325296bd5SBarry Smith /* Allocate space for ca */ 4649566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[am]+1,&ca)); 46525296bd5SBarry Smith 46625296bd5SBarry Smith /* put together the new symbolic matrix */ 4679566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 4689566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 46925296bd5SBarry Smith 47025296bd5SBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 47125296bd5SBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 4724222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 47325296bd5SBarry Smith c->free_a = PETSC_TRUE; 47425296bd5SBarry Smith c->free_ij = PETSC_TRUE; 47525296bd5SBarry Smith c->nonew = 0; 4762205254eSKarl Rupp 4774222ddf1SHong Zhang /* slower, less memory */ 4784222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 47925296bd5SBarry Smith 48025296bd5SBarry Smith /* set MatInfo */ 48125296bd5SBarry Smith afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 48225296bd5SBarry Smith if (afill < 1.0) afill = 1.0; 4834222ddf1SHong Zhang C->info.mallocs = ndouble; 4844222ddf1SHong Zhang C->info.fill_ratio_given = fill; 4854222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 48625296bd5SBarry Smith 48725296bd5SBarry Smith #if defined(PETSC_USE_INFO) 48825296bd5SBarry Smith if (ci[am]) { 4899566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 4909566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 49125296bd5SBarry Smith } else { 4929566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 49325296bd5SBarry Smith } 49425296bd5SBarry Smith #endif 49525296bd5SBarry Smith PetscFunctionReturn(0); 49625296bd5SBarry Smith } 49725296bd5SBarry Smith 4984222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 499e9e4536cSHong Zhang { 500e9e4536cSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 501bf9555e6SHong Zhang PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 50225c41797SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 503e9e4536cSHong Zhang MatScalar *ca; 504e9e4536cSHong Zhang PetscReal afill; 505eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 506eca6b86aSHong Zhang PetscTable ta; 5070298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 508e9e4536cSHong Zhang 509e9e4536cSHong Zhang PetscFunctionBegin; 510bd958071SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 511bd958071SHong Zhang /*---------------------------------------------------------------------------------------------*/ 512bd958071SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 5139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+2,&ci)); 514bd958071SHong Zhang ci[0] = 0; 515bd958071SHong Zhang 516bd958071SHong Zhang /* create and initialize a linked list */ 5179566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(bn,bn,&ta)); 518c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 5199566063dSJacob Faibussowitsch PetscCall(PetscTableGetCount(ta,&Crmax)); 5209566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&ta)); 5219566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedCreate_Scalable(Crmax,&lnk)); 522bd958071SHong Zhang 523bd958071SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 5249566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 525bd958071SHong Zhang current_space = free_space; 526bd958071SHong Zhang 527bd958071SHong Zhang /* Determine ci and cj */ 528bd958071SHong Zhang for (i=0; i<am; i++) { 529bd958071SHong Zhang anzi = ai[i+1] - ai[i]; 530bd958071SHong Zhang aj = a->j + ai[i]; 531bd958071SHong Zhang for (j=0; j<anzi; j++) { 532bd958071SHong Zhang brow = aj[j]; 533bd958071SHong Zhang bnzj = bi[brow+1] - bi[brow]; 534bd958071SHong Zhang bj = b->j + bi[brow]; 535bd958071SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 5369566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk)); 537bd958071SHong Zhang } 5388c78258cSHong Zhang /* add possible missing diagonal entry */ 5398c78258cSHong Zhang if (C->force_diagonals) { 5409566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedAddSorted_Scalable(1,&i,lnk)); 5418c78258cSHong Zhang } 5428c78258cSHong Zhang 543bd958071SHong Zhang cnzi = lnk[0]; 544bd958071SHong Zhang 545bd958071SHong Zhang /* If free space is not available, make more free space */ 546bd958071SHong Zhang /* Double the amount of total space in the list */ 547bd958071SHong Zhang if (current_space->local_remaining<cnzi) { 5489566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 549bd958071SHong Zhang ndouble++; 550bd958071SHong Zhang } 551bd958071SHong Zhang 552bd958071SHong Zhang /* Copy data into free space, then initialize lnk */ 5539566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk)); 5542205254eSKarl Rupp 555bd958071SHong Zhang current_space->array += cnzi; 556bd958071SHong Zhang current_space->local_used += cnzi; 557bd958071SHong Zhang current_space->local_remaining -= cnzi; 5582205254eSKarl Rupp 559bd958071SHong Zhang ci[i+1] = ci[i] + cnzi; 560bd958071SHong Zhang } 561bd958071SHong Zhang 562bd958071SHong Zhang /* Column indices are in the list of free space */ 563bd958071SHong Zhang /* Allocate space for cj, initialize cj, and */ 564bd958071SHong Zhang /* destroy list of free space and other temporary array(s) */ 5659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am]+1,&cj)); 5669566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 5679566063dSJacob Faibussowitsch PetscCall(PetscLLCondensedDestroy_Scalable(lnk)); 568e9e4536cSHong Zhang 569e9e4536cSHong Zhang /* Allocate space for ca */ 570bd958071SHong Zhang /*-----------------------*/ 5719566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[am]+1,&ca)); 572e9e4536cSHong Zhang 573e9e4536cSHong Zhang /* put together the new symbolic matrix */ 5749566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 5759566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 576e9e4536cSHong Zhang 577e9e4536cSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 578e9e4536cSHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 5794222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 580e9e4536cSHong Zhang c->free_a = PETSC_TRUE; 581e9e4536cSHong Zhang c->free_ij = PETSC_TRUE; 582e9e4536cSHong Zhang c->nonew = 0; 5832205254eSKarl Rupp 5844222ddf1SHong Zhang /* slower, less memory */ 5854222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 586e9e4536cSHong Zhang 587e9e4536cSHong Zhang /* set MatInfo */ 588e9e4536cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 589e9e4536cSHong Zhang if (afill < 1.0) afill = 1.0; 5904222ddf1SHong Zhang C->info.mallocs = ndouble; 5914222ddf1SHong Zhang C->info.fill_ratio_given = fill; 5924222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 593e9e4536cSHong Zhang 594e9e4536cSHong Zhang #if defined(PETSC_USE_INFO) 595e9e4536cSHong Zhang if (ci[am]) { 5969566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 5979566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 598e9e4536cSHong Zhang } else { 5999566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 600e9e4536cSHong Zhang } 601e9e4536cSHong Zhang #endif 602e9e4536cSHong Zhang PetscFunctionReturn(0); 603e9e4536cSHong Zhang } 604e9e4536cSHong Zhang 6054222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 6060ced3a2bSJed Brown { 6070ced3a2bSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 6080ced3a2bSJed Brown const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 6090ced3a2bSJed Brown PetscInt *ci,*cj,*bb; 6100ced3a2bSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 6110ced3a2bSJed Brown PetscReal afill; 6120ced3a2bSJed Brown PetscInt i,j,col,ndouble = 0; 6130298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 6140ced3a2bSJed Brown PetscHeap h; 6150ced3a2bSJed Brown 6160ced3a2bSJed Brown PetscFunctionBegin; 617cd093f1dSJed Brown /* Get ci and cj - by merging sorted rows using a heap */ 6180ced3a2bSJed Brown /*---------------------------------------------------------------------------------------------*/ 6190ced3a2bSJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 6209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+2,&ci)); 6210ced3a2bSJed Brown ci[0] = 0; 6220ced3a2bSJed Brown 6230ced3a2bSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 6249566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 6250ced3a2bSJed Brown current_space = free_space; 6260ced3a2bSJed Brown 6279566063dSJacob Faibussowitsch PetscCall(PetscHeapCreate(a->rmax,&h)); 6289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->rmax,&bb)); 6290ced3a2bSJed Brown 6300ced3a2bSJed Brown /* Determine ci and cj */ 6310ced3a2bSJed Brown for (i=0; i<am; i++) { 6320ced3a2bSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 6330ced3a2bSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 6340ced3a2bSJed Brown ci[i+1] = ci[i]; 6350ced3a2bSJed Brown /* Populate the min heap */ 6360ced3a2bSJed Brown for (j=0; j<anzi; j++) { 6370ced3a2bSJed Brown bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 6380ced3a2bSJed Brown if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 6399566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h,j,bj[bb[j]++])); 6400ced3a2bSJed Brown } 6410ced3a2bSJed Brown } 6420ced3a2bSJed Brown /* Pick off the min element, adding it to free space */ 6439566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h,&j,&col)); 6440ced3a2bSJed Brown while (j >= 0) { 6450ced3a2bSJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 6469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 6470ced3a2bSJed Brown ndouble++; 6480ced3a2bSJed Brown } 6490ced3a2bSJed Brown *(current_space->array++) = col; 6500ced3a2bSJed Brown current_space->local_used++; 6510ced3a2bSJed Brown current_space->local_remaining--; 6520ced3a2bSJed Brown ci[i+1]++; 6530ced3a2bSJed Brown 6540ced3a2bSJed Brown /* stash if anything else remains in this row of B */ 6559566063dSJacob Faibussowitsch if (bb[j] < bi[acol[j]+1]) PetscCall(PetscHeapStash(h,j,bj[bb[j]++])); 6560ced3a2bSJed Brown while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 6570ced3a2bSJed Brown PetscInt j2,col2; 6589566063dSJacob Faibussowitsch PetscCall(PetscHeapPeek(h,&j2,&col2)); 6590ced3a2bSJed Brown if (col2 != col) break; 6609566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h,&j2,&col2)); 6619566063dSJacob Faibussowitsch if (bb[j2] < bi[acol[j2]+1]) PetscCall(PetscHeapStash(h,j2,bj[bb[j2]++])); 6620ced3a2bSJed Brown } 6630ced3a2bSJed Brown /* Put any stashed elements back into the min heap */ 6649566063dSJacob Faibussowitsch PetscCall(PetscHeapUnstash(h)); 6659566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h,&j,&col)); 6660ced3a2bSJed Brown } 6670ced3a2bSJed Brown } 6689566063dSJacob Faibussowitsch PetscCall(PetscFree(bb)); 6699566063dSJacob Faibussowitsch PetscCall(PetscHeapDestroy(&h)); 6700ced3a2bSJed Brown 6710ced3a2bSJed Brown /* Column indices are in the list of free space */ 6720ced3a2bSJed Brown /* Allocate space for cj, initialize cj, and */ 6730ced3a2bSJed Brown /* destroy list of free space and other temporary array(s) */ 6749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am],&cj)); 6759566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 6760ced3a2bSJed Brown 6770ced3a2bSJed Brown /* put together the new symbolic matrix */ 6789566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 6799566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6800ced3a2bSJed Brown 6810ced3a2bSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6820ced3a2bSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 6834222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 6840ced3a2bSJed Brown c->free_a = PETSC_TRUE; 6850ced3a2bSJed Brown c->free_ij = PETSC_TRUE; 6860ced3a2bSJed Brown c->nonew = 0; 68726fbe8dcSKarl Rupp 6884222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 6890ced3a2bSJed Brown 6900ced3a2bSJed Brown /* set MatInfo */ 6910ced3a2bSJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 6920ced3a2bSJed Brown if (afill < 1.0) afill = 1.0; 6934222ddf1SHong Zhang C->info.mallocs = ndouble; 6944222ddf1SHong Zhang C->info.fill_ratio_given = fill; 6954222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 6960ced3a2bSJed Brown 6970ced3a2bSJed Brown #if defined(PETSC_USE_INFO) 6980ced3a2bSJed Brown if (ci[am]) { 6999566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 7009566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 7010ced3a2bSJed Brown } else { 7029566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 7030ced3a2bSJed Brown } 7040ced3a2bSJed Brown #endif 7050ced3a2bSJed Brown PetscFunctionReturn(0); 7060ced3a2bSJed Brown } 707e9e4536cSHong Zhang 7084222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 7098a07c6f1SJed Brown { 7108a07c6f1SJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 7118a07c6f1SJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 7128a07c6f1SJed Brown PetscInt *ci,*cj,*bb; 7138a07c6f1SJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 7148a07c6f1SJed Brown PetscReal afill; 7158a07c6f1SJed Brown PetscInt i,j,col,ndouble = 0; 7160298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 7178a07c6f1SJed Brown PetscHeap h; 7188a07c6f1SJed Brown PetscBT bt; 7198a07c6f1SJed Brown 7208a07c6f1SJed Brown PetscFunctionBegin; 721cd093f1dSJed Brown /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 7228a07c6f1SJed Brown /*---------------------------------------------------------------------------------------------*/ 7238a07c6f1SJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 7249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+2,&ci)); 7258a07c6f1SJed Brown ci[0] = 0; 7268a07c6f1SJed Brown 7278a07c6f1SJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 7289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 7292205254eSKarl Rupp 7308a07c6f1SJed Brown current_space = free_space; 7318a07c6f1SJed Brown 7329566063dSJacob Faibussowitsch PetscCall(PetscHeapCreate(a->rmax,&h)); 7339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->rmax,&bb)); 7349566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(bn,&bt)); 7358a07c6f1SJed Brown 7368a07c6f1SJed Brown /* Determine ci and cj */ 7378a07c6f1SJed Brown for (i=0; i<am; i++) { 7388a07c6f1SJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 7398a07c6f1SJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 7408a07c6f1SJed Brown const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 7418a07c6f1SJed Brown ci[i+1] = ci[i]; 7428a07c6f1SJed Brown /* Populate the min heap */ 7438a07c6f1SJed Brown for (j=0; j<anzi; j++) { 7448a07c6f1SJed Brown PetscInt brow = acol[j]; 7458a07c6f1SJed Brown for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 7468a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7478a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7489566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h,j,bcol)); 7498a07c6f1SJed Brown bb[j]++; 7508a07c6f1SJed Brown break; 7518a07c6f1SJed Brown } 7528a07c6f1SJed Brown } 7538a07c6f1SJed Brown } 7548a07c6f1SJed Brown /* Pick off the min element, adding it to free space */ 7559566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h,&j,&col)); 7568a07c6f1SJed Brown while (j >= 0) { 7578a07c6f1SJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 7580298fd71SBarry Smith fptr = NULL; /* need PetscBTMemzero */ 7599566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 7608a07c6f1SJed Brown ndouble++; 7618a07c6f1SJed Brown } 7628a07c6f1SJed Brown *(current_space->array++) = col; 7638a07c6f1SJed Brown current_space->local_used++; 7648a07c6f1SJed Brown current_space->local_remaining--; 7658a07c6f1SJed Brown ci[i+1]++; 7668a07c6f1SJed Brown 7678a07c6f1SJed Brown /* stash if anything else remains in this row of B */ 7688a07c6f1SJed Brown for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 7698a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7708a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7719566063dSJacob Faibussowitsch PetscCall(PetscHeapAdd(h,j,bcol)); 7728a07c6f1SJed Brown bb[j]++; 7738a07c6f1SJed Brown break; 7748a07c6f1SJed Brown } 7758a07c6f1SJed Brown } 7769566063dSJacob Faibussowitsch PetscCall(PetscHeapPop(h,&j,&col)); 7778a07c6f1SJed Brown } 7788a07c6f1SJed Brown if (fptr) { /* Clear the bits for this row */ 7799566063dSJacob Faibussowitsch for (; fptr<current_space->array; fptr++) PetscCall(PetscBTClear(bt,*fptr)); 7808a07c6f1SJed Brown } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 7819566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(bn,bt)); 7828a07c6f1SJed Brown } 7838a07c6f1SJed Brown } 7849566063dSJacob Faibussowitsch PetscCall(PetscFree(bb)); 7859566063dSJacob Faibussowitsch PetscCall(PetscHeapDestroy(&h)); 7869566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 7878a07c6f1SJed Brown 7888a07c6f1SJed Brown /* Column indices are in the list of free space */ 7898a07c6f1SJed Brown /* Allocate space for cj, initialize cj, and */ 7908a07c6f1SJed Brown /* destroy list of free space and other temporary array(s) */ 7919566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ci[am],&cj)); 7929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 7938a07c6f1SJed Brown 7948a07c6f1SJed Brown /* put together the new symbolic matrix */ 7959566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 7969566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 7978a07c6f1SJed Brown 7988a07c6f1SJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7998a07c6f1SJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 8004222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 8018a07c6f1SJed Brown c->free_a = PETSC_TRUE; 8028a07c6f1SJed Brown c->free_ij = PETSC_TRUE; 8038a07c6f1SJed Brown c->nonew = 0; 80426fbe8dcSKarl Rupp 8054222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 8068a07c6f1SJed Brown 8078a07c6f1SJed Brown /* set MatInfo */ 8088a07c6f1SJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 8098a07c6f1SJed Brown if (afill < 1.0) afill = 1.0; 8104222ddf1SHong Zhang C->info.mallocs = ndouble; 8114222ddf1SHong Zhang C->info.fill_ratio_given = fill; 8124222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 8138a07c6f1SJed Brown 8148a07c6f1SJed Brown #if defined(PETSC_USE_INFO) 8158a07c6f1SJed Brown if (ci[am]) { 8169566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 8179566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 8188a07c6f1SJed Brown } else { 8199566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 8208a07c6f1SJed Brown } 8218a07c6f1SJed Brown #endif 8228a07c6f1SJed Brown PetscFunctionReturn(0); 8238a07c6f1SJed Brown } 8248a07c6f1SJed Brown 8254222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 826d7ed1a4dSandi selinger { 827d7ed1a4dSandi selinger Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 828d7ed1a4dSandi selinger const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 829d7ed1a4dSandi selinger PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 830d7ed1a4dSandi selinger PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 831d7ed1a4dSandi selinger const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 832d7ed1a4dSandi selinger const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 833d7ed1a4dSandi selinger const PetscInt *brow_ptr[8],*brow_end[8]; 834d7ed1a4dSandi selinger PetscInt window[8]; 835d7ed1a4dSandi selinger PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 836d7ed1a4dSandi selinger PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 837d7ed1a4dSandi selinger PetscReal afill; 838f83700f2Sandi selinger PetscInt *workj_L1,*workj_L2,*workj_L3; 8397660c4dbSandi selinger PetscInt L1_nnz,L2_nnz; 840d7ed1a4dSandi selinger 841d7ed1a4dSandi selinger /* Step 1: Get upper bound on memory required for allocation. 842d7ed1a4dSandi selinger Because of the way virtual memory works, 843d7ed1a4dSandi selinger only the memory pages that are actually needed will be physically allocated. */ 844d7ed1a4dSandi selinger PetscFunctionBegin; 8459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+1,&ci)); 846d7ed1a4dSandi selinger for (i=0; i<am; i++) { 847d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 848d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 849d7ed1a4dSandi selinger a_rownnz = 0; 850d7ed1a4dSandi selinger for (k=0; k<anzi; ++k) { 851d7ed1a4dSandi selinger a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 852d7ed1a4dSandi selinger if (a_rownnz > bn) { 853d7ed1a4dSandi selinger a_rownnz = bn; 854d7ed1a4dSandi selinger break; 855d7ed1a4dSandi selinger } 856d7ed1a4dSandi selinger } 857d7ed1a4dSandi selinger a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 858d7ed1a4dSandi selinger } 859d7ed1a4dSandi selinger /* temporary work areas for merging rows */ 8609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L1)); 8619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L2)); 8629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a_maxrownnz,&workj_L3)); 863d7ed1a4dSandi selinger 8647660c4dbSandi selinger /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 8657660c4dbSandi selinger c_maxmem = 8*(ai[am]+bi[bm]); 866d7ed1a4dSandi selinger /* Step 2: Populate pattern for C */ 8679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(c_maxmem,&cj)); 868d7ed1a4dSandi selinger 869d7ed1a4dSandi selinger ci_nnz = 0; 870d7ed1a4dSandi selinger ci[0] = 0; 871d7ed1a4dSandi selinger worki_L1[0] = 0; 872d7ed1a4dSandi selinger worki_L2[0] = 0; 873d7ed1a4dSandi selinger for (i=0; i<am; i++) { 874d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 875d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 876d7ed1a4dSandi selinger rowsleft = anzi; 877d7ed1a4dSandi selinger inputcol_L1 = acol; 878d7ed1a4dSandi selinger L2_nnz = 0; 8797660c4dbSandi selinger L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 8807660c4dbSandi selinger worki_L2[1] = 0; 88108fe1b3cSKarl Rupp outputi_nnz = 0; 882d7ed1a4dSandi selinger 883d7ed1a4dSandi selinger /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 884d7ed1a4dSandi selinger while (ci_nnz+a_maxrownnz > c_maxmem) { 885d7ed1a4dSandi selinger c_maxmem *= 2; 886d7ed1a4dSandi selinger ndouble++; 8879566063dSJacob Faibussowitsch PetscCall(PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)); 888d7ed1a4dSandi selinger } 889d7ed1a4dSandi selinger 890d7ed1a4dSandi selinger while (rowsleft) { 891d7ed1a4dSandi selinger L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 892d7ed1a4dSandi selinger L1_nrows = 0; 8937660c4dbSandi selinger L1_nnz = 0; 894d7ed1a4dSandi selinger inputcol = inputcol_L1; 895d7ed1a4dSandi selinger inputi = bi; 896d7ed1a4dSandi selinger inputj = bj; 897d7ed1a4dSandi selinger 898d7ed1a4dSandi selinger /* The following macro is used to specialize for small rows in A. 899d7ed1a4dSandi selinger This helps with compiler unrolling, improving performance substantially. 900f83700f2Sandi selinger Input: inputj inputi inputcol bn 901d7ed1a4dSandi selinger Output: outputj outputi_nnz */ 902d7ed1a4dSandi selinger #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 903d7ed1a4dSandi selinger window_min = bn; \ 9047660c4dbSandi selinger outputi_nnz = 0; \ 9057660c4dbSandi selinger for (k=0; k<ANNZ; ++k) { \ 906d7ed1a4dSandi selinger brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 907d7ed1a4dSandi selinger brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 908d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 909d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 910d7ed1a4dSandi selinger } \ 911d7ed1a4dSandi selinger while (window_min < bn) { \ 912d7ed1a4dSandi selinger outputj[outputi_nnz++] = window_min; \ 913d7ed1a4dSandi selinger /* advance front and compute new minimum */ \ 914d7ed1a4dSandi selinger old_window_min = window_min; \ 915d7ed1a4dSandi selinger window_min = bn; \ 916d7ed1a4dSandi selinger for (k=0; k<ANNZ; ++k) { \ 917d7ed1a4dSandi selinger if (window[k] == old_window_min) { \ 918d7ed1a4dSandi selinger brow_ptr[k]++; \ 919d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 920d7ed1a4dSandi selinger } \ 921d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 922d7ed1a4dSandi selinger } \ 923d7ed1a4dSandi selinger } 924d7ed1a4dSandi selinger 925d7ed1a4dSandi selinger /************** L E V E L 1 ***************/ 926d7ed1a4dSandi selinger /* Merge up to 8 rows of B to L1 work array*/ 927d7ed1a4dSandi selinger while (L1_rowsleft) { 9287660c4dbSandi selinger outputi_nnz = 0; 9297660c4dbSandi selinger if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 9307660c4dbSandi selinger else outputj = cj + ci_nnz; /* Merge directly to C */ 9317660c4dbSandi selinger 932d7ed1a4dSandi selinger switch (L1_rowsleft) { 933d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 934d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 935d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 936d7ed1a4dSandi selinger inputcol += L1_rowsleft; 937d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 938d7ed1a4dSandi selinger L1_rowsleft = 0; 939d7ed1a4dSandi selinger break; 940d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); 941d7ed1a4dSandi selinger inputcol += L1_rowsleft; 942d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 943d7ed1a4dSandi selinger L1_rowsleft = 0; 944d7ed1a4dSandi selinger break; 945d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); 946d7ed1a4dSandi selinger inputcol += L1_rowsleft; 947d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 948d7ed1a4dSandi selinger L1_rowsleft = 0; 949d7ed1a4dSandi selinger break; 950d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); 951d7ed1a4dSandi selinger inputcol += L1_rowsleft; 952d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 953d7ed1a4dSandi selinger L1_rowsleft = 0; 954d7ed1a4dSandi selinger break; 955d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); 956d7ed1a4dSandi selinger inputcol += L1_rowsleft; 957d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 958d7ed1a4dSandi selinger L1_rowsleft = 0; 959d7ed1a4dSandi selinger break; 960d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); 961d7ed1a4dSandi selinger inputcol += L1_rowsleft; 962d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 963d7ed1a4dSandi selinger L1_rowsleft = 0; 964d7ed1a4dSandi selinger break; 965d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); 966d7ed1a4dSandi selinger inputcol += L1_rowsleft; 967d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 968d7ed1a4dSandi selinger L1_rowsleft = 0; 969d7ed1a4dSandi selinger break; 970d7ed1a4dSandi selinger default: MatMatMultSymbolic_RowMergeMacro(8); 971d7ed1a4dSandi selinger inputcol += 8; 972d7ed1a4dSandi selinger rowsleft -= 8; 973d7ed1a4dSandi selinger L1_rowsleft -= 8; 974d7ed1a4dSandi selinger break; 975d7ed1a4dSandi selinger } 976d7ed1a4dSandi selinger inputcol_L1 = inputcol; 9777660c4dbSandi selinger L1_nnz += outputi_nnz; 9787660c4dbSandi selinger worki_L1[++L1_nrows] = L1_nnz; 979d7ed1a4dSandi selinger } 980d7ed1a4dSandi selinger 981d7ed1a4dSandi selinger /********************** L E V E L 2 ************************/ 982d7ed1a4dSandi selinger /* Merge from L1 work array to either C or to L2 work array */ 983d7ed1a4dSandi selinger if (anzi > 8) { 984d7ed1a4dSandi selinger inputi = worki_L1; 985d7ed1a4dSandi selinger inputj = workj_L1; 986d7ed1a4dSandi selinger inputcol = workcol; 987d7ed1a4dSandi selinger outputi_nnz = 0; 988d7ed1a4dSandi selinger 989d7ed1a4dSandi selinger if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 990d7ed1a4dSandi selinger else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 991d7ed1a4dSandi selinger 992d7ed1a4dSandi selinger switch (L1_nrows) { 993d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 994d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 995d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 996d7ed1a4dSandi selinger break; 997d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 998d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 999d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1000d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1001d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1002d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1003d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1004d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 1005d7ed1a4dSandi selinger } 1006d7ed1a4dSandi selinger L2_nnz += outputi_nnz; 1007d7ed1a4dSandi selinger worki_L2[++L2_nrows] = L2_nnz; 1008d7ed1a4dSandi selinger 10097660c4dbSandi selinger /************************ L E V E L 3 **********************/ 10107660c4dbSandi selinger /* Merge from L2 work array to either C or to L2 work array */ 1011d7ed1a4dSandi selinger if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1012d7ed1a4dSandi selinger inputi = worki_L2; 1013d7ed1a4dSandi selinger inputj = workj_L2; 1014d7ed1a4dSandi selinger inputcol = workcol; 1015d7ed1a4dSandi selinger outputi_nnz = 0; 1016f83700f2Sandi selinger if (rowsleft) outputj = workj_L3; 1017d7ed1a4dSandi selinger else outputj = cj + ci_nnz; 1018d7ed1a4dSandi selinger switch (L2_nrows) { 1019d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 1020d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 1021d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1022d7ed1a4dSandi selinger break; 1023d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1024d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1025d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1026d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1027d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1028d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1029d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1030d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1031d7ed1a4dSandi selinger } 1032d7ed1a4dSandi selinger L2_nrows = 1; 10337660c4dbSandi selinger L2_nnz = outputi_nnz; 10347660c4dbSandi selinger worki_L2[1] = outputi_nnz; 10357660c4dbSandi selinger /* Copy to workj_L2 */ 1036d7ed1a4dSandi selinger if (rowsleft) { 10377660c4dbSandi selinger for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1038d7ed1a4dSandi selinger } 1039d7ed1a4dSandi selinger } 1040d7ed1a4dSandi selinger } 1041d7ed1a4dSandi selinger } /* while (rowsleft) */ 1042d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro 1043d7ed1a4dSandi selinger 1044d7ed1a4dSandi selinger /* terminate current row */ 1045d7ed1a4dSandi selinger ci_nnz += outputi_nnz; 1046d7ed1a4dSandi selinger ci[i+1] = ci_nnz; 1047d7ed1a4dSandi selinger } 1048d7ed1a4dSandi selinger 1049d7ed1a4dSandi selinger /* Step 3: Create the new symbolic matrix */ 10509566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 10519566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1052d7ed1a4dSandi selinger 1053d7ed1a4dSandi selinger /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1054d7ed1a4dSandi selinger /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 10554222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1056d7ed1a4dSandi selinger c->free_a = PETSC_TRUE; 1057d7ed1a4dSandi selinger c->free_ij = PETSC_TRUE; 1058d7ed1a4dSandi selinger c->nonew = 0; 1059d7ed1a4dSandi selinger 10604222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1061d7ed1a4dSandi selinger 1062d7ed1a4dSandi selinger /* set MatInfo */ 1063d7ed1a4dSandi selinger afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1064d7ed1a4dSandi selinger if (afill < 1.0) afill = 1.0; 10654222ddf1SHong Zhang C->info.mallocs = ndouble; 10664222ddf1SHong Zhang C->info.fill_ratio_given = fill; 10674222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1068d7ed1a4dSandi selinger 1069d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO) 1070d7ed1a4dSandi selinger if (ci[am]) { 10719566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 10729566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1073d7ed1a4dSandi selinger } else { 10749566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 1075d7ed1a4dSandi selinger } 1076d7ed1a4dSandi selinger #endif 1077d7ed1a4dSandi selinger 1078d7ed1a4dSandi selinger /* Step 4: Free temporary work areas */ 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L1)); 10809566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L2)); 10819566063dSJacob Faibussowitsch PetscCall(PetscFree(workj_L3)); 1082d7ed1a4dSandi selinger PetscFunctionReturn(0); 1083d7ed1a4dSandi selinger } 1084d7ed1a4dSandi selinger 1085cd093f1dSJed Brown /* concatenate unique entries and then sort */ 10864222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1087cd093f1dSJed Brown { 1088cd093f1dSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1089cd093f1dSJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 10908c78258cSHong Zhang PetscInt *ci,*cj,bcol; 1091cd093f1dSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1092cd093f1dSJed Brown PetscReal afill; 1093cd093f1dSJed Brown PetscInt i,j,ndouble = 0; 1094cd093f1dSJed Brown PetscSegBuffer seg,segrow; 1095cd093f1dSJed Brown char *seen; 1096cd093f1dSJed Brown 1097cd093f1dSJed Brown PetscFunctionBegin; 10989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am+1,&ci)); 1099cd093f1dSJed Brown ci[0] = 0; 1100cd093f1dSJed Brown 1101cd093f1dSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 11029566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg)); 11039566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscInt),100,&segrow)); 11049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(bn,&seen)); 1105cd093f1dSJed Brown 1106cd093f1dSJed Brown /* Determine ci and cj */ 1107cd093f1dSJed Brown for (i=0; i<am; i++) { 1108cd093f1dSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1109cd093f1dSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1110cd093f1dSJed Brown PetscInt packlen = 0,*PETSC_RESTRICT crow; 11118c78258cSHong Zhang 1112cd093f1dSJed Brown /* Pack segrow */ 1113cd093f1dSJed Brown for (j=0; j<anzi; j++) { 1114cd093f1dSJed Brown PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1115cd093f1dSJed Brown for (k=bjstart; k<bjend; k++) { 11168c78258cSHong Zhang bcol = bj[k]; 1117cd093f1dSJed Brown if (!seen[bcol]) { /* new entry */ 1118cd093f1dSJed Brown PetscInt *PETSC_RESTRICT slot; 11199566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1120cd093f1dSJed Brown *slot = bcol; 1121cd093f1dSJed Brown seen[bcol] = 1; 1122cd093f1dSJed Brown packlen++; 1123cd093f1dSJed Brown } 1124cd093f1dSJed Brown } 1125cd093f1dSJed Brown } 11268c78258cSHong Zhang 11278c78258cSHong Zhang /* Check i-th diagonal entry */ 11288c78258cSHong Zhang if (C->force_diagonals && !seen[i]) { 11298c78258cSHong Zhang PetscInt *PETSC_RESTRICT slot; 11309566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 11318c78258cSHong Zhang *slot = i; 11328c78258cSHong Zhang seen[i] = 1; 11338c78258cSHong Zhang packlen++; 11348c78258cSHong Zhang } 11358c78258cSHong Zhang 11369566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGetInts(seg,packlen,&crow)); 11379566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractTo(segrow,crow)); 11389566063dSJacob Faibussowitsch PetscCall(PetscSortInt(packlen,crow)); 1139cd093f1dSJed Brown ci[i+1] = ci[i] + packlen; 1140cd093f1dSJed Brown for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1141cd093f1dSJed Brown } 11429566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segrow)); 11439566063dSJacob Faibussowitsch PetscCall(PetscFree(seen)); 1144cd093f1dSJed Brown 1145cd093f1dSJed Brown /* Column indices are in the segmented buffer */ 11469566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(seg,&cj)); 11479566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&seg)); 1148cd093f1dSJed Brown 1149cd093f1dSJed Brown /* put together the new symbolic matrix */ 11509566063dSJacob Faibussowitsch PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 11519566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1152cd093f1dSJed Brown 1153cd093f1dSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1154cd093f1dSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 11554222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1156cd093f1dSJed Brown c->free_a = PETSC_TRUE; 1157cd093f1dSJed Brown c->free_ij = PETSC_TRUE; 1158cd093f1dSJed Brown c->nonew = 0; 1159cd093f1dSJed Brown 11604222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1161cd093f1dSJed Brown 1162cd093f1dSJed Brown /* set MatInfo */ 11632a09556fSStefano Zampini afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1164cd093f1dSJed Brown if (afill < 1.0) afill = 1.0; 11654222ddf1SHong Zhang C->info.mallocs = ndouble; 11664222ddf1SHong Zhang C->info.fill_ratio_given = fill; 11674222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1168cd093f1dSJed Brown 1169cd093f1dSJed Brown #if defined(PETSC_USE_INFO) 1170cd093f1dSJed Brown if (ci[am]) { 11719566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 11729566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1173cd093f1dSJed Brown } else { 11749566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Empty matrix product\n")); 1175cd093f1dSJed Brown } 1176cd093f1dSJed Brown #endif 1177cd093f1dSJed Brown PetscFunctionReturn(0); 1178cd093f1dSJed Brown } 1179cd093f1dSJed Brown 11806718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 11812128a86cSHong Zhang { 11826718818eSStefano Zampini Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 11832128a86cSHong Zhang 11842128a86cSHong Zhang PetscFunctionBegin; 11859566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringDestroy(&abt->matcoloring)); 11869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&abt->Bt_den)); 11879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&abt->ABt_den)); 11889566063dSJacob Faibussowitsch PetscCall(PetscFree(abt)); 11892128a86cSHong Zhang PetscFunctionReturn(0); 11902128a86cSHong Zhang } 11912128a86cSHong Zhang 11924222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1193bc011b1eSHong Zhang { 119481d82fe4SHong Zhang Mat Bt; 119540192850SHong Zhang Mat_MatMatTransMult *abt; 11964222ddf1SHong Zhang Mat_Product *product = C->product; 11976718818eSStefano Zampini char *alg; 1198d2853540SHong Zhang 119981d82fe4SHong Zhang PetscFunctionBegin; 120028b400f6SJacob Faibussowitsch PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 120128b400f6SJacob Faibussowitsch PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 12026718818eSStefano Zampini 120381d82fe4SHong Zhang /* create symbolic Bt */ 12047fb60732SBarry Smith PetscCall(MatTransposeSymbolic(B,&Bt)); 12059566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 12069566063dSJacob Faibussowitsch PetscCall(MatSetType(Bt,((PetscObject)A)->type_name)); 120781d82fe4SHong Zhang 120881d82fe4SHong Zhang /* get symbolic C=A*Bt */ 12099566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(product->alg,&alg)); 12109566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,"sorted")); /* set algorithm for C = A*Bt */ 12119566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C)); 12129566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,alg)); /* resume original algorithm for ABt product */ 12139566063dSJacob Faibussowitsch PetscCall(PetscFree(alg)); 121481d82fe4SHong Zhang 1215a5b23f4aSJose E. Roman /* create a supporting struct for reuse intermediate dense matrices with matcoloring */ 12169566063dSJacob Faibussowitsch PetscCall(PetscNew(&abt)); 12172128a86cSHong Zhang 12186718818eSStefano Zampini product->data = abt; 12196718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 12206718818eSStefano Zampini 12214222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 12222128a86cSHong Zhang 12234222ddf1SHong Zhang abt->usecoloring = PETSC_FALSE; 12249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"color",&abt->usecoloring)); 122540192850SHong Zhang if (abt->usecoloring) { 1226b9af6bddSHong Zhang /* Create MatTransposeColoring from symbolic C=A*B^T */ 1227b9af6bddSHong Zhang MatTransposeColoring matcoloring; 1228335efc43SPeter Brune MatColoring coloring; 12292128a86cSHong Zhang ISColoring iscoloring; 12302128a86cSHong Zhang Mat Bt_dense,C_dense; 1231e8354b3eSHong Zhang 12324222ddf1SHong Zhang /* inode causes memory problem */ 12339566063dSJacob Faibussowitsch PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 12344222ddf1SHong Zhang 12359566063dSJacob Faibussowitsch PetscCall(MatColoringCreate(C,&coloring)); 12369566063dSJacob Faibussowitsch PetscCall(MatColoringSetDistance(coloring,2)); 12379566063dSJacob Faibussowitsch PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 12389566063dSJacob Faibussowitsch PetscCall(MatColoringSetFromOptions(coloring)); 12399566063dSJacob Faibussowitsch PetscCall(MatColoringApply(coloring,&iscoloring)); 12409566063dSJacob Faibussowitsch PetscCall(MatColoringDestroy(&coloring)); 12419566063dSJacob Faibussowitsch PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 12422205254eSKarl Rupp 124340192850SHong Zhang abt->matcoloring = matcoloring; 12442205254eSKarl Rupp 12459566063dSJacob Faibussowitsch PetscCall(ISColoringDestroy(&iscoloring)); 12462128a86cSHong Zhang 12472128a86cSHong Zhang /* Create Bt_dense and C_dense = A*Bt_dense */ 12489566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&Bt_dense)); 12499566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 12509566063dSJacob Faibussowitsch PetscCall(MatSetType(Bt_dense,MATSEQDENSE)); 12519566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(Bt_dense,NULL)); 12522205254eSKarl Rupp 12532128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 125440192850SHong Zhang abt->Bt_den = Bt_dense; 12552128a86cSHong Zhang 12569566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&C_dense)); 12579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors)); 12589566063dSJacob Faibussowitsch PetscCall(MatSetType(C_dense,MATSEQDENSE)); 12599566063dSJacob Faibussowitsch PetscCall(MatSeqDenseSetPreallocation(C_dense,NULL)); 12602205254eSKarl Rupp 12612128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 126240192850SHong Zhang abt->ABt_den = C_dense; 1263f94ccd6cSHong Zhang 1264f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO) 12651ce71dffSSatish Balay { 12664222ddf1SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 12679566063dSJacob Faibussowitsch PetscCall(PetscInfo(C,"Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(double)(((PetscReal)(c->nz))/((PetscReal)(A->rmap->n*matcoloring->ncolors))))); 12681ce71dffSSatish Balay } 1269f94ccd6cSHong Zhang #endif 12702128a86cSHong Zhang } 1271e99be685SHong Zhang /* clean up */ 12729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Bt)); 12735df89d91SHong Zhang PetscFunctionReturn(0); 12745df89d91SHong Zhang } 12755df89d91SHong Zhang 12766fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 12775df89d91SHong Zhang { 12785df89d91SHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1279e2cac8caSJed Brown PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 128081d82fe4SHong Zhang PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 12815df89d91SHong Zhang PetscLogDouble flops=0.0; 1282aa1aec99SHong Zhang MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 12836718818eSStefano Zampini Mat_MatMatTransMult *abt; 12846718818eSStefano Zampini Mat_Product *product = C->product; 12855df89d91SHong Zhang 12865df89d91SHong Zhang PetscFunctionBegin; 128728b400f6SJacob Faibussowitsch PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 12886718818eSStefano Zampini abt = (Mat_MatMatTransMult *)product->data; 128928b400f6SJacob Faibussowitsch PetscCheck(abt,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 129058ed3ceaSHong Zhang /* clear old values in C */ 129158ed3ceaSHong Zhang if (!c->a) { 12929566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 129358ed3ceaSHong Zhang c->a = ca; 129458ed3ceaSHong Zhang c->free_a = PETSC_TRUE; 129558ed3ceaSHong Zhang } else { 129658ed3ceaSHong Zhang ca = c->a; 12979566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm]+1)); 129858ed3ceaSHong Zhang } 129958ed3ceaSHong Zhang 130040192850SHong Zhang if (abt->usecoloring) { 130140192850SHong Zhang MatTransposeColoring matcoloring = abt->matcoloring; 130240192850SHong Zhang Mat Bt_dense,C_dense = abt->ABt_den; 1303c8db22f6SHong Zhang 1304b9af6bddSHong Zhang /* Get Bt_dense by Apply MatTransposeColoring to B */ 130540192850SHong Zhang Bt_dense = abt->Bt_den; 13069566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplySpToDen(matcoloring,B,Bt_dense)); 1307c8db22f6SHong Zhang 1308c8db22f6SHong Zhang /* C_dense = A*Bt_dense */ 13099566063dSJacob Faibussowitsch PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense)); 1310c8db22f6SHong Zhang 13112128a86cSHong Zhang /* Recover C from C_dense */ 13129566063dSJacob Faibussowitsch PetscCall(MatTransColoringApplyDenToSp(matcoloring,C_dense,C)); 1313c8db22f6SHong Zhang PetscFunctionReturn(0); 1314c8db22f6SHong Zhang } 1315c8db22f6SHong Zhang 13161fa1209cSHong Zhang for (i=0; i<cm; i++) { 131781d82fe4SHong Zhang anzi = ai[i+1] - ai[i]; 131881d82fe4SHong Zhang acol = aj + ai[i]; 13196973769bSHong Zhang aval = aa + ai[i]; 13201fa1209cSHong Zhang cnzi = ci[i+1] - ci[i]; 13211fa1209cSHong Zhang ccol = cj + ci[i]; 13226973769bSHong Zhang cval = ca + ci[i]; 13231fa1209cSHong Zhang for (j=0; j<cnzi; j++) { 132481d82fe4SHong Zhang brow = ccol[j]; 132581d82fe4SHong Zhang bnzj = bi[brow+1] - bi[brow]; 132681d82fe4SHong Zhang bcol = bj + bi[brow]; 13276973769bSHong Zhang bval = ba + bi[brow]; 13286973769bSHong Zhang 132981d82fe4SHong Zhang /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 133081d82fe4SHong Zhang nexta = 0; nextb = 0; 133181d82fe4SHong Zhang while (nexta<anzi && nextb<bnzj) { 13327b6d5e96SMark Adams while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 133381d82fe4SHong Zhang if (nexta == anzi) break; 13347b6d5e96SMark Adams while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 133581d82fe4SHong Zhang if (nextb == bnzj) break; 133681d82fe4SHong Zhang if (acol[nexta] == bcol[nextb]) { 13376973769bSHong Zhang cval[j] += aval[nexta]*bval[nextb]; 133881d82fe4SHong Zhang nexta++; nextb++; 133981d82fe4SHong Zhang flops += 2; 13401fa1209cSHong Zhang } 13411fa1209cSHong Zhang } 134281d82fe4SHong Zhang } 134381d82fe4SHong Zhang } 13449566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 13459566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 13469566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 13475df89d91SHong Zhang PetscFunctionReturn(0); 13485df89d91SHong Zhang } 13495df89d91SHong Zhang 13506718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 13516d373c3eSHong Zhang { 13526718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 13536d373c3eSHong Zhang 13546d373c3eSHong Zhang PetscFunctionBegin; 13559566063dSJacob Faibussowitsch PetscCall(MatDestroy(&atb->At)); 13561baa6e33SBarry Smith if (atb->destroy) PetscCall((*atb->destroy)(atb->data)); 13579566063dSJacob Faibussowitsch PetscCall(PetscFree(atb)); 13586d373c3eSHong Zhang PetscFunctionReturn(0); 13596d373c3eSHong Zhang } 13606d373c3eSHong Zhang 13614222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 13625df89d91SHong Zhang { 1363089a957eSStefano Zampini Mat At = NULL; 13644222ddf1SHong Zhang Mat_Product *product = C->product; 1365089a957eSStefano Zampini PetscBool flg,def,square; 1366bc011b1eSHong Zhang 1367bc011b1eSHong Zhang PetscFunctionBegin; 1368089a957eSStefano Zampini MatCheckProduct(C,4); 1369b94d7dedSBarry Smith square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE); 13704222ddf1SHong Zhang /* outerproduct */ 13719566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"outerproduct",&flg)); 13724222ddf1SHong Zhang if (flg) { 1373bc011b1eSHong Zhang /* create symbolic At */ 1374089a957eSStefano Zampini if (!square) { 13757fb60732SBarry Smith PetscCall(MatTransposeSymbolic(A,&At)); 13769566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 13779566063dSJacob Faibussowitsch PetscCall(MatSetType(At,((PetscObject)A)->type_name)); 1378089a957eSStefano Zampini } 1379bc011b1eSHong Zhang /* get symbolic C=At*B */ 13809566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,"sorted")); 13819566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1382bc011b1eSHong Zhang 1383bc011b1eSHong Zhang /* clean up */ 1384089a957eSStefano Zampini if (!square) { 13859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&At)); 1386089a957eSStefano Zampini } 13876d373c3eSHong Zhang 13884222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 13899566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,"outerproduct")); 13904222ddf1SHong Zhang PetscFunctionReturn(0); 13914222ddf1SHong Zhang } 13924222ddf1SHong Zhang 13934222ddf1SHong Zhang /* matmatmult */ 13949566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"default",&def)); 13959566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"at*b",&flg)); 1396089a957eSStefano Zampini if (flg || def) { 13974222ddf1SHong Zhang Mat_MatTransMatMult *atb; 13984222ddf1SHong Zhang 139928b400f6SJacob Faibussowitsch PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 14009566063dSJacob Faibussowitsch PetscCall(PetscNew(&atb)); 1401089a957eSStefano Zampini if (!square) { 1402acd337a6SBarry Smith PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 1403089a957eSStefano Zampini } 14049566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,"sorted")); 14059566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 14069566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,"at*b")); 14076718818eSStefano Zampini product->data = atb; 14086718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 14094222ddf1SHong Zhang atb->At = At; 14104222ddf1SHong Zhang 14114222ddf1SHong Zhang C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 14124222ddf1SHong Zhang PetscFunctionReturn(0); 14134222ddf1SHong Zhang } 14144222ddf1SHong Zhang 14154222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1416bc011b1eSHong Zhang } 1417bc011b1eSHong Zhang 141875648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1419bc011b1eSHong Zhang { 14200fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1421d0f46423SBarry Smith PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1422d0f46423SBarry Smith PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1423d13dce4bSSatish Balay PetscLogDouble flops=0.0; 1424aa1aec99SHong Zhang MatScalar *aa=a->a,*ba,*ca,*caj; 1425bc011b1eSHong Zhang 1426bc011b1eSHong Zhang PetscFunctionBegin; 1427aa1aec99SHong Zhang if (!c->a) { 14289566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 14292205254eSKarl Rupp 1430aa1aec99SHong Zhang c->a = ca; 1431aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 1432aa1aec99SHong Zhang } else { 1433aa1aec99SHong Zhang ca = c->a; 14349566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ca,ci[cm])); 1435aa1aec99SHong Zhang } 1436bc011b1eSHong Zhang 1437bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1438bc011b1eSHong Zhang for (i=0; i<am; i++) { 1439bc011b1eSHong Zhang bj = b->j + bi[i]; 1440bc011b1eSHong Zhang ba = b->a + bi[i]; 1441bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 1442bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 1443bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 1444bc011b1eSHong Zhang nextb = 0; 14450fbc74f4SHong Zhang crow = *aj++; 1446bc011b1eSHong Zhang cjj = cj + ci[crow]; 1447bc011b1eSHong Zhang caj = ca + ci[crow]; 1448bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 1449bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 14500fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 14510fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 1452bc011b1eSHong Zhang nextb++; 1453bc011b1eSHong Zhang } 1454bc011b1eSHong Zhang } 1455bc011b1eSHong Zhang flops += 2*bnzi; 14560fbc74f4SHong Zhang aa++; 1457bc011b1eSHong Zhang } 1458bc011b1eSHong Zhang } 1459bc011b1eSHong Zhang 1460bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 14619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 14629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 14639566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(flops)); 1464bc011b1eSHong Zhang PetscFunctionReturn(0); 1465bc011b1eSHong Zhang } 14669123193aSHong Zhang 14674222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 14689123193aSHong Zhang { 14699123193aSHong Zhang PetscFunctionBegin; 14709566063dSJacob Faibussowitsch PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C)); 14714222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 14729123193aSHong Zhang PetscFunctionReturn(0); 14739123193aSHong Zhang } 14749123193aSHong Zhang 147593aa15f2SStefano Zampini PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 14769123193aSHong Zhang { 1477f32d5d43SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 14781ca9667aSStefano Zampini PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1479a4af7ca8SStefano Zampini const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1480daf57211SHong Zhang const PetscInt *aj; 148175f6d85dSStefano Zampini PetscInt cm=C->rmap->n,cn=B->cmap->n,bm,am=A->rmap->n; 148275f6d85dSStefano Zampini PetscInt clda; 148375f6d85dSStefano Zampini PetscInt am4,bm4,col,i,j,n; 14849123193aSHong Zhang 14859123193aSHong Zhang PetscFunctionBegin; 1486f32d5d43SBarry Smith if (!cm || !cn) PetscFunctionReturn(0); 14879566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(A,&av)); 148893aa15f2SStefano Zampini if (add) { 14899566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(C,&c)); 149093aa15f2SStefano Zampini } else { 14919566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayWrite(C,&c)); 149293aa15f2SStefano Zampini } 14939566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(B,&b)); 14949566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(B,&bm)); 14959566063dSJacob Faibussowitsch PetscCall(MatDenseGetLDA(C,&clda)); 149675f6d85dSStefano Zampini am4 = 4*clda; 149775f6d85dSStefano Zampini bm4 = 4*bm; 1498f32d5d43SBarry Smith b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 14991ca9667aSStefano Zampini c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 15001ca9667aSStefano Zampini for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 15011ca9667aSStefano Zampini for (i=0; i<am; i++) { /* over rows of A in those columns */ 1502f32d5d43SBarry Smith r1 = r2 = r3 = r4 = 0.0; 1503f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1504f32d5d43SBarry Smith aj = a->j + a->i[i]; 1505a4af7ca8SStefano Zampini aa = av + a->i[i]; 1506f32d5d43SBarry Smith for (j=0; j<n; j++) { 15071ca9667aSStefano Zampini const PetscScalar aatmp = aa[j]; 15081ca9667aSStefano Zampini const PetscInt ajtmp = aj[j]; 1509730858b9SHong Zhang r1 += aatmp*b1[ajtmp]; 1510730858b9SHong Zhang r2 += aatmp*b2[ajtmp]; 1511730858b9SHong Zhang r3 += aatmp*b3[ajtmp]; 1512730858b9SHong Zhang r4 += aatmp*b4[ajtmp]; 15139123193aSHong Zhang } 151493aa15f2SStefano Zampini if (add) { 151587753ddeSHong Zhang c1[i] += r1; 151687753ddeSHong Zhang c2[i] += r2; 151787753ddeSHong Zhang c3[i] += r3; 151887753ddeSHong Zhang c4[i] += r4; 151993aa15f2SStefano Zampini } else { 152093aa15f2SStefano Zampini c1[i] = r1; 152193aa15f2SStefano Zampini c2[i] = r2; 152293aa15f2SStefano Zampini c3[i] = r3; 152393aa15f2SStefano Zampini c4[i] = r4; 152493aa15f2SStefano Zampini } 1525f32d5d43SBarry Smith } 1526730858b9SHong Zhang b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1527730858b9SHong Zhang c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1528f32d5d43SBarry Smith } 152993aa15f2SStefano Zampini /* process remaining columns */ 153093aa15f2SStefano Zampini if (col != cn) { 153193aa15f2SStefano Zampini PetscInt rc = cn-col; 153293aa15f2SStefano Zampini 153393aa15f2SStefano Zampini if (rc == 1) { 153493aa15f2SStefano Zampini for (i=0; i<am; i++) { 1535f32d5d43SBarry Smith r1 = 0.0; 1536f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1537f32d5d43SBarry Smith aj = a->j + a->i[i]; 1538a4af7ca8SStefano Zampini aa = av + a->i[i]; 153993aa15f2SStefano Zampini for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 154093aa15f2SStefano Zampini if (add) c1[i] += r1; 154193aa15f2SStefano Zampini else c1[i] = r1; 154293aa15f2SStefano Zampini } 154393aa15f2SStefano Zampini } else if (rc == 2) { 154493aa15f2SStefano Zampini for (i=0; i<am; i++) { 154593aa15f2SStefano Zampini r1 = r2 = 0.0; 154693aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 154793aa15f2SStefano Zampini aj = a->j + a->i[i]; 154893aa15f2SStefano Zampini aa = av + a->i[i]; 1549f32d5d43SBarry Smith for (j=0; j<n; j++) { 155093aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 155193aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 155293aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 155393aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 1554f32d5d43SBarry Smith } 155593aa15f2SStefano Zampini if (add) { 155687753ddeSHong Zhang c1[i] += r1; 155793aa15f2SStefano Zampini c2[i] += r2; 155893aa15f2SStefano Zampini } else { 155993aa15f2SStefano Zampini c1[i] = r1; 156093aa15f2SStefano Zampini c2[i] = r2; 1561f32d5d43SBarry Smith } 156293aa15f2SStefano Zampini } 156393aa15f2SStefano Zampini } else { 156493aa15f2SStefano Zampini for (i=0; i<am; i++) { 156593aa15f2SStefano Zampini r1 = r2 = r3 = 0.0; 156693aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 156793aa15f2SStefano Zampini aj = a->j + a->i[i]; 156893aa15f2SStefano Zampini aa = av + a->i[i]; 156993aa15f2SStefano Zampini for (j=0; j<n; j++) { 157093aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 157193aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 157293aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 157393aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 157493aa15f2SStefano Zampini r3 += aatmp*b3[ajtmp]; 157593aa15f2SStefano Zampini } 157693aa15f2SStefano Zampini if (add) { 157793aa15f2SStefano Zampini c1[i] += r1; 157893aa15f2SStefano Zampini c2[i] += r2; 157993aa15f2SStefano Zampini c3[i] += r3; 158093aa15f2SStefano Zampini } else { 158193aa15f2SStefano Zampini c1[i] = r1; 158293aa15f2SStefano Zampini c2[i] = r2; 158393aa15f2SStefano Zampini c3[i] = r3; 158493aa15f2SStefano Zampini } 158593aa15f2SStefano Zampini } 158693aa15f2SStefano Zampini } 1587f32d5d43SBarry Smith } 15889566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(cn*(2.0*a->nz))); 158993aa15f2SStefano Zampini if (add) { 15909566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(C,&c)); 159193aa15f2SStefano Zampini } else { 15929566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayWrite(C,&c)); 159393aa15f2SStefano Zampini } 15949566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(B,&b)); 15959566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 1596f32d5d43SBarry Smith PetscFunctionReturn(0); 1597f32d5d43SBarry Smith } 1598f32d5d43SBarry Smith 159987753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1600f32d5d43SBarry Smith { 1601f32d5d43SBarry Smith PetscFunctionBegin; 160208401ef6SPierre Jolivet PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n); 160308401ef6SPierre Jolivet PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n); 160408401ef6SPierre Jolivet PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n); 16054324174eSBarry Smith 16069566063dSJacob Faibussowitsch PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE)); 16079123193aSHong Zhang PetscFunctionReturn(0); 16089123193aSHong Zhang } 1609b1683b59SHong Zhang 16104222ddf1SHong Zhang /* ------------------------------------------------------- */ 16114222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 16124222ddf1SHong Zhang { 16134222ddf1SHong Zhang PetscFunctionBegin; 16144222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 16154222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16164222ddf1SHong Zhang PetscFunctionReturn(0); 16174222ddf1SHong Zhang } 16184222ddf1SHong Zhang 16196718818eSStefano Zampini PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 16206718818eSStefano Zampini 16214222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 16224222ddf1SHong Zhang { 16234222ddf1SHong Zhang PetscFunctionBegin; 16246718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16254222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB; 16266718818eSStefano Zampini PetscFunctionReturn(0); 16276718818eSStefano Zampini } 16286718818eSStefano Zampini 16296718818eSStefano Zampini static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 16306718818eSStefano Zampini { 16316718818eSStefano Zampini PetscFunctionBegin; 16326718818eSStefano Zampini C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16336718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_ABt; 16344222ddf1SHong Zhang PetscFunctionReturn(0); 16354222ddf1SHong Zhang } 16364222ddf1SHong Zhang 16374222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 16384222ddf1SHong Zhang { 16394222ddf1SHong Zhang Mat_Product *product = C->product; 16404222ddf1SHong Zhang 16414222ddf1SHong Zhang PetscFunctionBegin; 16424222ddf1SHong Zhang switch (product->type) { 16434222ddf1SHong Zhang case MATPRODUCT_AB: 16449566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C)); 16454222ddf1SHong Zhang break; 16464222ddf1SHong Zhang case MATPRODUCT_AtB: 16479566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C)); 16484222ddf1SHong Zhang break; 16496718818eSStefano Zampini case MATPRODUCT_ABt: 16509566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C)); 16514222ddf1SHong Zhang break; 16524222ddf1SHong Zhang default: 16536718818eSStefano Zampini break; 16544222ddf1SHong Zhang } 16554222ddf1SHong Zhang PetscFunctionReturn(0); 16564222ddf1SHong Zhang } 16574222ddf1SHong Zhang /* ------------------------------------------------------- */ 16584222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 16594222ddf1SHong Zhang { 16604222ddf1SHong Zhang Mat_Product *product = C->product; 16614222ddf1SHong Zhang Mat A = product->A; 16624222ddf1SHong Zhang PetscBool baij; 16634222ddf1SHong Zhang 16644222ddf1SHong Zhang PetscFunctionBegin; 16659566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij)); 16664222ddf1SHong Zhang if (!baij) { /* A is seqsbaij */ 16674222ddf1SHong Zhang PetscBool sbaij; 16689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij)); 166928b400f6SJacob Faibussowitsch PetscCheck(sbaij,PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 16704222ddf1SHong Zhang 16714222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 16724222ddf1SHong Zhang } else { /* A is seqbaij */ 16734222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 16744222ddf1SHong Zhang } 16754222ddf1SHong Zhang 16764222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16774222ddf1SHong Zhang PetscFunctionReturn(0); 16784222ddf1SHong Zhang } 16794222ddf1SHong Zhang 16804222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 16814222ddf1SHong Zhang { 16824222ddf1SHong Zhang Mat_Product *product = C->product; 16834222ddf1SHong Zhang 16844222ddf1SHong Zhang PetscFunctionBegin; 16856718818eSStefano Zampini MatCheckProduct(C,1); 168628b400f6SJacob Faibussowitsch PetscCheck(product->A,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 1687b94d7dedSBarry Smith if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C)); 16884222ddf1SHong Zhang PetscFunctionReturn(0); 16894222ddf1SHong Zhang } 16906718818eSStefano Zampini 16914222ddf1SHong Zhang /* ------------------------------------------------------- */ 16924222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 16934222ddf1SHong Zhang { 16944222ddf1SHong Zhang PetscFunctionBegin; 16954222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 16964222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16974222ddf1SHong Zhang PetscFunctionReturn(0); 16984222ddf1SHong Zhang } 16994222ddf1SHong Zhang 17004222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 17014222ddf1SHong Zhang { 17024222ddf1SHong Zhang Mat_Product *product = C->product; 17034222ddf1SHong Zhang 17044222ddf1SHong Zhang PetscFunctionBegin; 17054222ddf1SHong Zhang if (product->type == MATPRODUCT_AB) { 17069566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C)); 17076718818eSStefano Zampini } 17084222ddf1SHong Zhang PetscFunctionReturn(0); 17094222ddf1SHong Zhang } 17104222ddf1SHong Zhang /* ------------------------------------------------------- */ 17114222ddf1SHong Zhang 1712b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1713c8db22f6SHong Zhang { 17142f5f1f90SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 17152f5f1f90SHong Zhang Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 17162f5f1f90SHong Zhang PetscInt *bi = b->i,*bj=b->j; 17172f5f1f90SHong Zhang PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 17182f5f1f90SHong Zhang MatScalar *btval,*btval_den,*ba=b->a; 17192f5f1f90SHong Zhang PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1720c8db22f6SHong Zhang 1721c8db22f6SHong Zhang PetscFunctionBegin; 17222f5f1f90SHong Zhang btval_den=btdense->v; 17239566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(btval_den,m*n)); 17242f5f1f90SHong Zhang for (k=0; k<ncolors; k++) { 17252f5f1f90SHong Zhang ncolumns = coloring->ncolumns[k]; 17262f5f1f90SHong Zhang for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1727d2853540SHong Zhang col = *(columns + colorforcol[k] + l); 17282f5f1f90SHong Zhang btcol = bj + bi[col]; 17292f5f1f90SHong Zhang btval = ba + bi[col]; 17302f5f1f90SHong Zhang anz = bi[col+1] - bi[col]; 1731d2853540SHong Zhang for (j=0; j<anz; j++) { 17322f5f1f90SHong Zhang brow = btcol[j]; 17332f5f1f90SHong Zhang btval_den[brow] = btval[j]; 1734c8db22f6SHong Zhang } 1735c8db22f6SHong Zhang } 17362f5f1f90SHong Zhang btval_den += m; 1737c8db22f6SHong Zhang } 1738c8db22f6SHong Zhang PetscFunctionReturn(0); 1739c8db22f6SHong Zhang } 1740c8db22f6SHong Zhang 1741b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 17428972f759SHong Zhang { 1743b2d2b04fSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 17441683a169SBarry Smith const PetscScalar *ca_den,*ca_den_ptr; 17451683a169SBarry Smith PetscScalar *ca=csp->a; 1746f99a636bSHong Zhang PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1747e88777f2SHong Zhang PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1748077f23c2SHong Zhang PetscInt nrows,*row,*idx; 1749077f23c2SHong Zhang PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 17508972f759SHong Zhang 17518972f759SHong Zhang PetscFunctionBegin; 17529566063dSJacob Faibussowitsch PetscCall(MatDenseGetArrayRead(Cden,&ca_den)); 1753f99a636bSHong Zhang 1754077f23c2SHong Zhang if (brows > 0) { 1755077f23c2SHong Zhang PetscInt *lstart,row_end,row_start; 1756980ae229SHong Zhang lstart = matcoloring->lstart; 17579566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(lstart,ncolors)); 1758eeb4fd02SHong Zhang 1759077f23c2SHong Zhang row_end = brows; 1760eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1761077f23c2SHong Zhang for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1762077f23c2SHong Zhang ca_den_ptr = ca_den; 1763980ae229SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1764eeb4fd02SHong Zhang nrows = matcoloring->nrows[k]; 1765eeb4fd02SHong Zhang row = rows + colorforrow[k]; 1766eeb4fd02SHong Zhang idx = den2sp + colorforrow[k]; 1767eeb4fd02SHong Zhang for (l=lstart[k]; l<nrows; l++) { 1768eeb4fd02SHong Zhang if (row[l] >= row_end) { 1769eeb4fd02SHong Zhang lstart[k] = l; 1770eeb4fd02SHong Zhang break; 1771eeb4fd02SHong Zhang } else { 1772077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1773eeb4fd02SHong Zhang } 1774eeb4fd02SHong Zhang } 1775077f23c2SHong Zhang ca_den_ptr += m; 1776eeb4fd02SHong Zhang } 1777077f23c2SHong Zhang row_end += brows; 1778eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1779eeb4fd02SHong Zhang } 1780077f23c2SHong Zhang } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1781077f23c2SHong Zhang ca_den_ptr = ca_den; 1782077f23c2SHong Zhang for (k=0; k<ncolors; k++) { 1783077f23c2SHong Zhang nrows = matcoloring->nrows[k]; 1784077f23c2SHong Zhang row = rows + colorforrow[k]; 1785077f23c2SHong Zhang idx = den2sp + colorforrow[k]; 1786077f23c2SHong Zhang for (l=0; l<nrows; l++) { 1787077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1788077f23c2SHong Zhang } 1789077f23c2SHong Zhang ca_den_ptr += m; 1790077f23c2SHong Zhang } 1791f99a636bSHong Zhang } 1792f99a636bSHong Zhang 17939566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArrayRead(Cden,&ca_den)); 1794f99a636bSHong Zhang #if defined(PETSC_USE_INFO) 1795077f23c2SHong Zhang if (matcoloring->brows > 0) { 17969566063dSJacob Faibussowitsch PetscCall(PetscInfo(Csp,"Loop over %" PetscInt_FMT " row blocks for den2sp\n",brows)); 1797e88777f2SHong Zhang } else { 17989566063dSJacob Faibussowitsch PetscCall(PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")); 1799e88777f2SHong Zhang } 1800f99a636bSHong Zhang #endif 18018972f759SHong Zhang PetscFunctionReturn(0); 18028972f759SHong Zhang } 18038972f759SHong Zhang 1804b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1805b1683b59SHong Zhang { 1806e88777f2SHong Zhang PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 18071a83f524SJed Brown const PetscInt *is,*ci,*cj,*row_idx; 1808b28d80bdSHong Zhang PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1809b1683b59SHong Zhang IS *isa; 1810b28d80bdSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1811e88777f2SHong Zhang PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1812e88777f2SHong Zhang PetscInt *colorforcol,*columns,*columns_i,brows; 1813e88777f2SHong Zhang PetscBool flg; 1814b1683b59SHong Zhang 1815b1683b59SHong Zhang PetscFunctionBegin; 18169566063dSJacob Faibussowitsch PetscCall(ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa)); 1817e99be685SHong Zhang 18187ecccc15SHong Zhang /* bs >1 is not being tested yet! */ 1819e88777f2SHong Zhang Nbs = mat->cmap->N/bs; 1820b1683b59SHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1821e88777f2SHong Zhang c->N = Nbs; 1822e88777f2SHong Zhang c->m = c->M; 1823b1683b59SHong Zhang c->rstart = 0; 1824e88777f2SHong Zhang c->brows = 100; 1825b1683b59SHong Zhang 1826b1683b59SHong Zhang c->ncolors = nis; 18279566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow)); 18289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(csp->nz+1,&rows)); 18299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(csp->nz+1,&den2sp)); 1830e88777f2SHong Zhang 1831e88777f2SHong Zhang brows = c->brows; 18329566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg)); 1833e88777f2SHong Zhang if (flg) c->brows = brows; 1834eeb4fd02SHong Zhang if (brows > 0) { 18359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nis+1,&c->lstart)); 1836e88777f2SHong Zhang } 18372205254eSKarl Rupp 1838d2853540SHong Zhang colorforrow[0] = 0; 1839d2853540SHong Zhang rows_i = rows; 1840f99a636bSHong Zhang den2sp_i = den2sp; 1841b1683b59SHong Zhang 18429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nis+1,&colorforcol)); 18439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(Nbs+1,&columns)); 18442205254eSKarl Rupp 1845d2853540SHong Zhang colorforcol[0] = 0; 1846d2853540SHong Zhang columns_i = columns; 1847d2853540SHong Zhang 1848077f23c2SHong Zhang /* get column-wise storage of mat */ 18499566063dSJacob Faibussowitsch PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1850b1683b59SHong Zhang 1851b28d80bdSHong Zhang cm = c->m; 18529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cm+1,&rowhit)); 18539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(cm+1,&idxhit)); 1854f99a636bSHong Zhang for (i=0; i<nis; i++) { /* loop over color */ 18559566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isa[i],&n)); 18569566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isa[i],&is)); 18572205254eSKarl Rupp 1858b1683b59SHong Zhang c->ncolumns[i] = n; 18591baa6e33SBarry Smith if (n) PetscCall(PetscArraycpy(columns_i,is,n)); 1860d2853540SHong Zhang colorforcol[i+1] = colorforcol[i] + n; 1861d2853540SHong Zhang columns_i += n; 1862b1683b59SHong Zhang 1863b1683b59SHong Zhang /* fast, crude version requires O(N*N) work */ 18649566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rowhit,cm)); 1865e99be685SHong Zhang 1866b7caf3d6SHong Zhang for (j=0; j<n; j++) { /* loop over columns*/ 1867b1683b59SHong Zhang col = is[j]; 1868d2853540SHong Zhang row_idx = cj + ci[col]; 1869b1683b59SHong Zhang m = ci[col+1] - ci[col]; 1870b7caf3d6SHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1871e99be685SHong Zhang idxhit[*row_idx] = spidx[ci[col] + k]; 1872d2853540SHong Zhang rowhit[*row_idx++] = col + 1; 1873b1683b59SHong Zhang } 1874b1683b59SHong Zhang } 1875b1683b59SHong Zhang /* count the number of hits */ 1876b1683b59SHong Zhang nrows = 0; 1877e8354b3eSHong Zhang for (j=0; j<cm; j++) { 1878b1683b59SHong Zhang if (rowhit[j]) nrows++; 1879b1683b59SHong Zhang } 1880b1683b59SHong Zhang c->nrows[i] = nrows; 1881d2853540SHong Zhang colorforrow[i+1] = colorforrow[i] + nrows; 1882d2853540SHong Zhang 1883b1683b59SHong Zhang nrows = 0; 1884b7caf3d6SHong Zhang for (j=0; j<cm; j++) { /* loop over rows */ 1885b1683b59SHong Zhang if (rowhit[j]) { 1886d2853540SHong Zhang rows_i[nrows] = j; 188712b89a43SHong Zhang den2sp_i[nrows] = idxhit[j]; 1888b1683b59SHong Zhang nrows++; 1889b1683b59SHong Zhang } 1890b1683b59SHong Zhang } 1891e88777f2SHong Zhang den2sp_i += nrows; 1892077f23c2SHong Zhang 18939566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isa[i],&is)); 1894f99a636bSHong Zhang rows_i += nrows; 1895b1683b59SHong Zhang } 18969566063dSJacob Faibussowitsch PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 18979566063dSJacob Faibussowitsch PetscCall(PetscFree(rowhit)); 18989566063dSJacob Faibussowitsch PetscCall(ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa)); 18992c71b3e2SJacob Faibussowitsch PetscCheck(csp->nz == colorforrow[nis],PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT,csp->nz,colorforrow[nis]); 1900b28d80bdSHong Zhang 1901d2853540SHong Zhang c->colorforrow = colorforrow; 1902d2853540SHong Zhang c->rows = rows; 1903f99a636bSHong Zhang c->den2sp = den2sp; 1904d2853540SHong Zhang c->colorforcol = colorforcol; 1905d2853540SHong Zhang c->columns = columns; 19062205254eSKarl Rupp 19079566063dSJacob Faibussowitsch PetscCall(PetscFree(idxhit)); 1908b1683b59SHong Zhang PetscFunctionReturn(0); 1909b1683b59SHong Zhang } 1910d013fc79Sandi selinger 19114222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19124222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1913df97dc6dSFande Kong { 19144222ddf1SHong Zhang Mat_Product *product = C->product; 19154222ddf1SHong Zhang Mat A=product->A,B=product->B; 19164222ddf1SHong Zhang 1917df97dc6dSFande Kong PetscFunctionBegin; 19184222ddf1SHong Zhang if (C->ops->mattransposemultnumeric) { 19194222ddf1SHong Zhang /* Alg: "outerproduct" */ 19209566063dSJacob Faibussowitsch PetscCall((*C->ops->mattransposemultnumeric)(A,B,C)); 19214222ddf1SHong Zhang } else { 19224222ddf1SHong Zhang /* Alg: "matmatmult" -- C = At*B */ 19236718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 19244222ddf1SHong Zhang 192528b400f6SJacob Faibussowitsch PetscCheck(atb,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 19261cdffd5eSHong Zhang if (atb->At) { 19271cdffd5eSHong Zhang /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(); 19281cdffd5eSHong Zhang user may have called MatProductReplaceMats() to get this A=product->A */ 19291cdffd5eSHong Zhang PetscCall(MatTransposeSetPrecursor(A,atb->At)); 19307fb60732SBarry Smith PetscCall(MatTranspose(A,MAT_REUSE_MATRIX,&atb->At)); 19314222ddf1SHong Zhang } 19327fb60732SBarry Smith PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A,B,C)); 19334222ddf1SHong Zhang } 1934df97dc6dSFande Kong PetscFunctionReturn(0); 1935df97dc6dSFande Kong } 1936df97dc6dSFande Kong 19374222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1938d013fc79Sandi selinger { 19394222ddf1SHong Zhang Mat_Product *product = C->product; 19404222ddf1SHong Zhang Mat A=product->A,B=product->B; 19414222ddf1SHong Zhang PetscReal fill=product->fill; 1942d013fc79Sandi selinger 1943d013fc79Sandi selinger PetscFunctionBegin; 19449566063dSJacob Faibussowitsch PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C)); 19452869b61bSandi selinger 19464222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 19474222ddf1SHong Zhang PetscFunctionReturn(0); 19482869b61bSandi selinger } 1949d013fc79Sandi selinger 19504222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19514222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 19524222ddf1SHong Zhang { 19534222ddf1SHong Zhang Mat_Product *product = C->product; 19544222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19554222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 19564222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 19574222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 19584222ddf1SHong Zhang PetscInt nalg = 7; 19594222ddf1SHong Zhang #else 19604222ddf1SHong Zhang const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 19614222ddf1SHong Zhang PetscInt nalg = 8; 19624222ddf1SHong Zhang #endif 19634222ddf1SHong Zhang 19644222ddf1SHong Zhang PetscFunctionBegin; 19654222ddf1SHong Zhang /* Set default algorithm */ 19669566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 19674222ddf1SHong Zhang if (flg) { 19689566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1969d013fc79Sandi selinger } 1970d013fc79Sandi selinger 19714222ddf1SHong Zhang /* Get runtime option */ 19724222ddf1SHong Zhang if (product->api_user) { 1973d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 19749566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg)); 1975d0609cedSBarry Smith PetscOptionsEnd(); 19764222ddf1SHong Zhang } else { 1977d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 19789566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg)); 1979d0609cedSBarry Smith PetscOptionsEnd(); 1980d013fc79Sandi selinger } 19814222ddf1SHong Zhang if (flg) { 19829566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1983d013fc79Sandi selinger } 1984d013fc79Sandi selinger 19854222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 19864222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 19874222ddf1SHong Zhang PetscFunctionReturn(0); 19884222ddf1SHong Zhang } 1989d013fc79Sandi selinger 19904222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 19914222ddf1SHong Zhang { 19924222ddf1SHong Zhang Mat_Product *product = C->product; 19934222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19944222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 1995089a957eSStefano Zampini const char *algTypes[3] = {"default","at*b","outerproduct"}; 1996089a957eSStefano Zampini PetscInt nalg = 3; 1997d013fc79Sandi selinger 19984222ddf1SHong Zhang PetscFunctionBegin; 19994222ddf1SHong Zhang /* Get runtime option */ 20004222ddf1SHong Zhang if (product->api_user) { 2001d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat"); 20029566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2003d0609cedSBarry Smith PetscOptionsEnd(); 20044222ddf1SHong Zhang } else { 2005d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat"); 20069566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg)); 2007d0609cedSBarry Smith PetscOptionsEnd(); 20084222ddf1SHong Zhang } 20094222ddf1SHong Zhang if (flg) { 20109566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 20114222ddf1SHong Zhang } 2012d013fc79Sandi selinger 20134222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 20144222ddf1SHong Zhang PetscFunctionReturn(0); 20154222ddf1SHong Zhang } 20164222ddf1SHong Zhang 20174222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 20184222ddf1SHong Zhang { 20194222ddf1SHong Zhang Mat_Product *product = C->product; 20204222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20214222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20224222ddf1SHong Zhang const char *algTypes[2] = {"default","color"}; 20234222ddf1SHong Zhang PetscInt nalg = 2; 20244222ddf1SHong Zhang 20254222ddf1SHong Zhang PetscFunctionBegin; 20264222ddf1SHong Zhang /* Set default algorithm */ 20279566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 20284222ddf1SHong Zhang if (!flg) { 20294222ddf1SHong Zhang alg = 1; 20309566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 20314222ddf1SHong Zhang } 20324222ddf1SHong Zhang 20334222ddf1SHong Zhang /* Get runtime option */ 20344222ddf1SHong Zhang if (product->api_user) { 2035d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 20369566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2037d0609cedSBarry Smith PetscOptionsEnd(); 20384222ddf1SHong Zhang } else { 2039d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 20409566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2041d0609cedSBarry Smith PetscOptionsEnd(); 20424222ddf1SHong Zhang } 20434222ddf1SHong Zhang if (flg) { 20449566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 20454222ddf1SHong Zhang } 20464222ddf1SHong Zhang 20474222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 20484222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 20494222ddf1SHong Zhang PetscFunctionReturn(0); 20504222ddf1SHong Zhang } 20514222ddf1SHong Zhang 20524222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 20534222ddf1SHong Zhang { 20544222ddf1SHong Zhang Mat_Product *product = C->product; 20554222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20564222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 20574222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 20584222ddf1SHong Zhang const char *algTypes[2] = {"scalable","rap"}; 20594222ddf1SHong Zhang PetscInt nalg = 2; 20604222ddf1SHong Zhang #else 20614222ddf1SHong Zhang const char *algTypes[3] = {"scalable","rap","hypre"}; 20624222ddf1SHong Zhang PetscInt nalg = 3; 20634222ddf1SHong Zhang #endif 20644222ddf1SHong Zhang 20654222ddf1SHong Zhang PetscFunctionBegin; 20664222ddf1SHong Zhang /* Set default algorithm */ 20679566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"default",&flg)); 20684222ddf1SHong Zhang if (flg) { 20699566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 20704222ddf1SHong Zhang } 20714222ddf1SHong Zhang 20724222ddf1SHong Zhang /* Get runtime option */ 20734222ddf1SHong Zhang if (product->api_user) { 2074d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 20759566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2076d0609cedSBarry Smith PetscOptionsEnd(); 20774222ddf1SHong Zhang } else { 2078d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 20799566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2080d0609cedSBarry Smith PetscOptionsEnd(); 20814222ddf1SHong Zhang } 20824222ddf1SHong Zhang if (flg) { 20839566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 20844222ddf1SHong Zhang } 20854222ddf1SHong Zhang 20864222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 20874222ddf1SHong Zhang PetscFunctionReturn(0); 20884222ddf1SHong Zhang } 20894222ddf1SHong Zhang 20904222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 20914222ddf1SHong Zhang { 20924222ddf1SHong Zhang Mat_Product *product = C->product; 20934222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20944222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20954222ddf1SHong Zhang const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 20964222ddf1SHong Zhang PetscInt nalg = 3; 20974222ddf1SHong Zhang 20984222ddf1SHong Zhang PetscFunctionBegin; 20994222ddf1SHong Zhang /* Set default algorithm */ 21009566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"default",&flg)); 21014222ddf1SHong Zhang if (flg) { 21029566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 21034222ddf1SHong Zhang } 21044222ddf1SHong Zhang 21054222ddf1SHong Zhang /* Get runtime option */ 21064222ddf1SHong Zhang if (product->api_user) { 2107d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat"); 21089566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2109d0609cedSBarry Smith PetscOptionsEnd(); 21104222ddf1SHong Zhang } else { 2111d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat"); 21129566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2113d0609cedSBarry Smith PetscOptionsEnd(); 21144222ddf1SHong Zhang } 21154222ddf1SHong Zhang if (flg) { 21169566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 21174222ddf1SHong Zhang } 21184222ddf1SHong Zhang 21194222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 21204222ddf1SHong Zhang PetscFunctionReturn(0); 21214222ddf1SHong Zhang } 21224222ddf1SHong Zhang 21234222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 21244222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 21254222ddf1SHong Zhang { 21264222ddf1SHong Zhang Mat_Product *product = C->product; 21274222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 21284222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 21294222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 21304222ddf1SHong Zhang PetscInt nalg = 7; 21314222ddf1SHong Zhang 21324222ddf1SHong Zhang PetscFunctionBegin; 21334222ddf1SHong Zhang /* Set default algorithm */ 21349566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(product->alg,"default",&flg)); 21354222ddf1SHong Zhang if (flg) { 21369566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 21374222ddf1SHong Zhang } 21384222ddf1SHong Zhang 21394222ddf1SHong Zhang /* Get runtime option */ 21404222ddf1SHong Zhang if (product->api_user) { 2141d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat"); 21429566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2143d0609cedSBarry Smith PetscOptionsEnd(); 21444222ddf1SHong Zhang } else { 2145d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat"); 21469566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg)); 2147d0609cedSBarry Smith PetscOptionsEnd(); 21484222ddf1SHong Zhang } 21494222ddf1SHong Zhang if (flg) { 21509566063dSJacob Faibussowitsch PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 21514222ddf1SHong Zhang } 21524222ddf1SHong Zhang 21534222ddf1SHong Zhang C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 21544222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABC; 21554222ddf1SHong Zhang PetscFunctionReturn(0); 21564222ddf1SHong Zhang } 21574222ddf1SHong Zhang 21584222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 21594222ddf1SHong Zhang { 21604222ddf1SHong Zhang Mat_Product *product = C->product; 21614222ddf1SHong Zhang 21624222ddf1SHong Zhang PetscFunctionBegin; 21634222ddf1SHong Zhang switch (product->type) { 21644222ddf1SHong Zhang case MATPRODUCT_AB: 21659566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C)); 21664222ddf1SHong Zhang break; 21674222ddf1SHong Zhang case MATPRODUCT_AtB: 21689566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C)); 21694222ddf1SHong Zhang break; 21704222ddf1SHong Zhang case MATPRODUCT_ABt: 21719566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C)); 21724222ddf1SHong Zhang break; 21734222ddf1SHong Zhang case MATPRODUCT_PtAP: 21749566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C)); 21754222ddf1SHong Zhang break; 21764222ddf1SHong Zhang case MATPRODUCT_RARt: 21779566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C)); 21784222ddf1SHong Zhang break; 21794222ddf1SHong Zhang case MATPRODUCT_ABC: 21809566063dSJacob Faibussowitsch PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C)); 21814222ddf1SHong Zhang break; 21826718818eSStefano Zampini default: 21836718818eSStefano Zampini break; 21844222ddf1SHong Zhang } 2185d013fc79Sandi selinger PetscFunctionReturn(0); 2186d013fc79Sandi selinger } 2187