1be1d678aSKris Buschelman 2d50806bdSBarry Smith /* 32499ec78SHong Zhang Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4d50806bdSBarry Smith C = A * B 5d50806bdSBarry Smith */ 6d50806bdSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 9c6db04a5SJed Brown #include <petscbt.h> 10af0996ceSBarry Smith #include <petsc/private/isimpl.h> 1107475bc1SBarry Smith #include <../src/mat/impls/dense/seq/dense.h> 127bab7c10SHong Zhang 13df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14df97dc6dSFande Kong { 15df97dc6dSFande Kong PetscErrorCode ierr; 16df97dc6dSFande Kong 17df97dc6dSFande Kong PetscFunctionBegin; 18df97dc6dSFande Kong if (C->ops->matmultnumeric) { 196718818eSStefano Zampini if (C->ops->matmultnumeric == MatMatMultNumeric_SeqAIJ_SeqAIJ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call"); 20df97dc6dSFande Kong ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr); 21df97dc6dSFande Kong } else { 22df97dc6dSFande Kong ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);CHKERRQ(ierr); 23df97dc6dSFande Kong } 24df97dc6dSFande Kong PetscFunctionReturn(0); 25df97dc6dSFande Kong } 26df97dc6dSFande Kong 274222ddf1SHong Zhang /* Modified from MatCreateSeqAIJWithArrays() */ 28e4e71118SStefano Zampini PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 29df97dc6dSFande Kong { 30df97dc6dSFande Kong PetscErrorCode ierr; 314222ddf1SHong Zhang PetscInt ii; 324222ddf1SHong Zhang Mat_SeqAIJ *aij; 33e4e71118SStefano Zampini PetscBool isseqaij; 345c66b693SKris Buschelman 355c66b693SKris Buschelman PetscFunctionBegin; 364222ddf1SHong Zhang if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 374222ddf1SHong Zhang ierr = MatSetSizes(mat,m,n,m,n);CHKERRQ(ierr); 384222ddf1SHong Zhang 39e4e71118SStefano Zampini if (!mtype) { 40e4e71118SStefano Zampini ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 41e4e71118SStefano Zampini if (!isseqaij) { ierr = MatSetType(mat,MATSEQAIJ);CHKERRQ(ierr); } 42e4e71118SStefano Zampini } else { 43e4e71118SStefano Zampini ierr = MatSetType(mat,mtype);CHKERRQ(ierr); 44e4e71118SStefano Zampini } 45f4259b30SLisandro Dalcin ierr = MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 464222ddf1SHong Zhang aij = (Mat_SeqAIJ*)(mat)->data; 474222ddf1SHong Zhang ierr = PetscMalloc1(m,&aij->imax);CHKERRQ(ierr); 484222ddf1SHong Zhang ierr = PetscMalloc1(m,&aij->ilen);CHKERRQ(ierr); 494222ddf1SHong Zhang 504222ddf1SHong Zhang aij->i = i; 514222ddf1SHong Zhang aij->j = j; 524222ddf1SHong Zhang aij->a = a; 534222ddf1SHong Zhang aij->singlemalloc = PETSC_FALSE; 544222ddf1SHong Zhang aij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 554222ddf1SHong Zhang aij->free_a = PETSC_FALSE; 564222ddf1SHong Zhang aij->free_ij = PETSC_FALSE; 574222ddf1SHong Zhang 584222ddf1SHong Zhang for (ii=0; ii<m; ii++) { 594222ddf1SHong Zhang aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 6025023028SHong Zhang } 614222ddf1SHong Zhang 625c66b693SKris Buschelman PetscFunctionReturn(0); 635c66b693SKris Buschelman } 641c24bd37SHong Zhang 654222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 664222ddf1SHong Zhang { 674222ddf1SHong Zhang PetscErrorCode ierr; 684222ddf1SHong Zhang Mat_Product *product = C->product; 694222ddf1SHong Zhang MatProductAlgorithm alg; 704222ddf1SHong Zhang PetscBool flg; 714222ddf1SHong Zhang 724222ddf1SHong Zhang PetscFunctionBegin; 734222ddf1SHong Zhang if (product) { 744222ddf1SHong Zhang alg = product->alg; 754222ddf1SHong Zhang } else { 764222ddf1SHong Zhang alg = "sorted"; 774222ddf1SHong Zhang } 784222ddf1SHong Zhang /* sorted */ 794222ddf1SHong Zhang ierr = PetscStrcmp(alg,"sorted",&flg);CHKERRQ(ierr); 804222ddf1SHong Zhang if (flg) { 814222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);CHKERRQ(ierr); 824222ddf1SHong Zhang PetscFunctionReturn(0); 834222ddf1SHong Zhang } 844222ddf1SHong Zhang 854222ddf1SHong Zhang /* scalable */ 864222ddf1SHong Zhang ierr = PetscStrcmp(alg,"scalable",&flg);CHKERRQ(ierr); 874222ddf1SHong Zhang if (flg) { 884222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 894222ddf1SHong Zhang PetscFunctionReturn(0); 904222ddf1SHong Zhang } 914222ddf1SHong Zhang 924222ddf1SHong Zhang /* scalable_fast */ 934222ddf1SHong Zhang ierr = PetscStrcmp(alg,"scalable_fast",&flg);CHKERRQ(ierr); 944222ddf1SHong Zhang if (flg) { 954222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);CHKERRQ(ierr); 964222ddf1SHong Zhang PetscFunctionReturn(0); 974222ddf1SHong Zhang } 984222ddf1SHong Zhang 994222ddf1SHong Zhang /* heap */ 1004222ddf1SHong Zhang ierr = PetscStrcmp(alg,"heap",&flg);CHKERRQ(ierr); 1014222ddf1SHong Zhang if (flg) { 1024222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr); 1034222ddf1SHong Zhang PetscFunctionReturn(0); 1044222ddf1SHong Zhang } 1054222ddf1SHong Zhang 1064222ddf1SHong Zhang /* btheap */ 1074222ddf1SHong Zhang ierr = PetscStrcmp(alg,"btheap",&flg);CHKERRQ(ierr); 1084222ddf1SHong Zhang if (flg) { 1094222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr); 1104222ddf1SHong Zhang PetscFunctionReturn(0); 1114222ddf1SHong Zhang } 1124222ddf1SHong Zhang 1134222ddf1SHong Zhang /* llcondensed */ 1144222ddf1SHong Zhang ierr = PetscStrcmp(alg,"llcondensed",&flg);CHKERRQ(ierr); 1154222ddf1SHong Zhang if (flg) { 1164222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr); 1174222ddf1SHong Zhang PetscFunctionReturn(0); 1184222ddf1SHong Zhang } 1194222ddf1SHong Zhang 1204222ddf1SHong Zhang /* rowmerge */ 1214222ddf1SHong Zhang ierr = PetscStrcmp(alg,"rowmerge",&flg);CHKERRQ(ierr); 1224222ddf1SHong Zhang if (flg) { 1234222ddf1SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);CHKERRQ(ierr); 1244222ddf1SHong Zhang PetscFunctionReturn(0); 1254222ddf1SHong Zhang } 1264222ddf1SHong Zhang 1274222ddf1SHong Zhang #if defined(PETSC_HAVE_HYPRE) 1284222ddf1SHong Zhang ierr = PetscStrcmp(alg,"hypre",&flg);CHKERRQ(ierr); 1294222ddf1SHong Zhang if (flg) { 1304222ddf1SHong Zhang ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr); 1314222ddf1SHong Zhang PetscFunctionReturn(0); 1324222ddf1SHong Zhang } 1334222ddf1SHong Zhang #endif 1344222ddf1SHong Zhang 1354222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1364222ddf1SHong Zhang PetscFunctionReturn(0); 1374222ddf1SHong Zhang } 1384222ddf1SHong Zhang 1394222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 140b561aa9dSHong Zhang { 141b561aa9dSHong Zhang PetscErrorCode ierr; 142b561aa9dSHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 143971236abSHong Zhang PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 144c1ba5575SJed Brown PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 145b561aa9dSHong Zhang PetscReal afill; 146eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 147eca6b86aSHong Zhang PetscTable ta; 148fb908031SHong Zhang PetscBT lnkbt; 1490298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 150b561aa9dSHong Zhang 151b561aa9dSHong Zhang PetscFunctionBegin; 152bd958071SHong Zhang /* Get ci and cj */ 153bd958071SHong Zhang /*---------------*/ 154fb908031SHong Zhang /* Allocate ci array, arrays for fill computation and */ 155fb908031SHong Zhang /* free space for accumulating nonzero column info */ 156854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 157fb908031SHong Zhang ci[0] = 0; 158fb908031SHong Zhang 159fb908031SHong Zhang /* create and initialize a linked list */ 160c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 161c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 162eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 163eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 164eca6b86aSHong Zhang 165eca6b86aSHong Zhang ierr = PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);CHKERRQ(ierr); 166fb908031SHong Zhang 167fb908031SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 168f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 1692205254eSKarl Rupp 170fb908031SHong Zhang current_space = free_space; 171fb908031SHong Zhang 172fb908031SHong Zhang /* Determine ci and cj */ 173fb908031SHong Zhang for (i=0; i<am; i++) { 174fb908031SHong Zhang anzi = ai[i+1] - ai[i]; 175fb908031SHong Zhang aj = a->j + ai[i]; 176fb908031SHong Zhang for (j=0; j<anzi; j++) { 177fb908031SHong Zhang brow = aj[j]; 178fb908031SHong Zhang bnzj = bi[brow+1] - bi[brow]; 179fb908031SHong Zhang bj = b->j + bi[brow]; 180fb908031SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 181fb908031SHong Zhang ierr = PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);CHKERRQ(ierr); 182fb908031SHong Zhang } 183fb908031SHong Zhang cnzi = lnk[0]; 184fb908031SHong Zhang 185fb908031SHong Zhang /* If free space is not available, make more free space */ 186fb908031SHong Zhang /* Double the amount of total space in the list */ 187fb908031SHong Zhang if (current_space->local_remaining<cnzi) { 188f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 189fb908031SHong Zhang ndouble++; 190fb908031SHong Zhang } 191fb908031SHong Zhang 192fb908031SHong Zhang /* Copy data into free space, then initialize lnk */ 193fb908031SHong Zhang ierr = PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr); 1942205254eSKarl Rupp 195fb908031SHong Zhang current_space->array += cnzi; 196fb908031SHong Zhang current_space->local_used += cnzi; 197fb908031SHong Zhang current_space->local_remaining -= cnzi; 1982205254eSKarl Rupp 199fb908031SHong Zhang ci[i+1] = ci[i] + cnzi; 200fb908031SHong Zhang } 201fb908031SHong Zhang 202fb908031SHong Zhang /* Column indices are in the list of free space */ 203fb908031SHong Zhang /* Allocate space for cj, initialize cj, and */ 204fb908031SHong Zhang /* destroy list of free space and other temporary array(s) */ 205854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 206fb908031SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 207fb908031SHong Zhang ierr = PetscLLCondensedDestroy(lnk,lnkbt);CHKERRQ(ierr); 208b561aa9dSHong Zhang 20926be0446SHong Zhang /* put together the new symbolic matrix */ 210e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 2114222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 21258c24d83SHong Zhang 21358c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 21458c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 2154222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 216aa1aec99SHong Zhang c->free_a = PETSC_FALSE; 217e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 21858c24d83SHong Zhang c->nonew = 0; 2194222ddf1SHong Zhang 2204222ddf1SHong Zhang /* fast, needs non-scalable O(bn) array 'abdense' */ 2214222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 2220b7e3e3dSHong Zhang 2237212da7cSHong Zhang /* set MatInfo */ 2247212da7cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 225f2b054eeSHong Zhang if (afill < 1.0) afill = 1.0; 2267212da7cSHong Zhang c->maxnz = ci[am]; 2277212da7cSHong Zhang c->nz = ci[am]; 2284222ddf1SHong Zhang C->info.mallocs = ndouble; 2294222ddf1SHong Zhang C->info.fill_ratio_given = fill; 2304222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 2317212da7cSHong Zhang 2327212da7cSHong Zhang #if defined(PETSC_USE_INFO) 2337212da7cSHong Zhang if (ci[am]) { 2344222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 2354222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 236f2b054eeSHong Zhang } else { 2374222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 238be0fcf8dSHong Zhang } 239f2b054eeSHong Zhang #endif 24058c24d83SHong Zhang PetscFunctionReturn(0); 24158c24d83SHong Zhang } 242d50806bdSBarry Smith 243df97dc6dSFande Kong PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 244d50806bdSBarry Smith { 245dfbe8321SBarry Smith PetscErrorCode ierr; 246d13dce4bSSatish Balay PetscLogDouble flops=0.0; 247d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 248d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 249d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 25038baddfdSBarry Smith PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 251c58ca2e3SHong Zhang PetscInt am =A->rmap->n,cm=C->rmap->n; 252519eb980SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 253aa1aec99SHong Zhang PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp; 254aa1aec99SHong Zhang PetscScalar *ab_dense; 2556718818eSStefano Zampini PetscContainer cab_dense; 256d50806bdSBarry Smith 257d50806bdSBarry Smith PetscFunctionBegin; 258aa1aec99SHong Zhang if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 259854ce69bSBarry Smith ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 260aa1aec99SHong Zhang c->a = ca; 261aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 2626718818eSStefano Zampini } else ca = c->a; 2636718818eSStefano Zampini 2646718818eSStefano Zampini /* TODO this should be done in the symbolic phase */ 2656718818eSStefano Zampini /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 2666718818eSStefano Zampini that is hard to eradicate) */ 2676718818eSStefano Zampini ierr = PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense);CHKERRQ(ierr); 2686718818eSStefano Zampini if (!cab_dense) { 2696718818eSStefano Zampini ierr = PetscMalloc1(B->cmap->N,&ab_dense);CHKERRQ(ierr); 2706718818eSStefano Zampini ierr = PetscContainerCreate(PETSC_COMM_SELF,&cab_dense);CHKERRQ(ierr); 2716718818eSStefano Zampini ierr = PetscContainerSetPointer(cab_dense,ab_dense);CHKERRQ(ierr); 2726718818eSStefano Zampini ierr = PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault);CHKERRQ(ierr); 2736718818eSStefano Zampini ierr = PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense);CHKERRQ(ierr); 2746718818eSStefano Zampini ierr = PetscObjectDereference((PetscObject)cab_dense);CHKERRQ(ierr); 275d90d86d0SMatthew G. Knepley } 2766718818eSStefano Zampini ierr = PetscContainerGetPointer(cab_dense,(void**)&ab_dense);CHKERRQ(ierr); 2776718818eSStefano Zampini ierr = PetscArrayzero(ab_dense,B->cmap->N);CHKERRQ(ierr); 278aa1aec99SHong Zhang 279c124e916SHong Zhang /* clean old values in C */ 280580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 281d50806bdSBarry Smith /* Traverse A row-wise. */ 282d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 283d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 284d50806bdSBarry Smith for (i=0; i<am; i++) { 285d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 286d50806bdSBarry Smith for (j=0; j<anzi; j++) { 287519eb980SHong Zhang brow = aj[j]; 288d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 289d50806bdSBarry Smith bjj = bj + bi[brow]; 290d50806bdSBarry Smith baj = ba + bi[brow]; 291519eb980SHong Zhang /* perform dense axpy */ 29236ec6d2dSHong Zhang valtmp = aa[j]; 293519eb980SHong Zhang for (k=0; k<bnzi; k++) { 29436ec6d2dSHong Zhang ab_dense[bjj[k]] += valtmp*baj[k]; 295519eb980SHong Zhang } 296519eb980SHong Zhang flops += 2*bnzi; 297519eb980SHong Zhang } 298c58ca2e3SHong Zhang aj += anzi; aa += anzi; 299c58ca2e3SHong Zhang 300c58ca2e3SHong Zhang cnzi = ci[i+1] - ci[i]; 301519eb980SHong Zhang for (k=0; k<cnzi; k++) { 302519eb980SHong Zhang ca[k] += ab_dense[cj[k]]; 303519eb980SHong Zhang ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 304519eb980SHong Zhang } 305519eb980SHong Zhang flops += cnzi; 306519eb980SHong Zhang cj += cnzi; ca += cnzi; 307519eb980SHong Zhang } 308c58ca2e3SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 309c58ca2e3SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 310c58ca2e3SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 311c58ca2e3SHong Zhang PetscFunctionReturn(0); 312c58ca2e3SHong Zhang } 313c58ca2e3SHong Zhang 31425023028SHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 315c58ca2e3SHong Zhang { 316c58ca2e3SHong Zhang PetscErrorCode ierr; 317c58ca2e3SHong Zhang PetscLogDouble flops=0.0; 318c58ca2e3SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 319c58ca2e3SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 320c58ca2e3SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 321c58ca2e3SHong Zhang PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 322c58ca2e3SHong Zhang PetscInt am = A->rmap->N,cm=C->rmap->N; 323c58ca2e3SHong Zhang PetscInt i,j,k,anzi,bnzi,cnzi,brow; 32436ec6d2dSHong Zhang PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 325c58ca2e3SHong Zhang PetscInt nextb; 326c58ca2e3SHong Zhang 327c58ca2e3SHong Zhang PetscFunctionBegin; 328cf742fccSHong Zhang if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 329cf742fccSHong Zhang ierr = PetscMalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 330cf742fccSHong Zhang c->a = ca; 331cf742fccSHong Zhang c->free_a = PETSC_TRUE; 332cf742fccSHong Zhang } 333cf742fccSHong Zhang 334c58ca2e3SHong Zhang /* clean old values in C */ 335580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 336c58ca2e3SHong Zhang /* Traverse A row-wise. */ 337c58ca2e3SHong Zhang /* Build the ith row in C by summing over nonzero columns in A, */ 338c58ca2e3SHong Zhang /* the rows of B corresponding to nonzeros of A. */ 339519eb980SHong Zhang for (i=0; i<am; i++) { 340519eb980SHong Zhang anzi = ai[i+1] - ai[i]; 341519eb980SHong Zhang cnzi = ci[i+1] - ci[i]; 342519eb980SHong Zhang for (j=0; j<anzi; j++) { 343519eb980SHong Zhang brow = aj[j]; 344519eb980SHong Zhang bnzi = bi[brow+1] - bi[brow]; 345519eb980SHong Zhang bjj = bj + bi[brow]; 346519eb980SHong Zhang baj = ba + bi[brow]; 347519eb980SHong Zhang /* perform sparse axpy */ 34836ec6d2dSHong Zhang valtmp = aa[j]; 349c124e916SHong Zhang nextb = 0; 350c124e916SHong Zhang for (k=0; nextb<bnzi; k++) { 351c124e916SHong Zhang if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 35236ec6d2dSHong Zhang ca[k] += valtmp*baj[nextb++]; 353c124e916SHong Zhang } 354d50806bdSBarry Smith } 355d50806bdSBarry Smith flops += 2*bnzi; 356d50806bdSBarry Smith } 357519eb980SHong Zhang aj += anzi; aa += anzi; 358519eb980SHong Zhang cj += cnzi; ca += cnzi; 359d50806bdSBarry Smith } 360716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 361716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 362d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 363d50806bdSBarry Smith PetscFunctionReturn(0); 364d50806bdSBarry Smith } 365bc011b1eSHong Zhang 3664222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 36725296bd5SBarry Smith { 36825296bd5SBarry Smith PetscErrorCode ierr; 36925296bd5SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 37025296bd5SBarry Smith PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 3713c50cad2SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 37225296bd5SBarry Smith MatScalar *ca; 37325296bd5SBarry Smith PetscReal afill; 374eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 375eca6b86aSHong Zhang PetscTable ta; 3760298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 37725296bd5SBarry Smith 37825296bd5SBarry Smith PetscFunctionBegin; 3793c50cad2SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 3803c50cad2SHong Zhang /*-----------------------------------------------------------------------------------------*/ 3813c50cad2SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 382854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 3833c50cad2SHong Zhang ci[0] = 0; 3843c50cad2SHong Zhang 3853c50cad2SHong Zhang /* create and initialize a linked list */ 386c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 387c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 388eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 389eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 390eca6b86aSHong Zhang 391eca6b86aSHong Zhang ierr = PetscLLCondensedCreate_fast(Crmax,&lnk);CHKERRQ(ierr); 3923c50cad2SHong Zhang 3933c50cad2SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 394f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 3953c50cad2SHong Zhang current_space = free_space; 3963c50cad2SHong Zhang 3973c50cad2SHong Zhang /* Determine ci and cj */ 3983c50cad2SHong Zhang for (i=0; i<am; i++) { 3993c50cad2SHong Zhang anzi = ai[i+1] - ai[i]; 4003c50cad2SHong Zhang aj = a->j + ai[i]; 4013c50cad2SHong Zhang for (j=0; j<anzi; j++) { 4023c50cad2SHong Zhang brow = aj[j]; 4033c50cad2SHong Zhang bnzj = bi[brow+1] - bi[brow]; 4043c50cad2SHong Zhang bj = b->j + bi[brow]; 4053c50cad2SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 4063c50cad2SHong Zhang ierr = PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);CHKERRQ(ierr); 4073c50cad2SHong Zhang } 4083c50cad2SHong Zhang cnzi = lnk[1]; 4093c50cad2SHong Zhang 4103c50cad2SHong Zhang /* If free space is not available, make more free space */ 4113c50cad2SHong Zhang /* Double the amount of total space in the list */ 4123c50cad2SHong Zhang if (current_space->local_remaining<cnzi) { 413f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4143c50cad2SHong Zhang ndouble++; 4153c50cad2SHong Zhang } 4163c50cad2SHong Zhang 4173c50cad2SHong Zhang /* Copy data into free space, then initialize lnk */ 4183c50cad2SHong Zhang ierr = PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);CHKERRQ(ierr); 4192205254eSKarl Rupp 4203c50cad2SHong Zhang current_space->array += cnzi; 4213c50cad2SHong Zhang current_space->local_used += cnzi; 4223c50cad2SHong Zhang current_space->local_remaining -= cnzi; 4232205254eSKarl Rupp 4243c50cad2SHong Zhang ci[i+1] = ci[i] + cnzi; 4253c50cad2SHong Zhang } 4263c50cad2SHong Zhang 4273c50cad2SHong Zhang /* Column indices are in the list of free space */ 4283c50cad2SHong Zhang /* Allocate space for cj, initialize cj, and */ 4293c50cad2SHong Zhang /* destroy list of free space and other temporary array(s) */ 430854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 4313c50cad2SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 4323c50cad2SHong Zhang ierr = PetscLLCondensedDestroy_fast(lnk);CHKERRQ(ierr); 43325296bd5SBarry Smith 43425296bd5SBarry Smith /* Allocate space for ca */ 435580bdb30SBarry Smith ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 43625296bd5SBarry Smith 43725296bd5SBarry Smith /* put together the new symbolic matrix */ 438e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 4394222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 44025296bd5SBarry Smith 44125296bd5SBarry Smith /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 44225296bd5SBarry Smith /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 4434222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 44425296bd5SBarry Smith c->free_a = PETSC_TRUE; 44525296bd5SBarry Smith c->free_ij = PETSC_TRUE; 44625296bd5SBarry Smith c->nonew = 0; 4472205254eSKarl Rupp 4484222ddf1SHong Zhang /* slower, less memory */ 4494222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 45025296bd5SBarry Smith 45125296bd5SBarry Smith /* set MatInfo */ 45225296bd5SBarry Smith afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 45325296bd5SBarry Smith if (afill < 1.0) afill = 1.0; 45425296bd5SBarry Smith c->maxnz = ci[am]; 45525296bd5SBarry Smith c->nz = ci[am]; 4564222ddf1SHong Zhang C->info.mallocs = ndouble; 4574222ddf1SHong Zhang C->info.fill_ratio_given = fill; 4584222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 45925296bd5SBarry Smith 46025296bd5SBarry Smith #if defined(PETSC_USE_INFO) 46125296bd5SBarry Smith if (ci[am]) { 4624222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 4634222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 46425296bd5SBarry Smith } else { 4654222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 46625296bd5SBarry Smith } 46725296bd5SBarry Smith #endif 46825296bd5SBarry Smith PetscFunctionReturn(0); 46925296bd5SBarry Smith } 47025296bd5SBarry Smith 4714222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 472e9e4536cSHong Zhang { 473e9e4536cSHong Zhang PetscErrorCode ierr; 474e9e4536cSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 475bf9555e6SHong Zhang PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 47625c41797SHong Zhang PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 477e9e4536cSHong Zhang MatScalar *ca; 478e9e4536cSHong Zhang PetscReal afill; 479eca6b86aSHong Zhang PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 480eca6b86aSHong Zhang PetscTable ta; 4810298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 482e9e4536cSHong Zhang 483e9e4536cSHong Zhang PetscFunctionBegin; 484bd958071SHong Zhang /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 485bd958071SHong Zhang /*---------------------------------------------------------------------------------------------*/ 486bd958071SHong Zhang /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 487854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 488bd958071SHong Zhang ci[0] = 0; 489bd958071SHong Zhang 490bd958071SHong Zhang /* create and initialize a linked list */ 491c373ccc6SHong Zhang ierr = PetscTableCreate(bn,bn,&ta);CHKERRQ(ierr); 492c373ccc6SHong Zhang MatRowMergeMax_SeqAIJ(b,bm,ta); 493eca6b86aSHong Zhang ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr); 494eca6b86aSHong Zhang ierr = PetscTableDestroy(&ta);CHKERRQ(ierr); 495eca6b86aSHong Zhang ierr = PetscLLCondensedCreate_Scalable(Crmax,&lnk);CHKERRQ(ierr); 496bd958071SHong Zhang 497bd958071SHong Zhang /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 498f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 499bd958071SHong Zhang current_space = free_space; 500bd958071SHong Zhang 501bd958071SHong Zhang /* Determine ci and cj */ 502bd958071SHong Zhang for (i=0; i<am; i++) { 503bd958071SHong Zhang anzi = ai[i+1] - ai[i]; 504bd958071SHong Zhang aj = a->j + ai[i]; 505bd958071SHong Zhang for (j=0; j<anzi; j++) { 506bd958071SHong Zhang brow = aj[j]; 507bd958071SHong Zhang bnzj = bi[brow+1] - bi[brow]; 508bd958071SHong Zhang bj = b->j + bi[brow]; 509bd958071SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 510bd958071SHong Zhang ierr = PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);CHKERRQ(ierr); 511bd958071SHong Zhang } 512bd958071SHong Zhang cnzi = lnk[0]; 513bd958071SHong Zhang 514bd958071SHong Zhang /* If free space is not available, make more free space */ 515bd958071SHong Zhang /* Double the amount of total space in the list */ 516bd958071SHong Zhang if (current_space->local_remaining<cnzi) { 517f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 518bd958071SHong Zhang ndouble++; 519bd958071SHong Zhang } 520bd958071SHong Zhang 521bd958071SHong Zhang /* Copy data into free space, then initialize lnk */ 522bd958071SHong Zhang ierr = PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);CHKERRQ(ierr); 5232205254eSKarl Rupp 524bd958071SHong Zhang current_space->array += cnzi; 525bd958071SHong Zhang current_space->local_used += cnzi; 526bd958071SHong Zhang current_space->local_remaining -= cnzi; 5272205254eSKarl Rupp 528bd958071SHong Zhang ci[i+1] = ci[i] + cnzi; 529bd958071SHong Zhang } 530bd958071SHong Zhang 531bd958071SHong Zhang /* Column indices are in the list of free space */ 532bd958071SHong Zhang /* Allocate space for cj, initialize cj, and */ 533bd958071SHong Zhang /* destroy list of free space and other temporary array(s) */ 534854ce69bSBarry Smith ierr = PetscMalloc1(ci[am]+1,&cj);CHKERRQ(ierr); 535bd958071SHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 536bd958071SHong Zhang ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr); 537e9e4536cSHong Zhang 538e9e4536cSHong Zhang /* Allocate space for ca */ 539bd958071SHong Zhang /*-----------------------*/ 540580bdb30SBarry Smith ierr = PetscCalloc1(ci[am]+1,&ca);CHKERRQ(ierr); 541e9e4536cSHong Zhang 542e9e4536cSHong Zhang /* put together the new symbolic matrix */ 543e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 5444222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 545e9e4536cSHong Zhang 546e9e4536cSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 547e9e4536cSHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 5484222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 549e9e4536cSHong Zhang c->free_a = PETSC_TRUE; 550e9e4536cSHong Zhang c->free_ij = PETSC_TRUE; 551e9e4536cSHong Zhang c->nonew = 0; 5522205254eSKarl Rupp 5534222ddf1SHong Zhang /* slower, less memory */ 5544222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 555e9e4536cSHong Zhang 556e9e4536cSHong Zhang /* set MatInfo */ 557e9e4536cSHong Zhang afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 558e9e4536cSHong Zhang if (afill < 1.0) afill = 1.0; 559e9e4536cSHong Zhang c->maxnz = ci[am]; 560e9e4536cSHong Zhang c->nz = ci[am]; 5614222ddf1SHong Zhang C->info.mallocs = ndouble; 5624222ddf1SHong Zhang C->info.fill_ratio_given = fill; 5634222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 564e9e4536cSHong Zhang 565e9e4536cSHong Zhang #if defined(PETSC_USE_INFO) 566e9e4536cSHong Zhang if (ci[am]) { 5674222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 5684222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 569e9e4536cSHong Zhang } else { 5704222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 571e9e4536cSHong Zhang } 572e9e4536cSHong Zhang #endif 573e9e4536cSHong Zhang PetscFunctionReturn(0); 574e9e4536cSHong Zhang } 575e9e4536cSHong Zhang 5764222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 5770ced3a2bSJed Brown { 5780ced3a2bSJed Brown PetscErrorCode ierr; 5790ced3a2bSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 5800ced3a2bSJed Brown const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 5810ced3a2bSJed Brown PetscInt *ci,*cj,*bb; 5820ced3a2bSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 5830ced3a2bSJed Brown PetscReal afill; 5840ced3a2bSJed Brown PetscInt i,j,col,ndouble = 0; 5850298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 5860ced3a2bSJed Brown PetscHeap h; 5870ced3a2bSJed Brown 5880ced3a2bSJed Brown PetscFunctionBegin; 589cd093f1dSJed Brown /* Get ci and cj - by merging sorted rows using a heap */ 5900ced3a2bSJed Brown /*---------------------------------------------------------------------------------------------*/ 5910ced3a2bSJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 592854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 5930ced3a2bSJed Brown ci[0] = 0; 5940ced3a2bSJed Brown 5950ced3a2bSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 596f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 5970ced3a2bSJed Brown current_space = free_space; 5980ced3a2bSJed Brown 5990ced3a2bSJed Brown ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 600785e854fSJed Brown ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 6010ced3a2bSJed Brown 6020ced3a2bSJed Brown /* Determine ci and cj */ 6030ced3a2bSJed Brown for (i=0; i<am; i++) { 6040ced3a2bSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 6050ced3a2bSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 6060ced3a2bSJed Brown ci[i+1] = ci[i]; 6070ced3a2bSJed Brown /* Populate the min heap */ 6080ced3a2bSJed Brown for (j=0; j<anzi; j++) { 6090ced3a2bSJed Brown bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 6100ced3a2bSJed Brown if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 6110ced3a2bSJed Brown ierr = PetscHeapAdd(h,j,bj[bb[j]++]);CHKERRQ(ierr); 6120ced3a2bSJed Brown } 6130ced3a2bSJed Brown } 6140ced3a2bSJed Brown /* Pick off the min element, adding it to free space */ 6150ced3a2bSJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 6160ced3a2bSJed Brown while (j >= 0) { 6170ced3a2bSJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 618f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 6190ced3a2bSJed Brown ndouble++; 6200ced3a2bSJed Brown } 6210ced3a2bSJed Brown *(current_space->array++) = col; 6220ced3a2bSJed Brown current_space->local_used++; 6230ced3a2bSJed Brown current_space->local_remaining--; 6240ced3a2bSJed Brown ci[i+1]++; 6250ced3a2bSJed Brown 6260ced3a2bSJed Brown /* stash if anything else remains in this row of B */ 6270ced3a2bSJed Brown if (bb[j] < bi[acol[j]+1]) {ierr = PetscHeapStash(h,j,bj[bb[j]++]);CHKERRQ(ierr);} 6280ced3a2bSJed Brown while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 6290ced3a2bSJed Brown PetscInt j2,col2; 6300ced3a2bSJed Brown ierr = PetscHeapPeek(h,&j2,&col2);CHKERRQ(ierr); 6310ced3a2bSJed Brown if (col2 != col) break; 6320ced3a2bSJed Brown ierr = PetscHeapPop(h,&j2,&col2);CHKERRQ(ierr); 6330ced3a2bSJed Brown if (bb[j2] < bi[acol[j2]+1]) {ierr = PetscHeapStash(h,j2,bj[bb[j2]++]);CHKERRQ(ierr);} 6340ced3a2bSJed Brown } 6350ced3a2bSJed Brown /* Put any stashed elements back into the min heap */ 6360ced3a2bSJed Brown ierr = PetscHeapUnstash(h);CHKERRQ(ierr); 6370ced3a2bSJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 6380ced3a2bSJed Brown } 6390ced3a2bSJed Brown } 6400ced3a2bSJed Brown ierr = PetscFree(bb);CHKERRQ(ierr); 6410ced3a2bSJed Brown ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 6420ced3a2bSJed Brown 6430ced3a2bSJed Brown /* Column indices are in the list of free space */ 6440ced3a2bSJed Brown /* Allocate space for cj, initialize cj, and */ 6450ced3a2bSJed Brown /* destroy list of free space and other temporary array(s) */ 646785e854fSJed Brown ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 6470ced3a2bSJed Brown ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 6480ced3a2bSJed Brown 6490ced3a2bSJed Brown /* put together the new symbolic matrix */ 650e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 6514222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6520ced3a2bSJed Brown 6530ced3a2bSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6540ced3a2bSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 6554222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 6560ced3a2bSJed Brown c->free_a = PETSC_TRUE; 6570ced3a2bSJed Brown c->free_ij = PETSC_TRUE; 6580ced3a2bSJed Brown c->nonew = 0; 65926fbe8dcSKarl Rupp 6604222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 6610ced3a2bSJed Brown 6620ced3a2bSJed Brown /* set MatInfo */ 6630ced3a2bSJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 6640ced3a2bSJed Brown if (afill < 1.0) afill = 1.0; 6650ced3a2bSJed Brown c->maxnz = ci[am]; 6660ced3a2bSJed Brown c->nz = ci[am]; 6674222ddf1SHong Zhang C->info.mallocs = ndouble; 6684222ddf1SHong Zhang C->info.fill_ratio_given = fill; 6694222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 6700ced3a2bSJed Brown 6710ced3a2bSJed Brown #if defined(PETSC_USE_INFO) 6720ced3a2bSJed Brown if (ci[am]) { 6734222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 6744222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 6750ced3a2bSJed Brown } else { 6764222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 6770ced3a2bSJed Brown } 6780ced3a2bSJed Brown #endif 6790ced3a2bSJed Brown PetscFunctionReturn(0); 6800ced3a2bSJed Brown } 681e9e4536cSHong Zhang 6824222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 6838a07c6f1SJed Brown { 6848a07c6f1SJed Brown PetscErrorCode ierr; 6858a07c6f1SJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 6868a07c6f1SJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 6878a07c6f1SJed Brown PetscInt *ci,*cj,*bb; 6888a07c6f1SJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 6898a07c6f1SJed Brown PetscReal afill; 6908a07c6f1SJed Brown PetscInt i,j,col,ndouble = 0; 6910298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 6928a07c6f1SJed Brown PetscHeap h; 6938a07c6f1SJed Brown PetscBT bt; 6948a07c6f1SJed Brown 6958a07c6f1SJed Brown PetscFunctionBegin; 696cd093f1dSJed Brown /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 6978a07c6f1SJed Brown /*---------------------------------------------------------------------------------------------*/ 6988a07c6f1SJed Brown /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 699854ce69bSBarry Smith ierr = PetscMalloc1(am+2,&ci);CHKERRQ(ierr); 7008a07c6f1SJed Brown ci[0] = 0; 7018a07c6f1SJed Brown 7028a07c6f1SJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 703f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);CHKERRQ(ierr); 7042205254eSKarl Rupp 7058a07c6f1SJed Brown current_space = free_space; 7068a07c6f1SJed Brown 7078a07c6f1SJed Brown ierr = PetscHeapCreate(a->rmax,&h);CHKERRQ(ierr); 708785e854fSJed Brown ierr = PetscMalloc1(a->rmax,&bb);CHKERRQ(ierr); 7098a07c6f1SJed Brown ierr = PetscBTCreate(bn,&bt);CHKERRQ(ierr); 7108a07c6f1SJed Brown 7118a07c6f1SJed Brown /* Determine ci and cj */ 7128a07c6f1SJed Brown for (i=0; i<am; i++) { 7138a07c6f1SJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 7148a07c6f1SJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 7158a07c6f1SJed Brown const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 7168a07c6f1SJed Brown ci[i+1] = ci[i]; 7178a07c6f1SJed Brown /* Populate the min heap */ 7188a07c6f1SJed Brown for (j=0; j<anzi; j++) { 7198a07c6f1SJed Brown PetscInt brow = acol[j]; 7208a07c6f1SJed Brown for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 7218a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7228a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7238a07c6f1SJed Brown ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 7248a07c6f1SJed Brown bb[j]++; 7258a07c6f1SJed Brown break; 7268a07c6f1SJed Brown } 7278a07c6f1SJed Brown } 7288a07c6f1SJed Brown } 7298a07c6f1SJed Brown /* Pick off the min element, adding it to free space */ 7308a07c6f1SJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 7318a07c6f1SJed Brown while (j >= 0) { 7328a07c6f1SJed Brown if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 7330298fd71SBarry Smith fptr = NULL; /* need PetscBTMemzero */ 734f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);CHKERRQ(ierr); 7358a07c6f1SJed Brown ndouble++; 7368a07c6f1SJed Brown } 7378a07c6f1SJed Brown *(current_space->array++) = col; 7388a07c6f1SJed Brown current_space->local_used++; 7398a07c6f1SJed Brown current_space->local_remaining--; 7408a07c6f1SJed Brown ci[i+1]++; 7418a07c6f1SJed Brown 7428a07c6f1SJed Brown /* stash if anything else remains in this row of B */ 7438a07c6f1SJed Brown for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 7448a07c6f1SJed Brown PetscInt bcol = bj[bb[j]]; 7458a07c6f1SJed Brown if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 7468a07c6f1SJed Brown ierr = PetscHeapAdd(h,j,bcol);CHKERRQ(ierr); 7478a07c6f1SJed Brown bb[j]++; 7488a07c6f1SJed Brown break; 7498a07c6f1SJed Brown } 7508a07c6f1SJed Brown } 7518a07c6f1SJed Brown ierr = PetscHeapPop(h,&j,&col);CHKERRQ(ierr); 7528a07c6f1SJed Brown } 7538a07c6f1SJed Brown if (fptr) { /* Clear the bits for this row */ 7548a07c6f1SJed Brown for (; fptr<current_space->array; fptr++) {ierr = PetscBTClear(bt,*fptr);CHKERRQ(ierr);} 7558a07c6f1SJed Brown } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 7568a07c6f1SJed Brown ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 7578a07c6f1SJed Brown } 7588a07c6f1SJed Brown } 7598a07c6f1SJed Brown ierr = PetscFree(bb);CHKERRQ(ierr); 7608a07c6f1SJed Brown ierr = PetscHeapDestroy(&h);CHKERRQ(ierr); 7618a07c6f1SJed Brown ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 7628a07c6f1SJed Brown 7638a07c6f1SJed Brown /* Column indices are in the list of free space */ 7648a07c6f1SJed Brown /* Allocate space for cj, initialize cj, and */ 7658a07c6f1SJed Brown /* destroy list of free space and other temporary array(s) */ 766785e854fSJed Brown ierr = PetscMalloc1(ci[am],&cj);CHKERRQ(ierr); 7678a07c6f1SJed Brown ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 7688a07c6f1SJed Brown 7698a07c6f1SJed Brown /* put together the new symbolic matrix */ 770e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 7714222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 7728a07c6f1SJed Brown 7738a07c6f1SJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7748a07c6f1SJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 7754222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 7768a07c6f1SJed Brown c->free_a = PETSC_TRUE; 7778a07c6f1SJed Brown c->free_ij = PETSC_TRUE; 7788a07c6f1SJed Brown c->nonew = 0; 77926fbe8dcSKarl Rupp 7804222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 7818a07c6f1SJed Brown 7828a07c6f1SJed Brown /* set MatInfo */ 7838a07c6f1SJed Brown afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 7848a07c6f1SJed Brown if (afill < 1.0) afill = 1.0; 7858a07c6f1SJed Brown c->maxnz = ci[am]; 7868a07c6f1SJed Brown c->nz = ci[am]; 7874222ddf1SHong Zhang C->info.mallocs = ndouble; 7884222ddf1SHong Zhang C->info.fill_ratio_given = fill; 7894222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 7908a07c6f1SJed Brown 7918a07c6f1SJed Brown #if defined(PETSC_USE_INFO) 7928a07c6f1SJed Brown if (ci[am]) { 7934222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 7944222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 7958a07c6f1SJed Brown } else { 7964222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 7978a07c6f1SJed Brown } 7988a07c6f1SJed Brown #endif 7998a07c6f1SJed Brown PetscFunctionReturn(0); 8008a07c6f1SJed Brown } 8018a07c6f1SJed Brown 802d7ed1a4dSandi selinger 8034222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 804d7ed1a4dSandi selinger { 805d7ed1a4dSandi selinger PetscErrorCode ierr; 806d7ed1a4dSandi selinger Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 807d7ed1a4dSandi selinger const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 808d7ed1a4dSandi selinger PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 809d7ed1a4dSandi selinger PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 810d7ed1a4dSandi selinger const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 811d7ed1a4dSandi selinger const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 812d7ed1a4dSandi selinger const PetscInt *brow_ptr[8],*brow_end[8]; 813d7ed1a4dSandi selinger PetscInt window[8]; 814d7ed1a4dSandi selinger PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 815d7ed1a4dSandi selinger PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 816d7ed1a4dSandi selinger PetscReal afill; 817f83700f2Sandi selinger PetscInt *workj_L1,*workj_L2,*workj_L3; 8187660c4dbSandi selinger PetscInt L1_nnz,L2_nnz; 819d7ed1a4dSandi selinger 820d7ed1a4dSandi selinger /* Step 1: Get upper bound on memory required for allocation. 821d7ed1a4dSandi selinger Because of the way virtual memory works, 822d7ed1a4dSandi selinger only the memory pages that are actually needed will be physically allocated. */ 823d7ed1a4dSandi selinger PetscFunctionBegin; 824d7ed1a4dSandi selinger ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 825d7ed1a4dSandi selinger for (i=0; i<am; i++) { 826d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 827d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 828d7ed1a4dSandi selinger a_rownnz = 0; 829d7ed1a4dSandi selinger for (k=0; k<anzi; ++k) { 830d7ed1a4dSandi selinger a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 831d7ed1a4dSandi selinger if (a_rownnz > bn) { 832d7ed1a4dSandi selinger a_rownnz = bn; 833d7ed1a4dSandi selinger break; 834d7ed1a4dSandi selinger } 835d7ed1a4dSandi selinger } 836d7ed1a4dSandi selinger a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 837d7ed1a4dSandi selinger } 838d7ed1a4dSandi selinger /* temporary work areas for merging rows */ 839d7ed1a4dSandi selinger ierr = PetscMalloc1(a_maxrownnz*8,&workj_L1);CHKERRQ(ierr); 840f83700f2Sandi selinger ierr = PetscMalloc1(a_maxrownnz*8,&workj_L2);CHKERRQ(ierr); 841f83700f2Sandi selinger ierr = PetscMalloc1(a_maxrownnz,&workj_L3);CHKERRQ(ierr); 842d7ed1a4dSandi selinger 8437660c4dbSandi selinger /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 8447660c4dbSandi selinger c_maxmem = 8*(ai[am]+bi[bm]); 845d7ed1a4dSandi selinger /* Step 2: Populate pattern for C */ 846d7ed1a4dSandi selinger ierr = PetscMalloc1(c_maxmem,&cj);CHKERRQ(ierr); 847d7ed1a4dSandi selinger 848d7ed1a4dSandi selinger ci_nnz = 0; 849d7ed1a4dSandi selinger ci[0] = 0; 850d7ed1a4dSandi selinger worki_L1[0] = 0; 851d7ed1a4dSandi selinger worki_L2[0] = 0; 852d7ed1a4dSandi selinger for (i=0; i<am; i++) { 853d7ed1a4dSandi selinger const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 854d7ed1a4dSandi selinger const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 855d7ed1a4dSandi selinger rowsleft = anzi; 856d7ed1a4dSandi selinger inputcol_L1 = acol; 857d7ed1a4dSandi selinger L2_nnz = 0; 8587660c4dbSandi selinger L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 8597660c4dbSandi selinger worki_L2[1] = 0; 86008fe1b3cSKarl Rupp outputi_nnz = 0; 861d7ed1a4dSandi selinger 862d7ed1a4dSandi selinger /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 863d7ed1a4dSandi selinger while (ci_nnz+a_maxrownnz > c_maxmem) { 864d7ed1a4dSandi selinger c_maxmem *= 2; 865d7ed1a4dSandi selinger ndouble++; 866d7ed1a4dSandi selinger ierr = PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);CHKERRQ(ierr); 867d7ed1a4dSandi selinger } 868d7ed1a4dSandi selinger 869d7ed1a4dSandi selinger while (rowsleft) { 870d7ed1a4dSandi selinger L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 871d7ed1a4dSandi selinger L1_nrows = 0; 8727660c4dbSandi selinger L1_nnz = 0; 873d7ed1a4dSandi selinger inputcol = inputcol_L1; 874d7ed1a4dSandi selinger inputi = bi; 875d7ed1a4dSandi selinger inputj = bj; 876d7ed1a4dSandi selinger 877d7ed1a4dSandi selinger /* The following macro is used to specialize for small rows in A. 878d7ed1a4dSandi selinger This helps with compiler unrolling, improving performance substantially. 879f83700f2Sandi selinger Input: inputj inputi inputcol bn 880d7ed1a4dSandi selinger Output: outputj outputi_nnz */ 881d7ed1a4dSandi selinger #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 882d7ed1a4dSandi selinger window_min = bn; \ 8837660c4dbSandi selinger outputi_nnz = 0; \ 8847660c4dbSandi selinger for (k=0; k<ANNZ; ++k) { \ 885d7ed1a4dSandi selinger brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 886d7ed1a4dSandi selinger brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 887d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 888d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 889d7ed1a4dSandi selinger } \ 890d7ed1a4dSandi selinger while (window_min < bn) { \ 891d7ed1a4dSandi selinger outputj[outputi_nnz++] = window_min; \ 892d7ed1a4dSandi selinger /* advance front and compute new minimum */ \ 893d7ed1a4dSandi selinger old_window_min = window_min; \ 894d7ed1a4dSandi selinger window_min = bn; \ 895d7ed1a4dSandi selinger for (k=0; k<ANNZ; ++k) { \ 896d7ed1a4dSandi selinger if (window[k] == old_window_min) { \ 897d7ed1a4dSandi selinger brow_ptr[k]++; \ 898d7ed1a4dSandi selinger window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 899d7ed1a4dSandi selinger } \ 900d7ed1a4dSandi selinger window_min = PetscMin(window[k], window_min); \ 901d7ed1a4dSandi selinger } \ 902d7ed1a4dSandi selinger } 903d7ed1a4dSandi selinger 904d7ed1a4dSandi selinger /************** L E V E L 1 ***************/ 905d7ed1a4dSandi selinger /* Merge up to 8 rows of B to L1 work array*/ 906d7ed1a4dSandi selinger while (L1_rowsleft) { 9077660c4dbSandi selinger outputi_nnz = 0; 9087660c4dbSandi selinger if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 9097660c4dbSandi selinger else outputj = cj + ci_nnz; /* Merge directly to C */ 9107660c4dbSandi selinger 911d7ed1a4dSandi selinger switch (L1_rowsleft) { 912d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 913d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 914d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 915d7ed1a4dSandi selinger inputcol += L1_rowsleft; 916d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 917d7ed1a4dSandi selinger L1_rowsleft = 0; 918d7ed1a4dSandi selinger break; 919d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); 920d7ed1a4dSandi selinger inputcol += L1_rowsleft; 921d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 922d7ed1a4dSandi selinger L1_rowsleft = 0; 923d7ed1a4dSandi selinger break; 924d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); 925d7ed1a4dSandi selinger inputcol += L1_rowsleft; 926d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 927d7ed1a4dSandi selinger L1_rowsleft = 0; 928d7ed1a4dSandi selinger break; 929d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); 930d7ed1a4dSandi selinger inputcol += L1_rowsleft; 931d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 932d7ed1a4dSandi selinger L1_rowsleft = 0; 933d7ed1a4dSandi selinger break; 934d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); 935d7ed1a4dSandi selinger inputcol += L1_rowsleft; 936d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 937d7ed1a4dSandi selinger L1_rowsleft = 0; 938d7ed1a4dSandi selinger break; 939d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); 940d7ed1a4dSandi selinger inputcol += L1_rowsleft; 941d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 942d7ed1a4dSandi selinger L1_rowsleft = 0; 943d7ed1a4dSandi selinger break; 944d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); 945d7ed1a4dSandi selinger inputcol += L1_rowsleft; 946d7ed1a4dSandi selinger rowsleft -= L1_rowsleft; 947d7ed1a4dSandi selinger L1_rowsleft = 0; 948d7ed1a4dSandi selinger break; 949d7ed1a4dSandi selinger default: MatMatMultSymbolic_RowMergeMacro(8); 950d7ed1a4dSandi selinger inputcol += 8; 951d7ed1a4dSandi selinger rowsleft -= 8; 952d7ed1a4dSandi selinger L1_rowsleft -= 8; 953d7ed1a4dSandi selinger break; 954d7ed1a4dSandi selinger } 955d7ed1a4dSandi selinger inputcol_L1 = inputcol; 9567660c4dbSandi selinger L1_nnz += outputi_nnz; 9577660c4dbSandi selinger worki_L1[++L1_nrows] = L1_nnz; 958d7ed1a4dSandi selinger } 959d7ed1a4dSandi selinger 960d7ed1a4dSandi selinger /********************** L E V E L 2 ************************/ 961d7ed1a4dSandi selinger /* Merge from L1 work array to either C or to L2 work array */ 962d7ed1a4dSandi selinger if (anzi > 8) { 963d7ed1a4dSandi selinger inputi = worki_L1; 964d7ed1a4dSandi selinger inputj = workj_L1; 965d7ed1a4dSandi selinger inputcol = workcol; 966d7ed1a4dSandi selinger outputi_nnz = 0; 967d7ed1a4dSandi selinger 968d7ed1a4dSandi selinger if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 969d7ed1a4dSandi selinger else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 970d7ed1a4dSandi selinger 971d7ed1a4dSandi selinger switch (L1_nrows) { 972d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 973d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 974d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 975d7ed1a4dSandi selinger break; 976d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 977d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 978d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 979d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 980d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 981d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 982d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 983d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 984d7ed1a4dSandi selinger } 985d7ed1a4dSandi selinger L2_nnz += outputi_nnz; 986d7ed1a4dSandi selinger worki_L2[++L2_nrows] = L2_nnz; 987d7ed1a4dSandi selinger 9887660c4dbSandi selinger /************************ L E V E L 3 **********************/ 9897660c4dbSandi selinger /* Merge from L2 work array to either C or to L2 work array */ 990d7ed1a4dSandi selinger if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 991d7ed1a4dSandi selinger inputi = worki_L2; 992d7ed1a4dSandi selinger inputj = workj_L2; 993d7ed1a4dSandi selinger inputcol = workcol; 994d7ed1a4dSandi selinger outputi_nnz = 0; 995f83700f2Sandi selinger if (rowsleft) outputj = workj_L3; 996d7ed1a4dSandi selinger else outputj = cj + ci_nnz; 997d7ed1a4dSandi selinger switch (L2_nrows) { 998d7ed1a4dSandi selinger case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 999d7ed1a4dSandi selinger brow_end[0] = inputj + inputi[inputcol[0]+1]; 1000d7ed1a4dSandi selinger for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1001d7ed1a4dSandi selinger break; 1002d7ed1a4dSandi selinger case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1003d7ed1a4dSandi selinger case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1004d7ed1a4dSandi selinger case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1005d7ed1a4dSandi selinger case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1006d7ed1a4dSandi selinger case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1007d7ed1a4dSandi selinger case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1008d7ed1a4dSandi selinger case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1009d7ed1a4dSandi selinger default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1010d7ed1a4dSandi selinger } 1011d7ed1a4dSandi selinger L2_nrows = 1; 10127660c4dbSandi selinger L2_nnz = outputi_nnz; 10137660c4dbSandi selinger worki_L2[1] = outputi_nnz; 10147660c4dbSandi selinger /* Copy to workj_L2 */ 1015d7ed1a4dSandi selinger if (rowsleft) { 10167660c4dbSandi selinger for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1017d7ed1a4dSandi selinger } 1018d7ed1a4dSandi selinger } 1019d7ed1a4dSandi selinger } 1020d7ed1a4dSandi selinger } /* while (rowsleft) */ 1021d7ed1a4dSandi selinger #undef MatMatMultSymbolic_RowMergeMacro 1022d7ed1a4dSandi selinger 1023d7ed1a4dSandi selinger /* terminate current row */ 1024d7ed1a4dSandi selinger ci_nnz += outputi_nnz; 1025d7ed1a4dSandi selinger ci[i+1] = ci_nnz; 1026d7ed1a4dSandi selinger } 1027d7ed1a4dSandi selinger 1028d7ed1a4dSandi selinger /* Step 3: Create the new symbolic matrix */ 1029e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 10304222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1031d7ed1a4dSandi selinger 1032d7ed1a4dSandi selinger /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1033d7ed1a4dSandi selinger /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 10344222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1035d7ed1a4dSandi selinger c->free_a = PETSC_TRUE; 1036d7ed1a4dSandi selinger c->free_ij = PETSC_TRUE; 1037d7ed1a4dSandi selinger c->nonew = 0; 1038d7ed1a4dSandi selinger 10394222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1040d7ed1a4dSandi selinger 1041d7ed1a4dSandi selinger /* set MatInfo */ 1042d7ed1a4dSandi selinger afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1043d7ed1a4dSandi selinger if (afill < 1.0) afill = 1.0; 1044d7ed1a4dSandi selinger c->maxnz = ci[am]; 1045d7ed1a4dSandi selinger c->nz = ci[am]; 10464222ddf1SHong Zhang C->info.mallocs = ndouble; 10474222ddf1SHong Zhang C->info.fill_ratio_given = fill; 10484222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1049d7ed1a4dSandi selinger 1050d7ed1a4dSandi selinger #if defined(PETSC_USE_INFO) 1051d7ed1a4dSandi selinger if (ci[am]) { 10524222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 10534222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1054d7ed1a4dSandi selinger } else { 10554222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1056d7ed1a4dSandi selinger } 1057d7ed1a4dSandi selinger #endif 1058d7ed1a4dSandi selinger 1059d7ed1a4dSandi selinger /* Step 4: Free temporary work areas */ 1060d7ed1a4dSandi selinger ierr = PetscFree(workj_L1);CHKERRQ(ierr); 1061d7ed1a4dSandi selinger ierr = PetscFree(workj_L2);CHKERRQ(ierr); 1062f83700f2Sandi selinger ierr = PetscFree(workj_L3);CHKERRQ(ierr); 1063d7ed1a4dSandi selinger PetscFunctionReturn(0); 1064d7ed1a4dSandi selinger } 1065d7ed1a4dSandi selinger 1066cd093f1dSJed Brown /* concatenate unique entries and then sort */ 10674222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1068cd093f1dSJed Brown { 1069cd093f1dSJed Brown PetscErrorCode ierr; 1070cd093f1dSJed Brown Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1071cd093f1dSJed Brown const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1072cd093f1dSJed Brown PetscInt *ci,*cj; 1073cd093f1dSJed Brown PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1074cd093f1dSJed Brown PetscReal afill; 1075cd093f1dSJed Brown PetscInt i,j,ndouble = 0; 1076cd093f1dSJed Brown PetscSegBuffer seg,segrow; 1077cd093f1dSJed Brown char *seen; 1078cd093f1dSJed Brown 1079cd093f1dSJed Brown PetscFunctionBegin; 1080854ce69bSBarry Smith ierr = PetscMalloc1(am+1,&ci);CHKERRQ(ierr); 1081cd093f1dSJed Brown ci[0] = 0; 1082cd093f1dSJed Brown 1083cd093f1dSJed Brown /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1084cd093f1dSJed Brown ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr); 1085cd093f1dSJed Brown ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr); 1086580bdb30SBarry Smith ierr = PetscCalloc1(bn,&seen);CHKERRQ(ierr); 1087cd093f1dSJed Brown 1088cd093f1dSJed Brown /* Determine ci and cj */ 1089cd093f1dSJed Brown for (i=0; i<am; i++) { 1090cd093f1dSJed Brown const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1091cd093f1dSJed Brown const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1092cd093f1dSJed Brown PetscInt packlen = 0,*PETSC_RESTRICT crow; 1093cd093f1dSJed Brown /* Pack segrow */ 1094cd093f1dSJed Brown for (j=0; j<anzi; j++) { 1095cd093f1dSJed Brown PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1096cd093f1dSJed Brown for (k=bjstart; k<bjend; k++) { 1097cd093f1dSJed Brown PetscInt bcol = bj[k]; 1098cd093f1dSJed Brown if (!seen[bcol]) { /* new entry */ 1099cd093f1dSJed Brown PetscInt *PETSC_RESTRICT slot; 1100cd093f1dSJed Brown ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr); 1101cd093f1dSJed Brown *slot = bcol; 1102cd093f1dSJed Brown seen[bcol] = 1; 1103cd093f1dSJed Brown packlen++; 1104cd093f1dSJed Brown } 1105cd093f1dSJed Brown } 1106cd093f1dSJed Brown } 1107cd093f1dSJed Brown ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr); 1108cd093f1dSJed Brown ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr); 1109cd093f1dSJed Brown ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr); 1110cd093f1dSJed Brown ci[i+1] = ci[i] + packlen; 1111cd093f1dSJed Brown for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1112cd093f1dSJed Brown } 1113cd093f1dSJed Brown ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr); 1114cd093f1dSJed Brown ierr = PetscFree(seen);CHKERRQ(ierr); 1115cd093f1dSJed Brown 1116cd093f1dSJed Brown /* Column indices are in the segmented buffer */ 1117cd093f1dSJed Brown ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr); 1118cd093f1dSJed Brown ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr); 1119cd093f1dSJed Brown 1120cd093f1dSJed Brown /* put together the new symbolic matrix */ 1121e4e71118SStefano Zampini ierr = MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);CHKERRQ(ierr); 11224222ddf1SHong Zhang ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 1123cd093f1dSJed Brown 1124cd093f1dSJed Brown /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1125cd093f1dSJed Brown /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 11264222ddf1SHong Zhang c = (Mat_SeqAIJ*)(C->data); 1127cd093f1dSJed Brown c->free_a = PETSC_TRUE; 1128cd093f1dSJed Brown c->free_ij = PETSC_TRUE; 1129cd093f1dSJed Brown c->nonew = 0; 1130cd093f1dSJed Brown 11314222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1132cd093f1dSJed Brown 1133cd093f1dSJed Brown /* set MatInfo */ 11342a09556fSStefano Zampini afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1135cd093f1dSJed Brown if (afill < 1.0) afill = 1.0; 1136cd093f1dSJed Brown c->maxnz = ci[am]; 1137cd093f1dSJed Brown c->nz = ci[am]; 11384222ddf1SHong Zhang C->info.mallocs = ndouble; 11394222ddf1SHong Zhang C->info.fill_ratio_given = fill; 11404222ddf1SHong Zhang C->info.fill_ratio_needed = afill; 1141cd093f1dSJed Brown 1142cd093f1dSJed Brown #if defined(PETSC_USE_INFO) 1143cd093f1dSJed Brown if (ci[am]) { 11444222ddf1SHong Zhang ierr = PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);CHKERRQ(ierr); 11454222ddf1SHong Zhang ierr = PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);CHKERRQ(ierr); 1146cd093f1dSJed Brown } else { 11474222ddf1SHong Zhang ierr = PetscInfo(C,"Empty matrix product\n");CHKERRQ(ierr); 1148cd093f1dSJed Brown } 1149cd093f1dSJed Brown #endif 1150cd093f1dSJed Brown PetscFunctionReturn(0); 1151cd093f1dSJed Brown } 1152cd093f1dSJed Brown 11536718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 11542128a86cSHong Zhang { 11552128a86cSHong Zhang PetscErrorCode ierr; 11566718818eSStefano Zampini Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 11572128a86cSHong Zhang 11582128a86cSHong Zhang PetscFunctionBegin; 115940192850SHong Zhang ierr = MatTransposeColoringDestroy(&abt->matcoloring);CHKERRQ(ierr); 116040192850SHong Zhang ierr = MatDestroy(&abt->Bt_den);CHKERRQ(ierr); 116140192850SHong Zhang ierr = MatDestroy(&abt->ABt_den);CHKERRQ(ierr); 116240192850SHong Zhang ierr = PetscFree(abt);CHKERRQ(ierr); 11632128a86cSHong Zhang PetscFunctionReturn(0); 11642128a86cSHong Zhang } 11652128a86cSHong Zhang 11664222ddf1SHong Zhang PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1167bc011b1eSHong Zhang { 11685df89d91SHong Zhang PetscErrorCode ierr; 116981d82fe4SHong Zhang Mat Bt; 117081d82fe4SHong Zhang PetscInt *bti,*btj; 117140192850SHong Zhang Mat_MatMatTransMult *abt; 11724222ddf1SHong Zhang Mat_Product *product = C->product; 11736718818eSStefano Zampini char *alg; 1174d2853540SHong Zhang 117581d82fe4SHong Zhang PetscFunctionBegin; 11766718818eSStefano Zampini if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 11776718818eSStefano Zampini if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 11786718818eSStefano Zampini 117981d82fe4SHong Zhang /* create symbolic Bt */ 118081d82fe4SHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 11810298fd71SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);CHKERRQ(ierr); 118233d57670SJed Brown ierr = MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 118304b858e0SBarry Smith ierr = MatSetType(Bt,((PetscObject)A)->type_name);CHKERRQ(ierr); 118481d82fe4SHong Zhang 118581d82fe4SHong Zhang /* get symbolic C=A*Bt */ 11866718818eSStefano Zampini ierr = PetscStrallocpy(product->alg,&alg);CHKERRQ(ierr); 11874222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); /* set algorithm for C = A*Bt */ 118881d82fe4SHong Zhang ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 11894222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,alg);CHKERRQ(ierr); /* resume original algorithm for ABt product */ 11906718818eSStefano Zampini ierr = PetscFree(alg);CHKERRQ(ierr); 119181d82fe4SHong Zhang 11922128a86cSHong Zhang /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 1193b00a9115SJed Brown ierr = PetscNew(&abt);CHKERRQ(ierr); 11942128a86cSHong Zhang 11956718818eSStefano Zampini product->data = abt; 11966718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 11976718818eSStefano Zampini 11984222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 11992128a86cSHong Zhang 12004222ddf1SHong Zhang abt->usecoloring = PETSC_FALSE; 12014222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"color",&abt->usecoloring);CHKERRQ(ierr); 120240192850SHong Zhang if (abt->usecoloring) { 1203b9af6bddSHong Zhang /* Create MatTransposeColoring from symbolic C=A*B^T */ 1204b9af6bddSHong Zhang MatTransposeColoring matcoloring; 1205335efc43SPeter Brune MatColoring coloring; 12062128a86cSHong Zhang ISColoring iscoloring; 12072128a86cSHong Zhang Mat Bt_dense,C_dense; 1208e8354b3eSHong Zhang 12094222ddf1SHong Zhang /* inode causes memory problem */ 12104222ddf1SHong Zhang ierr = MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 12114222ddf1SHong Zhang 12124222ddf1SHong Zhang ierr = MatColoringCreate(C,&coloring);CHKERRQ(ierr); 1213335efc43SPeter Brune ierr = MatColoringSetDistance(coloring,2);CHKERRQ(ierr); 1214335efc43SPeter Brune ierr = MatColoringSetType(coloring,MATCOLORINGSL);CHKERRQ(ierr); 1215335efc43SPeter Brune ierr = MatColoringSetFromOptions(coloring);CHKERRQ(ierr); 1216335efc43SPeter Brune ierr = MatColoringApply(coloring,&iscoloring);CHKERRQ(ierr); 1217335efc43SPeter Brune ierr = MatColoringDestroy(&coloring);CHKERRQ(ierr); 12184222ddf1SHong Zhang ierr = MatTransposeColoringCreate(C,iscoloring,&matcoloring);CHKERRQ(ierr); 12192205254eSKarl Rupp 122040192850SHong Zhang abt->matcoloring = matcoloring; 12212205254eSKarl Rupp 12222128a86cSHong Zhang ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 12232128a86cSHong Zhang 12242128a86cSHong Zhang /* Create Bt_dense and C_dense = A*Bt_dense */ 12252128a86cSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 12262128a86cSHong Zhang ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 12272128a86cSHong Zhang ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 12280298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(Bt_dense,NULL);CHKERRQ(ierr); 12292205254eSKarl Rupp 12302128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 123140192850SHong Zhang abt->Bt_den = Bt_dense; 12322128a86cSHong Zhang 12332128a86cSHong Zhang ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 12342128a86cSHong Zhang ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 12352128a86cSHong Zhang ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 12360298fd71SBarry Smith ierr = MatSeqDenseSetPreallocation(C_dense,NULL);CHKERRQ(ierr); 12372205254eSKarl Rupp 12382128a86cSHong Zhang Bt_dense->assembled = PETSC_TRUE; 123940192850SHong Zhang abt->ABt_den = C_dense; 1240f94ccd6cSHong Zhang 1241f94ccd6cSHong Zhang #if defined(PETSC_USE_INFO) 12421ce71dffSSatish Balay { 12434222ddf1SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 12444222ddf1SHong Zhang ierr = PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));CHKERRQ(ierr); 12451ce71dffSSatish Balay } 1246f94ccd6cSHong Zhang #endif 12472128a86cSHong Zhang } 1248e99be685SHong Zhang /* clean up */ 1249e99be685SHong Zhang ierr = MatDestroy(&Bt);CHKERRQ(ierr); 1250e99be685SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 12515df89d91SHong Zhang PetscFunctionReturn(0); 12525df89d91SHong Zhang } 12535df89d91SHong Zhang 12546fc122caSHong Zhang PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 12555df89d91SHong Zhang { 12565df89d91SHong Zhang PetscErrorCode ierr; 12575df89d91SHong Zhang Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1258e2cac8caSJed Brown PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 125981d82fe4SHong Zhang PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 12605df89d91SHong Zhang PetscLogDouble flops=0.0; 1261aa1aec99SHong Zhang MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 12626718818eSStefano Zampini Mat_MatMatTransMult *abt; 12636718818eSStefano Zampini Mat_Product *product = C->product; 12645df89d91SHong Zhang 12655df89d91SHong Zhang PetscFunctionBegin; 12666718818eSStefano Zampini if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 12676718818eSStefano Zampini abt = (Mat_MatMatTransMult *)product->data; 12686718818eSStefano Zampini if (!abt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 126958ed3ceaSHong Zhang /* clear old values in C */ 127058ed3ceaSHong Zhang if (!c->a) { 1271580bdb30SBarry Smith ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 127258ed3ceaSHong Zhang c->a = ca; 127358ed3ceaSHong Zhang c->free_a = PETSC_TRUE; 127458ed3ceaSHong Zhang } else { 127558ed3ceaSHong Zhang ca = c->a; 1276580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]+1);CHKERRQ(ierr); 127758ed3ceaSHong Zhang } 127858ed3ceaSHong Zhang 127940192850SHong Zhang if (abt->usecoloring) { 128040192850SHong Zhang MatTransposeColoring matcoloring = abt->matcoloring; 128140192850SHong Zhang Mat Bt_dense,C_dense = abt->ABt_den; 1282c8db22f6SHong Zhang 1283b9af6bddSHong Zhang /* Get Bt_dense by Apply MatTransposeColoring to B */ 128440192850SHong Zhang Bt_dense = abt->Bt_den; 1285b9af6bddSHong Zhang ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 1286c8db22f6SHong Zhang 1287c8db22f6SHong Zhang /* C_dense = A*Bt_dense */ 12882128a86cSHong Zhang ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 1289c8db22f6SHong Zhang 12902128a86cSHong Zhang /* Recover C from C_dense */ 1291b9af6bddSHong Zhang ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 1292c8db22f6SHong Zhang PetscFunctionReturn(0); 1293c8db22f6SHong Zhang } 1294c8db22f6SHong Zhang 12951fa1209cSHong Zhang for (i=0; i<cm; i++) { 129681d82fe4SHong Zhang anzi = ai[i+1] - ai[i]; 129781d82fe4SHong Zhang acol = aj + ai[i]; 12986973769bSHong Zhang aval = aa + ai[i]; 12991fa1209cSHong Zhang cnzi = ci[i+1] - ci[i]; 13001fa1209cSHong Zhang ccol = cj + ci[i]; 13016973769bSHong Zhang cval = ca + ci[i]; 13021fa1209cSHong Zhang for (j=0; j<cnzi; j++) { 130381d82fe4SHong Zhang brow = ccol[j]; 130481d82fe4SHong Zhang bnzj = bi[brow+1] - bi[brow]; 130581d82fe4SHong Zhang bcol = bj + bi[brow]; 13066973769bSHong Zhang bval = ba + bi[brow]; 13076973769bSHong Zhang 130881d82fe4SHong Zhang /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 130981d82fe4SHong Zhang nexta = 0; nextb = 0; 131081d82fe4SHong Zhang while (nexta<anzi && nextb<bnzj) { 13117b6d5e96SMark Adams while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 131281d82fe4SHong Zhang if (nexta == anzi) break; 13137b6d5e96SMark Adams while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 131481d82fe4SHong Zhang if (nextb == bnzj) break; 131581d82fe4SHong Zhang if (acol[nexta] == bcol[nextb]) { 13166973769bSHong Zhang cval[j] += aval[nexta]*bval[nextb]; 131781d82fe4SHong Zhang nexta++; nextb++; 131881d82fe4SHong Zhang flops += 2; 13191fa1209cSHong Zhang } 13201fa1209cSHong Zhang } 132181d82fe4SHong Zhang } 132281d82fe4SHong Zhang } 132381d82fe4SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 132481d82fe4SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 132581d82fe4SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 13265df89d91SHong Zhang PetscFunctionReturn(0); 13275df89d91SHong Zhang } 13285df89d91SHong Zhang 13296718818eSStefano Zampini PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 13306d373c3eSHong Zhang { 13316d373c3eSHong Zhang PetscErrorCode ierr; 13326718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 13336d373c3eSHong Zhang 13346d373c3eSHong Zhang PetscFunctionBegin; 13356d373c3eSHong Zhang ierr = MatDestroy(&atb->At);CHKERRQ(ierr); 13366718818eSStefano Zampini if (atb->destroy) { 13376718818eSStefano Zampini ierr = (*atb->destroy)(atb->data);CHKERRQ(ierr); 13386473ade5SStefano Zampini } 13396d373c3eSHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 13406d373c3eSHong Zhang PetscFunctionReturn(0); 13416d373c3eSHong Zhang } 13426d373c3eSHong Zhang 13434222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 13445df89d91SHong Zhang { 1345bc011b1eSHong Zhang PetscErrorCode ierr; 1346*089a957eSStefano Zampini Mat At = NULL; 134738baddfdSBarry Smith PetscInt *ati,*atj; 13484222ddf1SHong Zhang Mat_Product *product = C->product; 1349*089a957eSStefano Zampini PetscBool flg,def,square; 1350bc011b1eSHong Zhang 1351bc011b1eSHong Zhang PetscFunctionBegin; 1352*089a957eSStefano Zampini MatCheckProduct(C,4); 1353*089a957eSStefano Zampini square = (PetscBool)(A == B && A->symmetric && A->symmetric_set); 13544222ddf1SHong Zhang /* outerproduct */ 1355*089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"outerproduct",&flg);CHKERRQ(ierr); 13564222ddf1SHong Zhang if (flg) { 1357bc011b1eSHong Zhang /* create symbolic At */ 1358*089a957eSStefano Zampini if (!square) { 1359bc011b1eSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 13600298fd71SBarry Smith ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);CHKERRQ(ierr); 136133d57670SJed Brown ierr = MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));CHKERRQ(ierr); 136204b858e0SBarry Smith ierr = MatSetType(At,((PetscObject)A)->type_name);CHKERRQ(ierr); 1363*089a957eSStefano Zampini } 1364bc011b1eSHong Zhang /* get symbolic C=At*B */ 13657a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1366*089a957eSStefano Zampini ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 1367bc011b1eSHong Zhang 1368bc011b1eSHong Zhang /* clean up */ 1369*089a957eSStefano Zampini if (!square) { 13706bf464f9SBarry Smith ierr = MatDestroy(&At);CHKERRQ(ierr); 1371bc011b1eSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1372*089a957eSStefano Zampini } 13736d373c3eSHong Zhang 13744222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 13757a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"outerproduct");CHKERRQ(ierr); 13764222ddf1SHong Zhang PetscFunctionReturn(0); 13774222ddf1SHong Zhang } 13784222ddf1SHong Zhang 13794222ddf1SHong Zhang /* matmatmult */ 1380*089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"default",&def);CHKERRQ(ierr); 1381*089a957eSStefano Zampini ierr = PetscStrcmp(product->alg,"at*b",&flg);CHKERRQ(ierr); 1382*089a957eSStefano Zampini if (flg || def) { 13834222ddf1SHong Zhang Mat_MatTransMatMult *atb; 13844222ddf1SHong Zhang 13856718818eSStefano Zampini if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 13864222ddf1SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 1387*089a957eSStefano Zampini if (!square) { 13884222ddf1SHong Zhang ierr = MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 1389*089a957eSStefano Zampini } 13907a3c3d58SStefano Zampini ierr = MatProductSetAlgorithm(C,"sorted");CHKERRQ(ierr); 1391*089a957eSStefano Zampini ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);CHKERRQ(ierr); 13926718818eSStefano Zampini ierr = MatProductSetAlgorithm(C,"at*b");CHKERRQ(ierr); 13936718818eSStefano Zampini product->data = atb; 13946718818eSStefano Zampini product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 13954222ddf1SHong Zhang atb->At = At; 13964222ddf1SHong Zhang atb->updateAt = PETSC_FALSE; /* because At is computed here */ 13974222ddf1SHong Zhang 13984222ddf1SHong Zhang C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 13994222ddf1SHong Zhang PetscFunctionReturn(0); 14004222ddf1SHong Zhang } 14014222ddf1SHong Zhang 14024222ddf1SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1403bc011b1eSHong Zhang PetscFunctionReturn(0); 1404bc011b1eSHong Zhang } 1405bc011b1eSHong Zhang 140675648e8dSHong Zhang PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1407bc011b1eSHong Zhang { 1408bc011b1eSHong Zhang PetscErrorCode ierr; 14090fbc74f4SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1410d0f46423SBarry Smith PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1411d0f46423SBarry Smith PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1412d13dce4bSSatish Balay PetscLogDouble flops=0.0; 1413aa1aec99SHong Zhang MatScalar *aa=a->a,*ba,*ca,*caj; 1414bc011b1eSHong Zhang 1415bc011b1eSHong Zhang PetscFunctionBegin; 1416aa1aec99SHong Zhang if (!c->a) { 1417580bdb30SBarry Smith ierr = PetscCalloc1(ci[cm]+1,&ca);CHKERRQ(ierr); 14182205254eSKarl Rupp 1419aa1aec99SHong Zhang c->a = ca; 1420aa1aec99SHong Zhang c->free_a = PETSC_TRUE; 1421aa1aec99SHong Zhang } else { 1422aa1aec99SHong Zhang ca = c->a; 1423580bdb30SBarry Smith ierr = PetscArrayzero(ca,ci[cm]);CHKERRQ(ierr); 1424aa1aec99SHong Zhang } 1425bc011b1eSHong Zhang 1426bc011b1eSHong Zhang /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1427bc011b1eSHong Zhang for (i=0; i<am; i++) { 1428bc011b1eSHong Zhang bj = b->j + bi[i]; 1429bc011b1eSHong Zhang ba = b->a + bi[i]; 1430bc011b1eSHong Zhang bnzi = bi[i+1] - bi[i]; 1431bc011b1eSHong Zhang anzi = ai[i+1] - ai[i]; 1432bc011b1eSHong Zhang for (j=0; j<anzi; j++) { 1433bc011b1eSHong Zhang nextb = 0; 14340fbc74f4SHong Zhang crow = *aj++; 1435bc011b1eSHong Zhang cjj = cj + ci[crow]; 1436bc011b1eSHong Zhang caj = ca + ci[crow]; 1437bc011b1eSHong Zhang /* perform sparse axpy operation. Note cjj includes bj. */ 1438bc011b1eSHong Zhang for (k=0; nextb<bnzi; k++) { 14390fbc74f4SHong Zhang if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 14400fbc74f4SHong Zhang caj[k] += (*aa)*(*(ba+nextb)); 1441bc011b1eSHong Zhang nextb++; 1442bc011b1eSHong Zhang } 1443bc011b1eSHong Zhang } 1444bc011b1eSHong Zhang flops += 2*bnzi; 14450fbc74f4SHong Zhang aa++; 1446bc011b1eSHong Zhang } 1447bc011b1eSHong Zhang } 1448bc011b1eSHong Zhang 1449bc011b1eSHong Zhang /* Assemble the final matrix and clean up */ 1450bc011b1eSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1451bc011b1eSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452bc011b1eSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1453bc011b1eSHong Zhang PetscFunctionReturn(0); 1454bc011b1eSHong Zhang } 14559123193aSHong Zhang 14564222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 14579123193aSHong Zhang { 14589123193aSHong Zhang PetscErrorCode ierr; 14599123193aSHong Zhang 14609123193aSHong Zhang PetscFunctionBegin; 14615a586d82SBarry Smith ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 14624222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 14639123193aSHong Zhang PetscFunctionReturn(0); 14649123193aSHong Zhang } 14659123193aSHong Zhang 146693aa15f2SStefano Zampini PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 14679123193aSHong Zhang { 1468f32d5d43SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1469612438f1SStefano Zampini Mat_SeqDense *bd=(Mat_SeqDense*)B->data; 14701ca9667aSStefano Zampini Mat_SeqDense *cd=(Mat_SeqDense*)C->data; 14719123193aSHong Zhang PetscErrorCode ierr; 14721ca9667aSStefano Zampini PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1473a4af7ca8SStefano Zampini const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1474daf57211SHong Zhang const PetscInt *aj; 1475612438f1SStefano Zampini PetscInt cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n; 14761ca9667aSStefano Zampini PetscInt clda=cd->lda; 14771ca9667aSStefano Zampini PetscInt am4=4*clda,bm4=4*bm,col,i,j,n; 14789123193aSHong Zhang 14799123193aSHong Zhang PetscFunctionBegin; 1480f32d5d43SBarry Smith if (!cm || !cn) PetscFunctionReturn(0); 1481a4af7ca8SStefano Zampini ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr); 148293aa15f2SStefano Zampini if (add) { 14838c778c55SBarry Smith ierr = MatDenseGetArray(C,&c);CHKERRQ(ierr); 148493aa15f2SStefano Zampini } else { 148593aa15f2SStefano Zampini ierr = MatDenseGetArrayWrite(C,&c);CHKERRQ(ierr); 148693aa15f2SStefano Zampini } 1487a4af7ca8SStefano Zampini ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr); 1488f32d5d43SBarry Smith b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 14891ca9667aSStefano Zampini c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 14901ca9667aSStefano Zampini for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 14911ca9667aSStefano Zampini for (i=0; i<am; i++) { /* over rows of A in those columns */ 1492f32d5d43SBarry Smith r1 = r2 = r3 = r4 = 0.0; 1493f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1494f32d5d43SBarry Smith aj = a->j + a->i[i]; 1495a4af7ca8SStefano Zampini aa = av + a->i[i]; 1496f32d5d43SBarry Smith for (j=0; j<n; j++) { 14971ca9667aSStefano Zampini const PetscScalar aatmp = aa[j]; 14981ca9667aSStefano Zampini const PetscInt ajtmp = aj[j]; 1499730858b9SHong Zhang r1 += aatmp*b1[ajtmp]; 1500730858b9SHong Zhang r2 += aatmp*b2[ajtmp]; 1501730858b9SHong Zhang r3 += aatmp*b3[ajtmp]; 1502730858b9SHong Zhang r4 += aatmp*b4[ajtmp]; 15039123193aSHong Zhang } 150493aa15f2SStefano Zampini if (add) { 150587753ddeSHong Zhang c1[i] += r1; 150687753ddeSHong Zhang c2[i] += r2; 150787753ddeSHong Zhang c3[i] += r3; 150887753ddeSHong Zhang c4[i] += r4; 150993aa15f2SStefano Zampini } else { 151093aa15f2SStefano Zampini c1[i] = r1; 151193aa15f2SStefano Zampini c2[i] = r2; 151293aa15f2SStefano Zampini c3[i] = r3; 151393aa15f2SStefano Zampini c4[i] = r4; 151493aa15f2SStefano Zampini } 1515f32d5d43SBarry Smith } 1516730858b9SHong Zhang b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1517730858b9SHong Zhang c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1518f32d5d43SBarry Smith } 151993aa15f2SStefano Zampini /* process remaining columns */ 152093aa15f2SStefano Zampini if (col != cn) { 152193aa15f2SStefano Zampini PetscInt rc = cn-col; 152293aa15f2SStefano Zampini 152393aa15f2SStefano Zampini if (rc == 1) { 152493aa15f2SStefano Zampini for (i=0; i<am; i++) { 1525f32d5d43SBarry Smith r1 = 0.0; 1526f32d5d43SBarry Smith n = a->i[i+1] - a->i[i]; 1527f32d5d43SBarry Smith aj = a->j + a->i[i]; 1528a4af7ca8SStefano Zampini aa = av + a->i[i]; 152993aa15f2SStefano Zampini for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 153093aa15f2SStefano Zampini if (add) c1[i] += r1; 153193aa15f2SStefano Zampini else c1[i] = r1; 153293aa15f2SStefano Zampini } 153393aa15f2SStefano Zampini } else if (rc == 2) { 153493aa15f2SStefano Zampini for (i=0; i<am; i++) { 153593aa15f2SStefano Zampini r1 = r2 = 0.0; 153693aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 153793aa15f2SStefano Zampini aj = a->j + a->i[i]; 153893aa15f2SStefano Zampini aa = av + a->i[i]; 1539f32d5d43SBarry Smith for (j=0; j<n; j++) { 154093aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 154193aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 154293aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 154393aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 1544f32d5d43SBarry Smith } 154593aa15f2SStefano Zampini if (add) { 154687753ddeSHong Zhang c1[i] += r1; 154793aa15f2SStefano Zampini c2[i] += r2; 154893aa15f2SStefano Zampini } else { 154993aa15f2SStefano Zampini c1[i] = r1; 155093aa15f2SStefano Zampini c2[i] = r2; 1551f32d5d43SBarry Smith } 155293aa15f2SStefano Zampini } 155393aa15f2SStefano Zampini } else { 155493aa15f2SStefano Zampini for (i=0; i<am; i++) { 155593aa15f2SStefano Zampini r1 = r2 = r3 = 0.0; 155693aa15f2SStefano Zampini n = a->i[i+1] - a->i[i]; 155793aa15f2SStefano Zampini aj = a->j + a->i[i]; 155893aa15f2SStefano Zampini aa = av + a->i[i]; 155993aa15f2SStefano Zampini for (j=0; j<n; j++) { 156093aa15f2SStefano Zampini const PetscScalar aatmp = aa[j]; 156193aa15f2SStefano Zampini const PetscInt ajtmp = aj[j]; 156293aa15f2SStefano Zampini r1 += aatmp*b1[ajtmp]; 156393aa15f2SStefano Zampini r2 += aatmp*b2[ajtmp]; 156493aa15f2SStefano Zampini r3 += aatmp*b3[ajtmp]; 156593aa15f2SStefano Zampini } 156693aa15f2SStefano Zampini if (add) { 156793aa15f2SStefano Zampini c1[i] += r1; 156893aa15f2SStefano Zampini c2[i] += r2; 156993aa15f2SStefano Zampini c3[i] += r3; 157093aa15f2SStefano Zampini } else { 157193aa15f2SStefano Zampini c1[i] = r1; 157293aa15f2SStefano Zampini c2[i] = r2; 157393aa15f2SStefano Zampini c3[i] = r3; 157493aa15f2SStefano Zampini } 157593aa15f2SStefano Zampini } 157693aa15f2SStefano Zampini } 1577f32d5d43SBarry Smith } 1578dc0b31edSSatish Balay ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 157993aa15f2SStefano Zampini if (add) { 15808c778c55SBarry Smith ierr = MatDenseRestoreArray(C,&c);CHKERRQ(ierr); 158193aa15f2SStefano Zampini } else { 158293aa15f2SStefano Zampini ierr = MatDenseRestoreArrayWrite(C,&c);CHKERRQ(ierr); 158393aa15f2SStefano Zampini } 1584a4af7ca8SStefano Zampini ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr); 1585a4af7ca8SStefano Zampini ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr); 1586f32d5d43SBarry Smith PetscFunctionReturn(0); 1587f32d5d43SBarry Smith } 1588f32d5d43SBarry Smith 158987753ddeSHong Zhang PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1590f32d5d43SBarry Smith { 1591f32d5d43SBarry Smith PetscErrorCode ierr; 1592f32d5d43SBarry Smith 1593f32d5d43SBarry Smith PetscFunctionBegin; 159487753ddeSHong Zhang if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n); 159587753ddeSHong Zhang if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 159687753ddeSHong Zhang if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 15974324174eSBarry Smith 159893aa15f2SStefano Zampini ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE);CHKERRQ(ierr); 15999123193aSHong Zhang PetscFunctionReturn(0); 16009123193aSHong Zhang } 1601b1683b59SHong Zhang 16024222ddf1SHong Zhang /* ------------------------------------------------------- */ 16034222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 16044222ddf1SHong Zhang { 16054222ddf1SHong Zhang PetscFunctionBegin; 16064222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 16074222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16084222ddf1SHong Zhang PetscFunctionReturn(0); 16094222ddf1SHong Zhang } 16104222ddf1SHong Zhang 16116718818eSStefano Zampini PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 16126718818eSStefano Zampini 16134222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 16144222ddf1SHong Zhang { 16154222ddf1SHong Zhang PetscFunctionBegin; 16166718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16174222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB; 16186718818eSStefano Zampini PetscFunctionReturn(0); 16196718818eSStefano Zampini } 16206718818eSStefano Zampini 16216718818eSStefano Zampini static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 16226718818eSStefano Zampini { 16236718818eSStefano Zampini PetscFunctionBegin; 16246718818eSStefano Zampini C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 16256718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_ABt; 16264222ddf1SHong Zhang PetscFunctionReturn(0); 16274222ddf1SHong Zhang } 16284222ddf1SHong Zhang 16294222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 16304222ddf1SHong Zhang { 16314222ddf1SHong Zhang PetscErrorCode ierr; 16324222ddf1SHong Zhang Mat_Product *product = C->product; 16334222ddf1SHong Zhang 16344222ddf1SHong Zhang PetscFunctionBegin; 16354222ddf1SHong Zhang switch (product->type) { 16364222ddf1SHong Zhang case MATPRODUCT_AB: 16374222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);CHKERRQ(ierr); 16384222ddf1SHong Zhang break; 16394222ddf1SHong Zhang case MATPRODUCT_AtB: 16404222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);CHKERRQ(ierr); 16414222ddf1SHong Zhang break; 16426718818eSStefano Zampini case MATPRODUCT_ABt: 16436718818eSStefano Zampini ierr = MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C);CHKERRQ(ierr); 16444222ddf1SHong Zhang break; 16454222ddf1SHong Zhang default: 16466718818eSStefano Zampini break; 16474222ddf1SHong Zhang } 16484222ddf1SHong Zhang PetscFunctionReturn(0); 16494222ddf1SHong Zhang } 16504222ddf1SHong Zhang /* ------------------------------------------------------- */ 16514222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 16524222ddf1SHong Zhang { 16534222ddf1SHong Zhang PetscErrorCode ierr; 16544222ddf1SHong Zhang Mat_Product *product = C->product; 16554222ddf1SHong Zhang Mat A = product->A; 16564222ddf1SHong Zhang PetscBool baij; 16574222ddf1SHong Zhang 16584222ddf1SHong Zhang PetscFunctionBegin; 16594222ddf1SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);CHKERRQ(ierr); 16604222ddf1SHong Zhang if (!baij) { /* A is seqsbaij */ 16614222ddf1SHong Zhang PetscBool sbaij; 16624222ddf1SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 16634222ddf1SHong Zhang if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 16644222ddf1SHong Zhang 16654222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 16664222ddf1SHong Zhang } else { /* A is seqbaij */ 16674222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 16684222ddf1SHong Zhang } 16694222ddf1SHong Zhang 16704222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16714222ddf1SHong Zhang PetscFunctionReturn(0); 16724222ddf1SHong Zhang } 16734222ddf1SHong Zhang 16744222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 16754222ddf1SHong Zhang { 16764222ddf1SHong Zhang PetscErrorCode ierr; 16774222ddf1SHong Zhang Mat_Product *product = C->product; 16784222ddf1SHong Zhang 16794222ddf1SHong Zhang PetscFunctionBegin; 16806718818eSStefano Zampini MatCheckProduct(C,1); 16816718818eSStefano Zampini if (!product->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 16826718818eSStefano Zampini if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric)) { 16834222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);CHKERRQ(ierr); 16846718818eSStefano Zampini } 16854222ddf1SHong Zhang PetscFunctionReturn(0); 16864222ddf1SHong Zhang } 16876718818eSStefano Zampini 16884222ddf1SHong Zhang /* ------------------------------------------------------- */ 16894222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 16904222ddf1SHong Zhang { 16914222ddf1SHong Zhang PetscFunctionBegin; 16924222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 16934222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 16944222ddf1SHong Zhang PetscFunctionReturn(0); 16954222ddf1SHong Zhang } 16964222ddf1SHong Zhang 16974222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 16984222ddf1SHong Zhang { 16994222ddf1SHong Zhang PetscErrorCode ierr; 17004222ddf1SHong Zhang Mat_Product *product = C->product; 17014222ddf1SHong Zhang 17024222ddf1SHong Zhang PetscFunctionBegin; 17034222ddf1SHong Zhang if (product->type == MATPRODUCT_AB) { 17044222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);CHKERRQ(ierr); 17056718818eSStefano Zampini } 17064222ddf1SHong Zhang PetscFunctionReturn(0); 17074222ddf1SHong Zhang } 17084222ddf1SHong Zhang /* ------------------------------------------------------- */ 17094222ddf1SHong Zhang 1710b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1711c8db22f6SHong Zhang { 1712c8db22f6SHong Zhang PetscErrorCode ierr; 17132f5f1f90SHong Zhang Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 17142f5f1f90SHong Zhang Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 17152f5f1f90SHong Zhang PetscInt *bi = b->i,*bj=b->j; 17162f5f1f90SHong Zhang PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 17172f5f1f90SHong Zhang MatScalar *btval,*btval_den,*ba=b->a; 17182f5f1f90SHong Zhang PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1719c8db22f6SHong Zhang 1720c8db22f6SHong Zhang PetscFunctionBegin; 17212f5f1f90SHong Zhang btval_den=btdense->v; 1722580bdb30SBarry Smith ierr = PetscArrayzero(btval_den,m*n);CHKERRQ(ierr); 17232f5f1f90SHong Zhang for (k=0; k<ncolors; k++) { 17242f5f1f90SHong Zhang ncolumns = coloring->ncolumns[k]; 17252f5f1f90SHong Zhang for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1726d2853540SHong Zhang col = *(columns + colorforcol[k] + l); 17272f5f1f90SHong Zhang btcol = bj + bi[col]; 17282f5f1f90SHong Zhang btval = ba + bi[col]; 17292f5f1f90SHong Zhang anz = bi[col+1] - bi[col]; 1730d2853540SHong Zhang for (j=0; j<anz; j++) { 17312f5f1f90SHong Zhang brow = btcol[j]; 17322f5f1f90SHong Zhang btval_den[brow] = btval[j]; 1733c8db22f6SHong Zhang } 1734c8db22f6SHong Zhang } 17352f5f1f90SHong Zhang btval_den += m; 1736c8db22f6SHong Zhang } 1737c8db22f6SHong Zhang PetscFunctionReturn(0); 1738c8db22f6SHong Zhang } 1739c8db22f6SHong Zhang 1740b9af6bddSHong Zhang PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 17418972f759SHong Zhang { 17428972f759SHong Zhang PetscErrorCode ierr; 1743b2d2b04fSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 17441683a169SBarry Smith const PetscScalar *ca_den,*ca_den_ptr; 17451683a169SBarry Smith PetscScalar *ca=csp->a; 1746f99a636bSHong Zhang PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1747e88777f2SHong Zhang PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1748077f23c2SHong Zhang PetscInt nrows,*row,*idx; 1749077f23c2SHong Zhang PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 17508972f759SHong Zhang 17518972f759SHong Zhang PetscFunctionBegin; 17521683a169SBarry Smith ierr = MatDenseGetArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1753f99a636bSHong Zhang 1754077f23c2SHong Zhang if (brows > 0) { 1755077f23c2SHong Zhang PetscInt *lstart,row_end,row_start; 1756980ae229SHong Zhang lstart = matcoloring->lstart; 1757580bdb30SBarry Smith ierr = PetscArrayzero(lstart,ncolors);CHKERRQ(ierr); 1758eeb4fd02SHong Zhang 1759077f23c2SHong Zhang row_end = brows; 1760eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1761077f23c2SHong Zhang for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1762077f23c2SHong Zhang ca_den_ptr = ca_den; 1763980ae229SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1764eeb4fd02SHong Zhang nrows = matcoloring->nrows[k]; 1765eeb4fd02SHong Zhang row = rows + colorforrow[k]; 1766eeb4fd02SHong Zhang idx = den2sp + colorforrow[k]; 1767eeb4fd02SHong Zhang for (l=lstart[k]; l<nrows; l++) { 1768eeb4fd02SHong Zhang if (row[l] >= row_end) { 1769eeb4fd02SHong Zhang lstart[k] = l; 1770eeb4fd02SHong Zhang break; 1771eeb4fd02SHong Zhang } else { 1772077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1773eeb4fd02SHong Zhang } 1774eeb4fd02SHong Zhang } 1775077f23c2SHong Zhang ca_den_ptr += m; 1776eeb4fd02SHong Zhang } 1777077f23c2SHong Zhang row_end += brows; 1778eeb4fd02SHong Zhang if (row_end > m) row_end = m; 1779eeb4fd02SHong Zhang } 1780077f23c2SHong Zhang } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1781077f23c2SHong Zhang ca_den_ptr = ca_den; 1782077f23c2SHong Zhang for (k=0; k<ncolors; k++) { 1783077f23c2SHong Zhang nrows = matcoloring->nrows[k]; 1784077f23c2SHong Zhang row = rows + colorforrow[k]; 1785077f23c2SHong Zhang idx = den2sp + colorforrow[k]; 1786077f23c2SHong Zhang for (l=0; l<nrows; l++) { 1787077f23c2SHong Zhang ca[idx[l]] = ca_den_ptr[row[l]]; 1788077f23c2SHong Zhang } 1789077f23c2SHong Zhang ca_den_ptr += m; 1790077f23c2SHong Zhang } 1791f99a636bSHong Zhang } 1792f99a636bSHong Zhang 17931683a169SBarry Smith ierr = MatDenseRestoreArrayRead(Cden,&ca_den);CHKERRQ(ierr); 1794f99a636bSHong Zhang #if defined(PETSC_USE_INFO) 1795077f23c2SHong Zhang if (matcoloring->brows > 0) { 1796f99a636bSHong Zhang ierr = PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);CHKERRQ(ierr); 1797e88777f2SHong Zhang } else { 1798077f23c2SHong Zhang ierr = PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");CHKERRQ(ierr); 1799e88777f2SHong Zhang } 1800f99a636bSHong Zhang #endif 18018972f759SHong Zhang PetscFunctionReturn(0); 18028972f759SHong Zhang } 18038972f759SHong Zhang 1804b9af6bddSHong Zhang PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1805b1683b59SHong Zhang { 1806b1683b59SHong Zhang PetscErrorCode ierr; 1807e88777f2SHong Zhang PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 18081a83f524SJed Brown const PetscInt *is,*ci,*cj,*row_idx; 1809b28d80bdSHong Zhang PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1810b1683b59SHong Zhang IS *isa; 1811b28d80bdSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1812e88777f2SHong Zhang PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1813e88777f2SHong Zhang PetscInt *colorforcol,*columns,*columns_i,brows; 1814e88777f2SHong Zhang PetscBool flg; 1815b1683b59SHong Zhang 1816b1683b59SHong Zhang PetscFunctionBegin; 1817071fcb05SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1818e99be685SHong Zhang 18197ecccc15SHong Zhang /* bs >1 is not being tested yet! */ 1820e88777f2SHong Zhang Nbs = mat->cmap->N/bs; 1821b1683b59SHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1822e88777f2SHong Zhang c->N = Nbs; 1823e88777f2SHong Zhang c->m = c->M; 1824b1683b59SHong Zhang c->rstart = 0; 1825e88777f2SHong Zhang c->brows = 100; 1826b1683b59SHong Zhang 1827b1683b59SHong Zhang c->ncolors = nis; 1828dcca6d9dSJed Brown ierr = PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);CHKERRQ(ierr); 1829854ce69bSBarry Smith ierr = PetscMalloc1(csp->nz+1,&rows);CHKERRQ(ierr); 1830854ce69bSBarry Smith ierr = PetscMalloc1(csp->nz+1,&den2sp);CHKERRQ(ierr); 1831e88777f2SHong Zhang 1832e88777f2SHong Zhang brows = c->brows; 1833c5929fdfSBarry Smith ierr = PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);CHKERRQ(ierr); 1834e88777f2SHong Zhang if (flg) c->brows = brows; 1835eeb4fd02SHong Zhang if (brows > 0) { 1836854ce69bSBarry Smith ierr = PetscMalloc1(nis+1,&c->lstart);CHKERRQ(ierr); 1837e88777f2SHong Zhang } 18382205254eSKarl Rupp 1839d2853540SHong Zhang colorforrow[0] = 0; 1840d2853540SHong Zhang rows_i = rows; 1841f99a636bSHong Zhang den2sp_i = den2sp; 1842b1683b59SHong Zhang 1843854ce69bSBarry Smith ierr = PetscMalloc1(nis+1,&colorforcol);CHKERRQ(ierr); 1844854ce69bSBarry Smith ierr = PetscMalloc1(Nbs+1,&columns);CHKERRQ(ierr); 18452205254eSKarl Rupp 1846d2853540SHong Zhang colorforcol[0] = 0; 1847d2853540SHong Zhang columns_i = columns; 1848d2853540SHong Zhang 1849077f23c2SHong Zhang /* get column-wise storage of mat */ 1850077f23c2SHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1851b1683b59SHong Zhang 1852b28d80bdSHong Zhang cm = c->m; 1853854ce69bSBarry Smith ierr = PetscMalloc1(cm+1,&rowhit);CHKERRQ(ierr); 1854854ce69bSBarry Smith ierr = PetscMalloc1(cm+1,&idxhit);CHKERRQ(ierr); 1855f99a636bSHong Zhang for (i=0; i<nis; i++) { /* loop over color */ 1856b1683b59SHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1857b1683b59SHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 18582205254eSKarl Rupp 1859b1683b59SHong Zhang c->ncolumns[i] = n; 1860b1683b59SHong Zhang if (n) { 1861580bdb30SBarry Smith ierr = PetscArraycpy(columns_i,is,n);CHKERRQ(ierr); 1862b1683b59SHong Zhang } 1863d2853540SHong Zhang colorforcol[i+1] = colorforcol[i] + n; 1864d2853540SHong Zhang columns_i += n; 1865b1683b59SHong Zhang 1866b1683b59SHong Zhang /* fast, crude version requires O(N*N) work */ 1867580bdb30SBarry Smith ierr = PetscArrayzero(rowhit,cm);CHKERRQ(ierr); 1868e99be685SHong Zhang 1869b7caf3d6SHong Zhang for (j=0; j<n; j++) { /* loop over columns*/ 1870b1683b59SHong Zhang col = is[j]; 1871d2853540SHong Zhang row_idx = cj + ci[col]; 1872b1683b59SHong Zhang m = ci[col+1] - ci[col]; 1873b7caf3d6SHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1874e99be685SHong Zhang idxhit[*row_idx] = spidx[ci[col] + k]; 1875d2853540SHong Zhang rowhit[*row_idx++] = col + 1; 1876b1683b59SHong Zhang } 1877b1683b59SHong Zhang } 1878b1683b59SHong Zhang /* count the number of hits */ 1879b1683b59SHong Zhang nrows = 0; 1880e8354b3eSHong Zhang for (j=0; j<cm; j++) { 1881b1683b59SHong Zhang if (rowhit[j]) nrows++; 1882b1683b59SHong Zhang } 1883b1683b59SHong Zhang c->nrows[i] = nrows; 1884d2853540SHong Zhang colorforrow[i+1] = colorforrow[i] + nrows; 1885d2853540SHong Zhang 1886b1683b59SHong Zhang nrows = 0; 1887b7caf3d6SHong Zhang for (j=0; j<cm; j++) { /* loop over rows */ 1888b1683b59SHong Zhang if (rowhit[j]) { 1889d2853540SHong Zhang rows_i[nrows] = j; 189012b89a43SHong Zhang den2sp_i[nrows] = idxhit[j]; 1891b1683b59SHong Zhang nrows++; 1892b1683b59SHong Zhang } 1893b1683b59SHong Zhang } 1894e88777f2SHong Zhang den2sp_i += nrows; 1895077f23c2SHong Zhang 1896b1683b59SHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1897f99a636bSHong Zhang rows_i += nrows; 1898b1683b59SHong Zhang } 18990298fd71SBarry Smith ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 1900b28d80bdSHong Zhang ierr = PetscFree(rowhit);CHKERRQ(ierr); 1901071fcb05SBarry Smith ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);CHKERRQ(ierr); 1902d2853540SHong Zhang if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1903b28d80bdSHong Zhang 1904d2853540SHong Zhang c->colorforrow = colorforrow; 1905d2853540SHong Zhang c->rows = rows; 1906f99a636bSHong Zhang c->den2sp = den2sp; 1907d2853540SHong Zhang c->colorforcol = colorforcol; 1908d2853540SHong Zhang c->columns = columns; 19092205254eSKarl Rupp 1910f94ccd6cSHong Zhang ierr = PetscFree(idxhit);CHKERRQ(ierr); 1911b1683b59SHong Zhang PetscFunctionReturn(0); 1912b1683b59SHong Zhang } 1913d013fc79Sandi selinger 19144222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19154222ddf1SHong Zhang static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1916df97dc6dSFande Kong { 19174222ddf1SHong Zhang PetscErrorCode ierr; 19184222ddf1SHong Zhang Mat_Product *product = C->product; 19194222ddf1SHong Zhang Mat A=product->A,B=product->B; 19204222ddf1SHong Zhang 1921df97dc6dSFande Kong PetscFunctionBegin; 19224222ddf1SHong Zhang if (C->ops->mattransposemultnumeric) { 19234222ddf1SHong Zhang /* Alg: "outerproduct" */ 19246718818eSStefano Zampini ierr = (*C->ops->mattransposemultnumeric)(A,B,C);CHKERRQ(ierr); 19254222ddf1SHong Zhang } else { 19264222ddf1SHong Zhang /* Alg: "matmatmult" -- C = At*B */ 19276718818eSStefano Zampini Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 19286718818eSStefano Zampini Mat At; 19294222ddf1SHong Zhang 19306718818eSStefano Zampini if (!atb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 19316718818eSStefano Zampini At = atb->At; 1932*089a957eSStefano Zampini if (atb->updateAt && At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */ 19334222ddf1SHong Zhang ierr = MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr); 19344222ddf1SHong Zhang } 1935*089a957eSStefano Zampini ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(At ? At : A,B,C);CHKERRQ(ierr); 19364222ddf1SHong Zhang atb->updateAt = PETSC_TRUE; 19374222ddf1SHong Zhang } 1938df97dc6dSFande Kong PetscFunctionReturn(0); 1939df97dc6dSFande Kong } 1940df97dc6dSFande Kong 19414222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1942d013fc79Sandi selinger { 1943d013fc79Sandi selinger PetscErrorCode ierr; 19444222ddf1SHong Zhang Mat_Product *product = C->product; 19454222ddf1SHong Zhang Mat A=product->A,B=product->B; 19464222ddf1SHong Zhang PetscReal fill=product->fill; 1947d013fc79Sandi selinger 1948d013fc79Sandi selinger PetscFunctionBegin; 19494222ddf1SHong Zhang ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 19502869b61bSandi selinger 19514222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 19524222ddf1SHong Zhang PetscFunctionReturn(0); 19532869b61bSandi selinger } 1954d013fc79Sandi selinger 19554222ddf1SHong Zhang /* --------------------------------------------------------------- */ 19564222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 19574222ddf1SHong Zhang { 19584222ddf1SHong Zhang PetscErrorCode ierr; 19594222ddf1SHong Zhang Mat_Product *product = C->product; 19604222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 19614222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 19624222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 19634222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 19644222ddf1SHong Zhang PetscInt nalg = 7; 19654222ddf1SHong Zhang #else 19664222ddf1SHong Zhang const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 19674222ddf1SHong Zhang PetscInt nalg = 8; 19684222ddf1SHong Zhang #endif 19694222ddf1SHong Zhang 19704222ddf1SHong Zhang PetscFunctionBegin; 19714222ddf1SHong Zhang /* Set default algorithm */ 19724222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 19734222ddf1SHong Zhang if (flg) { 19744222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 1975d013fc79Sandi selinger } 1976d013fc79Sandi selinger 19774222ddf1SHong Zhang /* Get runtime option */ 19784222ddf1SHong Zhang if (product->api_user) { 19794222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr); 19804222ddf1SHong Zhang ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 19814222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 19824222ddf1SHong Zhang } else { 19834222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr); 19844222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 19854222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1986d013fc79Sandi selinger } 19874222ddf1SHong Zhang if (flg) { 19884222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 1989d013fc79Sandi selinger } 1990d013fc79Sandi selinger 19914222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 19924222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 19934222ddf1SHong Zhang PetscFunctionReturn(0); 19944222ddf1SHong Zhang } 1995d013fc79Sandi selinger 19964222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 19974222ddf1SHong Zhang { 19984222ddf1SHong Zhang PetscErrorCode ierr; 19994222ddf1SHong Zhang Mat_Product *product = C->product; 20004222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20014222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 2002*089a957eSStefano Zampini const char *algTypes[3] = {"default","at*b","outerproduct"}; 2003*089a957eSStefano Zampini PetscInt nalg = 3; 2004d013fc79Sandi selinger 20054222ddf1SHong Zhang PetscFunctionBegin; 20064222ddf1SHong Zhang /* Get runtime option */ 20074222ddf1SHong Zhang if (product->api_user) { 20084222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr); 20094222ddf1SHong Zhang ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20104222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20114222ddf1SHong Zhang } else { 20124222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr); 20134222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20144222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20154222ddf1SHong Zhang } 20164222ddf1SHong Zhang if (flg) { 20174222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20184222ddf1SHong Zhang } 2019d013fc79Sandi selinger 20204222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 20214222ddf1SHong Zhang PetscFunctionReturn(0); 20224222ddf1SHong Zhang } 20234222ddf1SHong Zhang 20244222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 20254222ddf1SHong Zhang { 20264222ddf1SHong Zhang PetscErrorCode ierr; 20274222ddf1SHong Zhang Mat_Product *product = C->product; 20284222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 20294222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20304222ddf1SHong Zhang const char *algTypes[2] = {"default","color"}; 20314222ddf1SHong Zhang PetscInt nalg = 2; 20324222ddf1SHong Zhang 20334222ddf1SHong Zhang PetscFunctionBegin; 20344222ddf1SHong Zhang /* Set default algorithm */ 20354222ddf1SHong Zhang ierr = PetscStrcmp(C->product->alg,"default",&flg);CHKERRQ(ierr); 20364222ddf1SHong Zhang if (!flg) { 20374222ddf1SHong Zhang alg = 1; 20384222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20394222ddf1SHong Zhang } 20404222ddf1SHong Zhang 20414222ddf1SHong Zhang /* Get runtime option */ 20424222ddf1SHong Zhang if (product->api_user) { 20434222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 20444222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20454222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20464222ddf1SHong Zhang } else { 20474222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 20484222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 20494222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20504222ddf1SHong Zhang } 20514222ddf1SHong Zhang if (flg) { 20524222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20534222ddf1SHong Zhang } 20544222ddf1SHong Zhang 20554222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 20564222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 20574222ddf1SHong Zhang PetscFunctionReturn(0); 20584222ddf1SHong Zhang } 20594222ddf1SHong Zhang 20604222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 20614222ddf1SHong Zhang { 20624222ddf1SHong Zhang PetscErrorCode ierr; 20634222ddf1SHong Zhang Mat_Product *product = C->product; 20644222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 20654222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 20664222ddf1SHong Zhang #if !defined(PETSC_HAVE_HYPRE) 20674222ddf1SHong Zhang const char *algTypes[2] = {"scalable","rap"}; 20684222ddf1SHong Zhang PetscInt nalg = 2; 20694222ddf1SHong Zhang #else 20704222ddf1SHong Zhang const char *algTypes[3] = {"scalable","rap","hypre"}; 20714222ddf1SHong Zhang PetscInt nalg = 3; 20724222ddf1SHong Zhang #endif 20734222ddf1SHong Zhang 20744222ddf1SHong Zhang PetscFunctionBegin; 20754222ddf1SHong Zhang /* Set default algorithm */ 20764222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 20774222ddf1SHong Zhang if (flg) { 20784222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20794222ddf1SHong Zhang } 20804222ddf1SHong Zhang 20814222ddf1SHong Zhang /* Get runtime option */ 20824222ddf1SHong Zhang if (product->api_user) { 20834222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr); 20844222ddf1SHong Zhang ierr = PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 20854222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20864222ddf1SHong Zhang } else { 20874222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr); 20884222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 20894222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 20904222ddf1SHong Zhang } 20914222ddf1SHong Zhang if (flg) { 20924222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 20934222ddf1SHong Zhang } 20944222ddf1SHong Zhang 20954222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 20964222ddf1SHong Zhang PetscFunctionReturn(0); 20974222ddf1SHong Zhang } 20984222ddf1SHong Zhang 20994222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 21004222ddf1SHong Zhang { 21014222ddf1SHong Zhang PetscErrorCode ierr; 21024222ddf1SHong Zhang Mat_Product *product = C->product; 21034222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 21044222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 21054222ddf1SHong Zhang const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 21064222ddf1SHong Zhang PetscInt nalg = 3; 21074222ddf1SHong Zhang 21084222ddf1SHong Zhang PetscFunctionBegin; 21094222ddf1SHong Zhang /* Set default algorithm */ 21104222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 21114222ddf1SHong Zhang if (flg) { 21124222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21134222ddf1SHong Zhang } 21144222ddf1SHong Zhang 21154222ddf1SHong Zhang /* Get runtime option */ 21164222ddf1SHong Zhang if (product->api_user) { 21174222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");CHKERRQ(ierr); 21184222ddf1SHong Zhang ierr = PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21194222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21204222ddf1SHong Zhang } else { 21214222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");CHKERRQ(ierr); 21224222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);CHKERRQ(ierr); 21234222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21244222ddf1SHong Zhang } 21254222ddf1SHong Zhang if (flg) { 21264222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21274222ddf1SHong Zhang } 21284222ddf1SHong Zhang 21294222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 21304222ddf1SHong Zhang PetscFunctionReturn(0); 21314222ddf1SHong Zhang } 21324222ddf1SHong Zhang 21334222ddf1SHong Zhang /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 21344222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 21354222ddf1SHong Zhang { 21364222ddf1SHong Zhang PetscErrorCode ierr; 21374222ddf1SHong Zhang Mat_Product *product = C->product; 21384222ddf1SHong Zhang PetscInt alg = 0; /* default algorithm */ 21394222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 21404222ddf1SHong Zhang const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 21414222ddf1SHong Zhang PetscInt nalg = 7; 21424222ddf1SHong Zhang 21434222ddf1SHong Zhang PetscFunctionBegin; 21444222ddf1SHong Zhang /* Set default algorithm */ 21454222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 21464222ddf1SHong Zhang if (flg) { 21474222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21484222ddf1SHong Zhang } 21494222ddf1SHong Zhang 21504222ddf1SHong Zhang /* Get runtime option */ 21514222ddf1SHong Zhang if (product->api_user) { 21524222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");CHKERRQ(ierr); 21534222ddf1SHong Zhang ierr = PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 21544222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21554222ddf1SHong Zhang } else { 21564222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");CHKERRQ(ierr); 21574222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 21584222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 21594222ddf1SHong Zhang } 21604222ddf1SHong Zhang if (flg) { 21614222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 21624222ddf1SHong Zhang } 21634222ddf1SHong Zhang 21644222ddf1SHong Zhang C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 21654222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABC; 21664222ddf1SHong Zhang PetscFunctionReturn(0); 21674222ddf1SHong Zhang } 21684222ddf1SHong Zhang 21694222ddf1SHong Zhang PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 21704222ddf1SHong Zhang { 21714222ddf1SHong Zhang PetscErrorCode ierr; 21724222ddf1SHong Zhang Mat_Product *product = C->product; 21734222ddf1SHong Zhang 21744222ddf1SHong Zhang PetscFunctionBegin; 21754222ddf1SHong Zhang switch (product->type) { 21764222ddf1SHong Zhang case MATPRODUCT_AB: 21774222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_AB(C);CHKERRQ(ierr); 21784222ddf1SHong Zhang break; 21794222ddf1SHong Zhang case MATPRODUCT_AtB: 21804222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_AtB(C);CHKERRQ(ierr); 21814222ddf1SHong Zhang break; 21824222ddf1SHong Zhang case MATPRODUCT_ABt: 21834222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_ABt(C);CHKERRQ(ierr); 21844222ddf1SHong Zhang break; 21854222ddf1SHong Zhang case MATPRODUCT_PtAP: 21864222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_PtAP(C);CHKERRQ(ierr); 21874222ddf1SHong Zhang break; 21884222ddf1SHong Zhang case MATPRODUCT_RARt: 21894222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_RARt(C);CHKERRQ(ierr); 21904222ddf1SHong Zhang break; 21914222ddf1SHong Zhang case MATPRODUCT_ABC: 21924222ddf1SHong Zhang ierr = MatProductSetFromOptions_SeqAIJ_ABC(C);CHKERRQ(ierr); 21934222ddf1SHong Zhang break; 21946718818eSStefano Zampini default: 21956718818eSStefano Zampini break; 21964222ddf1SHong Zhang } 2197d013fc79Sandi selinger PetscFunctionReturn(0); 2198d013fc79Sandi selinger } 2199