1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <petsc/private/isimpl.h> 11 #include <../src/mat/impls/dense/seq/dense.h> 12 13 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 14 { 15 PetscFunctionBegin; 16 if (C->ops->matmultnumeric) { 17 PetscCall((*C->ops->matmultnumeric)(A,B,C)); 18 } else { 19 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C)); 20 } 21 PetscFunctionReturn(0); 22 } 23 24 /* Modified from MatCreateSeqAIJWithArrays() */ 25 PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat) 26 { 27 PetscInt ii; 28 Mat_SeqAIJ *aij; 29 PetscBool isseqaij, osingle, ofree_a, ofree_ij; 30 31 PetscFunctionBegin; 32 PetscCheck(m <= 0 || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 33 PetscCall(MatSetSizes(mat,m,n,m,n)); 34 35 if (!mtype) { 36 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij)); 37 if (!isseqaij) PetscCall(MatSetType(mat,MATSEQAIJ)); 38 } else { 39 PetscCall(MatSetType(mat,mtype)); 40 } 41 42 aij = (Mat_SeqAIJ*)(mat)->data; 43 osingle = aij->singlemalloc; 44 ofree_a = aij->free_a; 45 ofree_ij = aij->free_ij; 46 /* changes the free flags */ 47 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL)); 48 49 PetscCall(PetscFree(aij->ilen)); 50 PetscCall(PetscFree(aij->imax)); 51 PetscCall(PetscMalloc1(m,&aij->imax)); 52 PetscCall(PetscMalloc1(m,&aij->ilen)); 53 for (ii=0,aij->nonzerorowcnt=0,aij->rmax = 0; ii<m; ii++) { 54 const PetscInt rnz = i[ii+1] - i[ii]; 55 aij->nonzerorowcnt += !!rnz; 56 aij->rmax = PetscMax(aij->rmax,rnz); 57 aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii]; 58 } 59 aij->maxnz = i[m]; 60 aij->nz = i[m]; 61 62 if (osingle) { 63 PetscCall(PetscFree3(aij->a,aij->j,aij->i)); 64 } else { 65 if (ofree_a) PetscCall(PetscFree(aij->a)); 66 if (ofree_ij) PetscCall(PetscFree(aij->j)); 67 if (ofree_ij) PetscCall(PetscFree(aij->i)); 68 } 69 aij->i = i; 70 aij->j = j; 71 aij->a = a; 72 aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */ 73 /* default to not retain ownership */ 74 aij->singlemalloc = PETSC_FALSE; 75 aij->free_a = PETSC_FALSE; 76 aij->free_ij = PETSC_FALSE; 77 PetscCall(MatCheckCompressedRow(mat,aij->nonzerorowcnt,&aij->compressedrow,aij->i,m,0.6)); 78 PetscFunctionReturn(0); 79 } 80 81 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 82 { 83 Mat_Product *product = C->product; 84 MatProductAlgorithm alg; 85 PetscBool flg; 86 87 PetscFunctionBegin; 88 if (product) { 89 alg = product->alg; 90 } else { 91 alg = "sorted"; 92 } 93 /* sorted */ 94 PetscCall(PetscStrcmp(alg,"sorted",&flg)); 95 if (flg) { 96 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C)); 97 PetscFunctionReturn(0); 98 } 99 100 /* scalable */ 101 PetscCall(PetscStrcmp(alg,"scalable",&flg)); 102 if (flg) { 103 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C)); 104 PetscFunctionReturn(0); 105 } 106 107 /* scalable_fast */ 108 PetscCall(PetscStrcmp(alg,"scalable_fast",&flg)); 109 if (flg) { 110 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C)); 111 PetscFunctionReturn(0); 112 } 113 114 /* heap */ 115 PetscCall(PetscStrcmp(alg,"heap",&flg)); 116 if (flg) { 117 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C)); 118 PetscFunctionReturn(0); 119 } 120 121 /* btheap */ 122 PetscCall(PetscStrcmp(alg,"btheap",&flg)); 123 if (flg) { 124 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C)); 125 PetscFunctionReturn(0); 126 } 127 128 /* llcondensed */ 129 PetscCall(PetscStrcmp(alg,"llcondensed",&flg)); 130 if (flg) { 131 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C)); 132 PetscFunctionReturn(0); 133 } 134 135 /* rowmerge */ 136 PetscCall(PetscStrcmp(alg,"rowmerge",&flg)); 137 if (flg) { 138 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C)); 139 PetscFunctionReturn(0); 140 } 141 142 #if defined(PETSC_HAVE_HYPRE) 143 PetscCall(PetscStrcmp(alg,"hypre",&flg)); 144 if (flg) { 145 PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C)); 146 PetscFunctionReturn(0); 147 } 148 #endif 149 150 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 151 } 152 153 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C) 154 { 155 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 156 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 157 PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 158 PetscReal afill; 159 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 160 PetscTable ta; 161 PetscBT lnkbt; 162 PetscFreeSpaceList free_space=NULL,current_space=NULL; 163 164 PetscFunctionBegin; 165 /* Get ci and cj */ 166 /*---------------*/ 167 /* Allocate ci array, arrays for fill computation and */ 168 /* free space for accumulating nonzero column info */ 169 PetscCall(PetscMalloc1(am+2,&ci)); 170 ci[0] = 0; 171 172 /* create and initialize a linked list */ 173 PetscCall(PetscTableCreate(bn,bn,&ta)); 174 MatRowMergeMax_SeqAIJ(b,bm,ta); 175 PetscCall(PetscTableGetCount(ta,&Crmax)); 176 PetscCall(PetscTableDestroy(&ta)); 177 178 PetscCall(PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt)); 179 180 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 181 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 182 183 current_space = free_space; 184 185 /* Determine ci and cj */ 186 for (i=0; i<am; i++) { 187 anzi = ai[i+1] - ai[i]; 188 aj = a->j + ai[i]; 189 for (j=0; j<anzi; j++) { 190 brow = aj[j]; 191 bnzj = bi[brow+1] - bi[brow]; 192 bj = b->j + bi[brow]; 193 /* add non-zero cols of B into the sorted linked list lnk */ 194 PetscCall(PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt)); 195 } 196 /* add possible missing diagonal entry */ 197 if (C->force_diagonals) { 198 PetscCall(PetscLLCondensedAddSorted(1,&i,lnk,lnkbt)); 199 } 200 cnzi = lnk[0]; 201 202 /* If free space is not available, make more free space */ 203 /* Double the amount of total space in the list */ 204 if (current_space->local_remaining<cnzi) { 205 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 206 ndouble++; 207 } 208 209 /* Copy data into free space, then initialize lnk */ 210 PetscCall(PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt)); 211 212 current_space->array += cnzi; 213 current_space->local_used += cnzi; 214 current_space->local_remaining -= cnzi; 215 216 ci[i+1] = ci[i] + cnzi; 217 } 218 219 /* Column indices are in the list of free space */ 220 /* Allocate space for cj, initialize cj, and */ 221 /* destroy list of free space and other temporary array(s) */ 222 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 223 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 224 PetscCall(PetscLLCondensedDestroy(lnk,lnkbt)); 225 226 /* put together the new symbolic matrix */ 227 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 228 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 229 230 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 231 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 232 c = (Mat_SeqAIJ*)(C->data); 233 c->free_a = PETSC_FALSE; 234 c->free_ij = PETSC_TRUE; 235 c->nonew = 0; 236 237 /* fast, needs non-scalable O(bn) array 'abdense' */ 238 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 239 240 /* set MatInfo */ 241 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 242 if (afill < 1.0) afill = 1.0; 243 C->info.mallocs = ndouble; 244 C->info.fill_ratio_given = fill; 245 C->info.fill_ratio_needed = afill; 246 247 #if defined(PETSC_USE_INFO) 248 if (ci[am]) { 249 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 250 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 251 } else { 252 PetscCall(PetscInfo(C,"Empty matrix product\n")); 253 } 254 #endif 255 PetscFunctionReturn(0); 256 } 257 258 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C) 259 { 260 PetscLogDouble flops=0.0; 261 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 262 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 263 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 264 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 265 PetscInt am =A->rmap->n,cm=C->rmap->n; 266 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 267 PetscScalar *ca,valtmp; 268 PetscScalar *ab_dense; 269 PetscContainer cab_dense; 270 const PetscScalar *aa,*ba,*baj; 271 272 PetscFunctionBegin; 273 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 274 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 275 if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 276 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 277 c->a = ca; 278 c->free_a = PETSC_TRUE; 279 } else ca = c->a; 280 281 /* TODO this should be done in the symbolic phase */ 282 /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers 283 that is hard to eradicate) */ 284 PetscCall(PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense)); 285 if (!cab_dense) { 286 PetscCall(PetscMalloc1(B->cmap->N,&ab_dense)); 287 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&cab_dense)); 288 PetscCall(PetscContainerSetPointer(cab_dense,ab_dense)); 289 PetscCall(PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault)); 290 PetscCall(PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense)); 291 PetscCall(PetscObjectDereference((PetscObject)cab_dense)); 292 } 293 PetscCall(PetscContainerGetPointer(cab_dense,(void**)&ab_dense)); 294 PetscCall(PetscArrayzero(ab_dense,B->cmap->N)); 295 296 /* clean old values in C */ 297 PetscCall(PetscArrayzero(ca,ci[cm])); 298 /* Traverse A row-wise. */ 299 /* Build the ith row in C by summing over nonzero columns in A, */ 300 /* the rows of B corresponding to nonzeros of A. */ 301 for (i=0; i<am; i++) { 302 anzi = ai[i+1] - ai[i]; 303 for (j=0; j<anzi; j++) { 304 brow = aj[j]; 305 bnzi = bi[brow+1] - bi[brow]; 306 bjj = bj + bi[brow]; 307 baj = ba + bi[brow]; 308 /* perform dense axpy */ 309 valtmp = aa[j]; 310 for (k=0; k<bnzi; k++) { 311 ab_dense[bjj[k]] += valtmp*baj[k]; 312 } 313 flops += 2*bnzi; 314 } 315 aj += anzi; aa += anzi; 316 317 cnzi = ci[i+1] - ci[i]; 318 for (k=0; k<cnzi; k++) { 319 ca[k] += ab_dense[cj[k]]; 320 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 321 } 322 flops += cnzi; 323 cj += cnzi; ca += cnzi; 324 } 325 #if defined(PETSC_HAVE_DEVICE) 326 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 327 #endif 328 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 329 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 330 PetscCall(PetscLogFlops(flops)); 331 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 332 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 333 PetscFunctionReturn(0); 334 } 335 336 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 337 { 338 PetscLogDouble flops=0.0; 339 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 340 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 341 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 342 PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 343 PetscInt am = A->rmap->N,cm=C->rmap->N; 344 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 345 PetscScalar *ca=c->a,valtmp; 346 const PetscScalar *aa,*ba,*baj; 347 PetscInt nextb; 348 349 PetscFunctionBegin; 350 PetscCall(MatSeqAIJGetArrayRead(A,&aa)); 351 PetscCall(MatSeqAIJGetArrayRead(B,&ba)); 352 if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */ 353 PetscCall(PetscMalloc1(ci[cm]+1,&ca)); 354 c->a = ca; 355 c->free_a = PETSC_TRUE; 356 } 357 358 /* clean old values in C */ 359 PetscCall(PetscArrayzero(ca,ci[cm])); 360 /* Traverse A row-wise. */ 361 /* Build the ith row in C by summing over nonzero columns in A, */ 362 /* the rows of B corresponding to nonzeros of A. */ 363 for (i=0; i<am; i++) { 364 anzi = ai[i+1] - ai[i]; 365 cnzi = ci[i+1] - ci[i]; 366 for (j=0; j<anzi; j++) { 367 brow = aj[j]; 368 bnzi = bi[brow+1] - bi[brow]; 369 bjj = bj + bi[brow]; 370 baj = ba + bi[brow]; 371 /* perform sparse axpy */ 372 valtmp = aa[j]; 373 nextb = 0; 374 for (k=0; nextb<bnzi; k++) { 375 if (cj[k] == bjj[nextb]) { /* ccol == bcol */ 376 ca[k] += valtmp*baj[nextb++]; 377 } 378 } 379 flops += 2*bnzi; 380 } 381 aj += anzi; aa += anzi; 382 cj += cnzi; ca += cnzi; 383 } 384 #if defined(PETSC_HAVE_DEVICE) 385 if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU; 386 #endif 387 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 388 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 389 PetscCall(PetscLogFlops(flops)); 390 PetscCall(MatSeqAIJRestoreArrayRead(A,&aa)); 391 PetscCall(MatSeqAIJRestoreArrayRead(B,&ba)); 392 PetscFunctionReturn(0); 393 } 394 395 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C) 396 { 397 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 398 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 399 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 400 MatScalar *ca; 401 PetscReal afill; 402 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 403 PetscTable ta; 404 PetscFreeSpaceList free_space=NULL,current_space=NULL; 405 406 PetscFunctionBegin; 407 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */ 408 /*-----------------------------------------------------------------------------------------*/ 409 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 410 PetscCall(PetscMalloc1(am+2,&ci)); 411 ci[0] = 0; 412 413 /* create and initialize a linked list */ 414 PetscCall(PetscTableCreate(bn,bn,&ta)); 415 MatRowMergeMax_SeqAIJ(b,bm,ta); 416 PetscCall(PetscTableGetCount(ta,&Crmax)); 417 PetscCall(PetscTableDestroy(&ta)); 418 419 PetscCall(PetscLLCondensedCreate_fast(Crmax,&lnk)); 420 421 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 422 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 423 current_space = free_space; 424 425 /* Determine ci and cj */ 426 for (i=0; i<am; i++) { 427 anzi = ai[i+1] - ai[i]; 428 aj = a->j + ai[i]; 429 for (j=0; j<anzi; j++) { 430 brow = aj[j]; 431 bnzj = bi[brow+1] - bi[brow]; 432 bj = b->j + bi[brow]; 433 /* add non-zero cols of B into the sorted linked list lnk */ 434 PetscCall(PetscLLCondensedAddSorted_fast(bnzj,bj,lnk)); 435 } 436 /* add possible missing diagonal entry */ 437 if (C->force_diagonals) { 438 PetscCall(PetscLLCondensedAddSorted_fast(1,&i,lnk)); 439 } 440 cnzi = lnk[1]; 441 442 /* If free space is not available, make more free space */ 443 /* Double the amount of total space in the list */ 444 if (current_space->local_remaining<cnzi) { 445 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 446 ndouble++; 447 } 448 449 /* Copy data into free space, then initialize lnk */ 450 PetscCall(PetscLLCondensedClean_fast(cnzi,current_space->array,lnk)); 451 452 current_space->array += cnzi; 453 current_space->local_used += cnzi; 454 current_space->local_remaining -= cnzi; 455 456 ci[i+1] = ci[i] + cnzi; 457 } 458 459 /* Column indices are in the list of free space */ 460 /* Allocate space for cj, initialize cj, and */ 461 /* destroy list of free space and other temporary array(s) */ 462 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 463 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 464 PetscCall(PetscLLCondensedDestroy_fast(lnk)); 465 466 /* Allocate space for ca */ 467 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 468 469 /* put together the new symbolic matrix */ 470 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 471 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 472 473 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 474 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 475 c = (Mat_SeqAIJ*)(C->data); 476 c->free_a = PETSC_TRUE; 477 c->free_ij = PETSC_TRUE; 478 c->nonew = 0; 479 480 /* slower, less memory */ 481 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 482 483 /* set MatInfo */ 484 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 485 if (afill < 1.0) afill = 1.0; 486 C->info.mallocs = ndouble; 487 C->info.fill_ratio_given = fill; 488 C->info.fill_ratio_needed = afill; 489 490 #if defined(PETSC_USE_INFO) 491 if (ci[am]) { 492 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 493 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 494 } else { 495 PetscCall(PetscInfo(C,"Empty matrix product\n")); 496 } 497 #endif 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C) 502 { 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 504 PetscInt *ai = a->i,*bi=b->i,*ci,*cj; 505 PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 506 MatScalar *ca; 507 PetscReal afill; 508 PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax; 509 PetscTable ta; 510 PetscFreeSpaceList free_space=NULL,current_space=NULL; 511 512 PetscFunctionBegin; 513 /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */ 514 /*---------------------------------------------------------------------------------------------*/ 515 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 516 PetscCall(PetscMalloc1(am+2,&ci)); 517 ci[0] = 0; 518 519 /* create and initialize a linked list */ 520 PetscCall(PetscTableCreate(bn,bn,&ta)); 521 MatRowMergeMax_SeqAIJ(b,bm,ta); 522 PetscCall(PetscTableGetCount(ta,&Crmax)); 523 PetscCall(PetscTableDestroy(&ta)); 524 PetscCall(PetscLLCondensedCreate_Scalable(Crmax,&lnk)); 525 526 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 527 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 528 current_space = free_space; 529 530 /* Determine ci and cj */ 531 for (i=0; i<am; i++) { 532 anzi = ai[i+1] - ai[i]; 533 aj = a->j + ai[i]; 534 for (j=0; j<anzi; j++) { 535 brow = aj[j]; 536 bnzj = bi[brow+1] - bi[brow]; 537 bj = b->j + bi[brow]; 538 /* add non-zero cols of B into the sorted linked list lnk */ 539 PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk)); 540 } 541 /* add possible missing diagonal entry */ 542 if (C->force_diagonals) { 543 PetscCall(PetscLLCondensedAddSorted_Scalable(1,&i,lnk)); 544 } 545 546 cnzi = lnk[0]; 547 548 /* If free space is not available, make more free space */ 549 /* Double the amount of total space in the list */ 550 if (current_space->local_remaining<cnzi) { 551 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space)); 552 ndouble++; 553 } 554 555 /* Copy data into free space, then initialize lnk */ 556 PetscCall(PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk)); 557 558 current_space->array += cnzi; 559 current_space->local_used += cnzi; 560 current_space->local_remaining -= cnzi; 561 562 ci[i+1] = ci[i] + cnzi; 563 } 564 565 /* Column indices are in the list of free space */ 566 /* Allocate space for cj, initialize cj, and */ 567 /* destroy list of free space and other temporary array(s) */ 568 PetscCall(PetscMalloc1(ci[am]+1,&cj)); 569 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 570 PetscCall(PetscLLCondensedDestroy_Scalable(lnk)); 571 572 /* Allocate space for ca */ 573 /*-----------------------*/ 574 PetscCall(PetscCalloc1(ci[am]+1,&ca)); 575 576 /* put together the new symbolic matrix */ 577 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C)); 578 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 579 580 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 581 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 582 c = (Mat_SeqAIJ*)(C->data); 583 c->free_a = PETSC_TRUE; 584 c->free_ij = PETSC_TRUE; 585 c->nonew = 0; 586 587 /* slower, less memory */ 588 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; 589 590 /* set MatInfo */ 591 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 592 if (afill < 1.0) afill = 1.0; 593 C->info.mallocs = ndouble; 594 C->info.fill_ratio_given = fill; 595 C->info.fill_ratio_needed = afill; 596 597 #if defined(PETSC_USE_INFO) 598 if (ci[am]) { 599 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 600 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 601 } else { 602 PetscCall(PetscInfo(C,"Empty matrix product\n")); 603 } 604 #endif 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C) 609 { 610 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 611 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j; 612 PetscInt *ci,*cj,*bb; 613 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 614 PetscReal afill; 615 PetscInt i,j,col,ndouble = 0; 616 PetscFreeSpaceList free_space=NULL,current_space=NULL; 617 PetscHeap h; 618 619 PetscFunctionBegin; 620 /* Get ci and cj - by merging sorted rows using a heap */ 621 /*---------------------------------------------------------------------------------------------*/ 622 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 623 PetscCall(PetscMalloc1(am+2,&ci)); 624 ci[0] = 0; 625 626 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 627 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 628 current_space = free_space; 629 630 PetscCall(PetscHeapCreate(a->rmax,&h)); 631 PetscCall(PetscMalloc1(a->rmax,&bb)); 632 633 /* Determine ci and cj */ 634 for (i=0; i<am; i++) { 635 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 636 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 637 ci[i+1] = ci[i]; 638 /* Populate the min heap */ 639 for (j=0; j<anzi; j++) { 640 bb[j] = bi[acol[j]]; /* bb points at the start of the row */ 641 if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */ 642 PetscCall(PetscHeapAdd(h,j,bj[bb[j]++])); 643 } 644 } 645 /* Pick off the min element, adding it to free space */ 646 PetscCall(PetscHeapPop(h,&j,&col)); 647 while (j >= 0) { 648 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 649 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 650 ndouble++; 651 } 652 *(current_space->array++) = col; 653 current_space->local_used++; 654 current_space->local_remaining--; 655 ci[i+1]++; 656 657 /* stash if anything else remains in this row of B */ 658 if (bb[j] < bi[acol[j]+1]) PetscCall(PetscHeapStash(h,j,bj[bb[j]++])); 659 while (1) { /* pop and stash any other rows of B that also had an entry in this column */ 660 PetscInt j2,col2; 661 PetscCall(PetscHeapPeek(h,&j2,&col2)); 662 if (col2 != col) break; 663 PetscCall(PetscHeapPop(h,&j2,&col2)); 664 if (bb[j2] < bi[acol[j2]+1]) PetscCall(PetscHeapStash(h,j2,bj[bb[j2]++])); 665 } 666 /* Put any stashed elements back into the min heap */ 667 PetscCall(PetscHeapUnstash(h)); 668 PetscCall(PetscHeapPop(h,&j,&col)); 669 } 670 } 671 PetscCall(PetscFree(bb)); 672 PetscCall(PetscHeapDestroy(&h)); 673 674 /* Column indices are in the list of free space */ 675 /* Allocate space for cj, initialize cj, and */ 676 /* destroy list of free space and other temporary array(s) */ 677 PetscCall(PetscMalloc1(ci[am],&cj)); 678 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 679 680 /* put together the new symbolic matrix */ 681 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 682 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 683 684 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 685 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 686 c = (Mat_SeqAIJ*)(C->data); 687 c->free_a = PETSC_TRUE; 688 c->free_ij = PETSC_TRUE; 689 c->nonew = 0; 690 691 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 692 693 /* set MatInfo */ 694 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 695 if (afill < 1.0) afill = 1.0; 696 C->info.mallocs = ndouble; 697 C->info.fill_ratio_given = fill; 698 C->info.fill_ratio_needed = afill; 699 700 #if defined(PETSC_USE_INFO) 701 if (ci[am]) { 702 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 703 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 704 } else { 705 PetscCall(PetscInfo(C,"Empty matrix product\n")); 706 } 707 #endif 708 PetscFunctionReturn(0); 709 } 710 711 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C) 712 { 713 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 714 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 715 PetscInt *ci,*cj,*bb; 716 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 717 PetscReal afill; 718 PetscInt i,j,col,ndouble = 0; 719 PetscFreeSpaceList free_space=NULL,current_space=NULL; 720 PetscHeap h; 721 PetscBT bt; 722 723 PetscFunctionBegin; 724 /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */ 725 /*---------------------------------------------------------------------------------------------*/ 726 /* Allocate arrays for fill computation and free space for accumulating nonzero column */ 727 PetscCall(PetscMalloc1(am+2,&ci)); 728 ci[0] = 0; 729 730 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 731 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space)); 732 733 current_space = free_space; 734 735 PetscCall(PetscHeapCreate(a->rmax,&h)); 736 PetscCall(PetscMalloc1(a->rmax,&bb)); 737 PetscCall(PetscBTCreate(bn,&bt)); 738 739 /* Determine ci and cj */ 740 for (i=0; i<am; i++) { 741 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 742 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 743 const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */ 744 ci[i+1] = ci[i]; 745 /* Populate the min heap */ 746 for (j=0; j<anzi; j++) { 747 PetscInt brow = acol[j]; 748 for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) { 749 PetscInt bcol = bj[bb[j]]; 750 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 751 PetscCall(PetscHeapAdd(h,j,bcol)); 752 bb[j]++; 753 break; 754 } 755 } 756 } 757 /* Pick off the min element, adding it to free space */ 758 PetscCall(PetscHeapPop(h,&j,&col)); 759 while (j >= 0) { 760 if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */ 761 fptr = NULL; /* need PetscBTMemzero */ 762 PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space)); 763 ndouble++; 764 } 765 *(current_space->array++) = col; 766 current_space->local_used++; 767 current_space->local_remaining--; 768 ci[i+1]++; 769 770 /* stash if anything else remains in this row of B */ 771 for (; bb[j] < bi[acol[j]+1]; bb[j]++) { 772 PetscInt bcol = bj[bb[j]]; 773 if (!PetscBTLookupSet(bt,bcol)) { /* new entry */ 774 PetscCall(PetscHeapAdd(h,j,bcol)); 775 bb[j]++; 776 break; 777 } 778 } 779 PetscCall(PetscHeapPop(h,&j,&col)); 780 } 781 if (fptr) { /* Clear the bits for this row */ 782 for (; fptr<current_space->array; fptr++) PetscCall(PetscBTClear(bt,*fptr)); 783 } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */ 784 PetscCall(PetscBTMemzero(bn,bt)); 785 } 786 } 787 PetscCall(PetscFree(bb)); 788 PetscCall(PetscHeapDestroy(&h)); 789 PetscCall(PetscBTDestroy(&bt)); 790 791 /* Column indices are in the list of free space */ 792 /* Allocate space for cj, initialize cj, and */ 793 /* destroy list of free space and other temporary array(s) */ 794 PetscCall(PetscMalloc1(ci[am],&cj)); 795 PetscCall(PetscFreeSpaceContiguous(&free_space,cj)); 796 797 /* put together the new symbolic matrix */ 798 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 799 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 800 801 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 802 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 803 c = (Mat_SeqAIJ*)(C->data); 804 c->free_a = PETSC_TRUE; 805 c->free_ij = PETSC_TRUE; 806 c->nonew = 0; 807 808 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 809 810 /* set MatInfo */ 811 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 812 if (afill < 1.0) afill = 1.0; 813 C->info.mallocs = ndouble; 814 C->info.fill_ratio_given = fill; 815 C->info.fill_ratio_needed = afill; 816 817 #if defined(PETSC_USE_INFO) 818 if (ci[am]) { 819 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 820 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 821 } else { 822 PetscCall(PetscInfo(C,"Empty matrix product\n")); 823 } 824 #endif 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C) 829 { 830 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 831 const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1; 832 PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9]; 833 PetscInt c_maxmem,a_maxrownnz=0,a_rownnz; 834 const PetscInt workcol[8]={0,1,2,3,4,5,6,7}; 835 const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 836 const PetscInt *brow_ptr[8],*brow_end[8]; 837 PetscInt window[8]; 838 PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows; 839 PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft; 840 PetscReal afill; 841 PetscInt *workj_L1,*workj_L2,*workj_L3; 842 PetscInt L1_nnz,L2_nnz; 843 844 /* Step 1: Get upper bound on memory required for allocation. 845 Because of the way virtual memory works, 846 only the memory pages that are actually needed will be physically allocated. */ 847 PetscFunctionBegin; 848 PetscCall(PetscMalloc1(am+1,&ci)); 849 for (i=0; i<am; i++) { 850 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 851 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 852 a_rownnz = 0; 853 for (k=0; k<anzi; ++k) { 854 a_rownnz += bi[acol[k]+1] - bi[acol[k]]; 855 if (a_rownnz > bn) { 856 a_rownnz = bn; 857 break; 858 } 859 } 860 a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz); 861 } 862 /* temporary work areas for merging rows */ 863 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L1)); 864 PetscCall(PetscMalloc1(a_maxrownnz*8,&workj_L2)); 865 PetscCall(PetscMalloc1(a_maxrownnz,&workj_L3)); 866 867 /* This should be enough for almost all matrices. If not, memory is reallocated later. */ 868 c_maxmem = 8*(ai[am]+bi[bm]); 869 /* Step 2: Populate pattern for C */ 870 PetscCall(PetscMalloc1(c_maxmem,&cj)); 871 872 ci_nnz = 0; 873 ci[0] = 0; 874 worki_L1[0] = 0; 875 worki_L2[0] = 0; 876 for (i=0; i<am; i++) { 877 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 878 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 879 rowsleft = anzi; 880 inputcol_L1 = acol; 881 L2_nnz = 0; 882 L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */ 883 worki_L2[1] = 0; 884 outputi_nnz = 0; 885 886 /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */ 887 while (ci_nnz+a_maxrownnz > c_maxmem) { 888 c_maxmem *= 2; 889 ndouble++; 890 PetscCall(PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj)); 891 } 892 893 while (rowsleft) { 894 L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */ 895 L1_nrows = 0; 896 L1_nnz = 0; 897 inputcol = inputcol_L1; 898 inputi = bi; 899 inputj = bj; 900 901 /* The following macro is used to specialize for small rows in A. 902 This helps with compiler unrolling, improving performance substantially. 903 Input: inputj inputi inputcol bn 904 Output: outputj outputi_nnz */ 905 #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \ 906 window_min = bn; \ 907 outputi_nnz = 0; \ 908 for (k=0; k<ANNZ; ++k) { \ 909 brow_ptr[k] = inputj + inputi[inputcol[k]]; \ 910 brow_end[k] = inputj + inputi[inputcol[k]+1]; \ 911 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 912 window_min = PetscMin(window[k], window_min); \ 913 } \ 914 while (window_min < bn) { \ 915 outputj[outputi_nnz++] = window_min; \ 916 /* advance front and compute new minimum */ \ 917 old_window_min = window_min; \ 918 window_min = bn; \ 919 for (k=0; k<ANNZ; ++k) { \ 920 if (window[k] == old_window_min) { \ 921 brow_ptr[k]++; \ 922 window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \ 923 } \ 924 window_min = PetscMin(window[k], window_min); \ 925 } \ 926 } 927 928 /************** L E V E L 1 ***************/ 929 /* Merge up to 8 rows of B to L1 work array*/ 930 while (L1_rowsleft) { 931 outputi_nnz = 0; 932 if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/ 933 else outputj = cj + ci_nnz; /* Merge directly to C */ 934 935 switch (L1_rowsleft) { 936 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 937 brow_end[0] = inputj + inputi[inputcol[0]+1]; 938 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 939 inputcol += L1_rowsleft; 940 rowsleft -= L1_rowsleft; 941 L1_rowsleft = 0; 942 break; 943 case 2: MatMatMultSymbolic_RowMergeMacro(2); 944 inputcol += L1_rowsleft; 945 rowsleft -= L1_rowsleft; 946 L1_rowsleft = 0; 947 break; 948 case 3: MatMatMultSymbolic_RowMergeMacro(3); 949 inputcol += L1_rowsleft; 950 rowsleft -= L1_rowsleft; 951 L1_rowsleft = 0; 952 break; 953 case 4: MatMatMultSymbolic_RowMergeMacro(4); 954 inputcol += L1_rowsleft; 955 rowsleft -= L1_rowsleft; 956 L1_rowsleft = 0; 957 break; 958 case 5: MatMatMultSymbolic_RowMergeMacro(5); 959 inputcol += L1_rowsleft; 960 rowsleft -= L1_rowsleft; 961 L1_rowsleft = 0; 962 break; 963 case 6: MatMatMultSymbolic_RowMergeMacro(6); 964 inputcol += L1_rowsleft; 965 rowsleft -= L1_rowsleft; 966 L1_rowsleft = 0; 967 break; 968 case 7: MatMatMultSymbolic_RowMergeMacro(7); 969 inputcol += L1_rowsleft; 970 rowsleft -= L1_rowsleft; 971 L1_rowsleft = 0; 972 break; 973 default: MatMatMultSymbolic_RowMergeMacro(8); 974 inputcol += 8; 975 rowsleft -= 8; 976 L1_rowsleft -= 8; 977 break; 978 } 979 inputcol_L1 = inputcol; 980 L1_nnz += outputi_nnz; 981 worki_L1[++L1_nrows] = L1_nnz; 982 } 983 984 /********************** L E V E L 2 ************************/ 985 /* Merge from L1 work array to either C or to L2 work array */ 986 if (anzi > 8) { 987 inputi = worki_L1; 988 inputj = workj_L1; 989 inputcol = workcol; 990 outputi_nnz = 0; 991 992 if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */ 993 else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */ 994 995 switch (L1_nrows) { 996 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 997 brow_end[0] = inputj + inputi[inputcol[0]+1]; 998 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 999 break; 1000 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1001 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1002 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1003 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1004 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1005 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1006 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1007 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!"); 1008 } 1009 L2_nnz += outputi_nnz; 1010 worki_L2[++L2_nrows] = L2_nnz; 1011 1012 /************************ L E V E L 3 **********************/ 1013 /* Merge from L2 work array to either C or to L2 work array */ 1014 if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) { 1015 inputi = worki_L2; 1016 inputj = workj_L2; 1017 inputcol = workcol; 1018 outputi_nnz = 0; 1019 if (rowsleft) outputj = workj_L3; 1020 else outputj = cj + ci_nnz; 1021 switch (L2_nrows) { 1022 case 1: brow_ptr[0] = inputj + inputi[inputcol[0]]; 1023 brow_end[0] = inputj + inputi[inputcol[0]+1]; 1024 for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */ 1025 break; 1026 case 2: MatMatMultSymbolic_RowMergeMacro(2); break; 1027 case 3: MatMatMultSymbolic_RowMergeMacro(3); break; 1028 case 4: MatMatMultSymbolic_RowMergeMacro(4); break; 1029 case 5: MatMatMultSymbolic_RowMergeMacro(5); break; 1030 case 6: MatMatMultSymbolic_RowMergeMacro(6); break; 1031 case 7: MatMatMultSymbolic_RowMergeMacro(7); break; 1032 case 8: MatMatMultSymbolic_RowMergeMacro(8); break; 1033 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!"); 1034 } 1035 L2_nrows = 1; 1036 L2_nnz = outputi_nnz; 1037 worki_L2[1] = outputi_nnz; 1038 /* Copy to workj_L2 */ 1039 if (rowsleft) { 1040 for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k]; 1041 } 1042 } 1043 } 1044 } /* while (rowsleft) */ 1045 #undef MatMatMultSymbolic_RowMergeMacro 1046 1047 /* terminate current row */ 1048 ci_nnz += outputi_nnz; 1049 ci[i+1] = ci_nnz; 1050 } 1051 1052 /* Step 3: Create the new symbolic matrix */ 1053 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1054 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1055 1056 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1057 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1058 c = (Mat_SeqAIJ*)(C->data); 1059 c->free_a = PETSC_TRUE; 1060 c->free_ij = PETSC_TRUE; 1061 c->nonew = 0; 1062 1063 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1064 1065 /* set MatInfo */ 1066 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 1067 if (afill < 1.0) afill = 1.0; 1068 C->info.mallocs = ndouble; 1069 C->info.fill_ratio_given = fill; 1070 C->info.fill_ratio_needed = afill; 1071 1072 #if defined(PETSC_USE_INFO) 1073 if (ci[am]) { 1074 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1075 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1076 } else { 1077 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1078 } 1079 #endif 1080 1081 /* Step 4: Free temporary work areas */ 1082 PetscCall(PetscFree(workj_L1)); 1083 PetscCall(PetscFree(workj_L2)); 1084 PetscCall(PetscFree(workj_L3)); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* concatenate unique entries and then sort */ 1089 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C) 1090 { 1091 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 1092 const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j; 1093 PetscInt *ci,*cj,bcol; 1094 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N; 1095 PetscReal afill; 1096 PetscInt i,j,ndouble = 0; 1097 PetscSegBuffer seg,segrow; 1098 char *seen; 1099 1100 PetscFunctionBegin; 1101 PetscCall(PetscMalloc1(am+1,&ci)); 1102 ci[0] = 0; 1103 1104 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 1105 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg)); 1106 PetscCall(PetscSegBufferCreate(sizeof(PetscInt),100,&segrow)); 1107 PetscCall(PetscCalloc1(bn,&seen)); 1108 1109 /* Determine ci and cj */ 1110 for (i=0; i<am; i++) { 1111 const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */ 1112 const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */ 1113 PetscInt packlen = 0,*PETSC_RESTRICT crow; 1114 1115 /* Pack segrow */ 1116 for (j=0; j<anzi; j++) { 1117 PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k; 1118 for (k=bjstart; k<bjend; k++) { 1119 bcol = bj[k]; 1120 if (!seen[bcol]) { /* new entry */ 1121 PetscInt *PETSC_RESTRICT slot; 1122 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1123 *slot = bcol; 1124 seen[bcol] = 1; 1125 packlen++; 1126 } 1127 } 1128 } 1129 1130 /* Check i-th diagonal entry */ 1131 if (C->force_diagonals && !seen[i]) { 1132 PetscInt *PETSC_RESTRICT slot; 1133 PetscCall(PetscSegBufferGetInts(segrow,1,&slot)); 1134 *slot = i; 1135 seen[i] = 1; 1136 packlen++; 1137 } 1138 1139 PetscCall(PetscSegBufferGetInts(seg,packlen,&crow)); 1140 PetscCall(PetscSegBufferExtractTo(segrow,crow)); 1141 PetscCall(PetscSortInt(packlen,crow)); 1142 ci[i+1] = ci[i] + packlen; 1143 for (j=0; j<packlen; j++) seen[crow[j]] = 0; 1144 } 1145 PetscCall(PetscSegBufferDestroy(&segrow)); 1146 PetscCall(PetscFree(seen)); 1147 1148 /* Column indices are in the segmented buffer */ 1149 PetscCall(PetscSegBufferExtractAlloc(seg,&cj)); 1150 PetscCall(PetscSegBufferDestroy(&seg)); 1151 1152 /* put together the new symbolic matrix */ 1153 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C)); 1154 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 1155 1156 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1157 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 1158 c = (Mat_SeqAIJ*)(C->data); 1159 c->free_a = PETSC_TRUE; 1160 c->free_ij = PETSC_TRUE; 1161 c->nonew = 0; 1162 1163 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted; 1164 1165 /* set MatInfo */ 1166 afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5; 1167 if (afill < 1.0) afill = 1.0; 1168 C->info.mallocs = ndouble; 1169 C->info.fill_ratio_given = fill; 1170 C->info.fill_ratio_needed = afill; 1171 1172 #if defined(PETSC_USE_INFO) 1173 if (ci[am]) { 1174 PetscCall(PetscInfo(C,"Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill)); 1175 PetscCall(PetscInfo(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)); 1176 } else { 1177 PetscCall(PetscInfo(C,"Empty matrix product\n")); 1178 } 1179 #endif 1180 PetscFunctionReturn(0); 1181 } 1182 1183 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data) 1184 { 1185 Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data; 1186 1187 PetscFunctionBegin; 1188 PetscCall(MatTransposeColoringDestroy(&abt->matcoloring)); 1189 PetscCall(MatDestroy(&abt->Bt_den)); 1190 PetscCall(MatDestroy(&abt->ABt_den)); 1191 PetscCall(PetscFree(abt)); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1196 { 1197 Mat Bt; 1198 Mat_MatMatTransMult *abt; 1199 Mat_Product *product = C->product; 1200 char *alg; 1201 1202 PetscFunctionBegin; 1203 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1204 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1205 1206 /* create symbolic Bt */ 1207 PetscCall(MatTransposeSymbolic(B,&Bt)); 1208 PetscCall(MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1209 PetscCall(MatSetType(Bt,((PetscObject)A)->type_name)); 1210 1211 /* get symbolic C=A*Bt */ 1212 PetscCall(PetscStrallocpy(product->alg,&alg)); 1213 PetscCall(MatProductSetAlgorithm(C,"sorted")); /* set algorithm for C = A*Bt */ 1214 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C)); 1215 PetscCall(MatProductSetAlgorithm(C,alg)); /* resume original algorithm for ABt product */ 1216 PetscCall(PetscFree(alg)); 1217 1218 /* create a supporting struct for reuse intermediate dense matrices with matcoloring */ 1219 PetscCall(PetscNew(&abt)); 1220 1221 product->data = abt; 1222 product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 1223 1224 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 1225 1226 abt->usecoloring = PETSC_FALSE; 1227 PetscCall(PetscStrcmp(product->alg,"color",&abt->usecoloring)); 1228 if (abt->usecoloring) { 1229 /* Create MatTransposeColoring from symbolic C=A*B^T */ 1230 MatTransposeColoring matcoloring; 1231 MatColoring coloring; 1232 ISColoring iscoloring; 1233 Mat Bt_dense,C_dense; 1234 1235 /* inode causes memory problem */ 1236 PetscCall(MatSetOption(C,MAT_USE_INODES,PETSC_FALSE)); 1237 1238 PetscCall(MatColoringCreate(C,&coloring)); 1239 PetscCall(MatColoringSetDistance(coloring,2)); 1240 PetscCall(MatColoringSetType(coloring,MATCOLORINGSL)); 1241 PetscCall(MatColoringSetFromOptions(coloring)); 1242 PetscCall(MatColoringApply(coloring,&iscoloring)); 1243 PetscCall(MatColoringDestroy(&coloring)); 1244 PetscCall(MatTransposeColoringCreate(C,iscoloring,&matcoloring)); 1245 1246 abt->matcoloring = matcoloring; 1247 1248 PetscCall(ISColoringDestroy(&iscoloring)); 1249 1250 /* Create Bt_dense and C_dense = A*Bt_dense */ 1251 PetscCall(MatCreate(PETSC_COMM_SELF,&Bt_dense)); 1252 PetscCall(MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors)); 1253 PetscCall(MatSetType(Bt_dense,MATSEQDENSE)); 1254 PetscCall(MatSeqDenseSetPreallocation(Bt_dense,NULL)); 1255 1256 Bt_dense->assembled = PETSC_TRUE; 1257 abt->Bt_den = Bt_dense; 1258 1259 PetscCall(MatCreate(PETSC_COMM_SELF,&C_dense)); 1260 PetscCall(MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors)); 1261 PetscCall(MatSetType(C_dense,MATSEQDENSE)); 1262 PetscCall(MatSeqDenseSetPreallocation(C_dense,NULL)); 1263 1264 Bt_dense->assembled = PETSC_TRUE; 1265 abt->ABt_den = C_dense; 1266 1267 #if defined(PETSC_USE_INFO) 1268 { 1269 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 1270 PetscCall(PetscInfo(C,"Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(double)(((PetscReal)(c->nz))/((PetscReal)(A->rmap->n*matcoloring->ncolors))))); 1271 } 1272 #endif 1273 } 1274 /* clean up */ 1275 PetscCall(MatDestroy(&Bt)); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1280 { 1281 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1282 PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 1283 PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 1284 PetscLogDouble flops=0.0; 1285 MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval; 1286 Mat_MatMatTransMult *abt; 1287 Mat_Product *product = C->product; 1288 1289 PetscFunctionBegin; 1290 PetscCheck(product,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1291 abt = (Mat_MatMatTransMult *)product->data; 1292 PetscCheck(abt,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1293 /* clear old values in C */ 1294 if (!c->a) { 1295 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1296 c->a = ca; 1297 c->free_a = PETSC_TRUE; 1298 } else { 1299 ca = c->a; 1300 PetscCall(PetscArrayzero(ca,ci[cm]+1)); 1301 } 1302 1303 if (abt->usecoloring) { 1304 MatTransposeColoring matcoloring = abt->matcoloring; 1305 Mat Bt_dense,C_dense = abt->ABt_den; 1306 1307 /* Get Bt_dense by Apply MatTransposeColoring to B */ 1308 Bt_dense = abt->Bt_den; 1309 PetscCall(MatTransColoringApplySpToDen(matcoloring,B,Bt_dense)); 1310 1311 /* C_dense = A*Bt_dense */ 1312 PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense)); 1313 1314 /* Recover C from C_dense */ 1315 PetscCall(MatTransColoringApplyDenToSp(matcoloring,C_dense,C)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 for (i=0; i<cm; i++) { 1320 anzi = ai[i+1] - ai[i]; 1321 acol = aj + ai[i]; 1322 aval = aa + ai[i]; 1323 cnzi = ci[i+1] - ci[i]; 1324 ccol = cj + ci[i]; 1325 cval = ca + ci[i]; 1326 for (j=0; j<cnzi; j++) { 1327 brow = ccol[j]; 1328 bnzj = bi[brow+1] - bi[brow]; 1329 bcol = bj + bi[brow]; 1330 bval = ba + bi[brow]; 1331 1332 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 1333 nexta = 0; nextb = 0; 1334 while (nexta<anzi && nextb<bnzj) { 1335 while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++; 1336 if (nexta == anzi) break; 1337 while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++; 1338 if (nextb == bnzj) break; 1339 if (acol[nexta] == bcol[nextb]) { 1340 cval[j] += aval[nexta]*bval[nextb]; 1341 nexta++; nextb++; 1342 flops += 2; 1343 } 1344 } 1345 } 1346 } 1347 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1348 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1349 PetscCall(PetscLogFlops(flops)); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data) 1354 { 1355 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data; 1356 1357 PetscFunctionBegin; 1358 PetscCall(MatDestroy(&atb->At)); 1359 if (atb->destroy) PetscCall((*atb->destroy)(atb->data)); 1360 PetscCall(PetscFree(atb)); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C) 1365 { 1366 Mat At = NULL; 1367 Mat_Product *product = C->product; 1368 PetscBool flg,def,square; 1369 1370 PetscFunctionBegin; 1371 MatCheckProduct(C,4); 1372 square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE); 1373 /* outerproduct */ 1374 PetscCall(PetscStrcmp(product->alg,"outerproduct",&flg)); 1375 if (flg) { 1376 /* create symbolic At */ 1377 if (!square) { 1378 PetscCall(MatTransposeSymbolic(A,&At)); 1379 PetscCall(MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs))); 1380 PetscCall(MatSetType(At,((PetscObject)A)->type_name)); 1381 } 1382 /* get symbolic C=At*B */ 1383 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1384 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1385 1386 /* clean up */ 1387 if (!square) { 1388 PetscCall(MatDestroy(&At)); 1389 } 1390 1391 C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */ 1392 PetscCall(MatProductSetAlgorithm(C,"outerproduct")); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 /* matmatmult */ 1397 PetscCall(PetscStrcmp(product->alg,"default",&def)); 1398 PetscCall(PetscStrcmp(product->alg,"at*b",&flg)); 1399 if (flg || def) { 1400 Mat_MatTransMatMult *atb; 1401 1402 PetscCheck(!product->data,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty"); 1403 PetscCall(PetscNew(&atb)); 1404 if (!square) { 1405 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 1406 } 1407 PetscCall(MatProductSetAlgorithm(C,"sorted")); 1408 PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C)); 1409 PetscCall(MatProductSetAlgorithm(C,"at*b")); 1410 product->data = atb; 1411 product->destroy = MatDestroy_SeqAIJ_MatTransMatMult; 1412 atb->At = At; 1413 1414 C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */ 1415 PetscFunctionReturn(0); 1416 } 1417 1418 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported"); 1419 } 1420 1421 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1422 { 1423 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1424 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1425 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1426 PetscLogDouble flops=0.0; 1427 MatScalar *aa=a->a,*ba,*ca,*caj; 1428 1429 PetscFunctionBegin; 1430 if (!c->a) { 1431 PetscCall(PetscCalloc1(ci[cm]+1,&ca)); 1432 1433 c->a = ca; 1434 c->free_a = PETSC_TRUE; 1435 } else { 1436 ca = c->a; 1437 PetscCall(PetscArrayzero(ca,ci[cm])); 1438 } 1439 1440 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1441 for (i=0; i<am; i++) { 1442 bj = b->j + bi[i]; 1443 ba = b->a + bi[i]; 1444 bnzi = bi[i+1] - bi[i]; 1445 anzi = ai[i+1] - ai[i]; 1446 for (j=0; j<anzi; j++) { 1447 nextb = 0; 1448 crow = *aj++; 1449 cjj = cj + ci[crow]; 1450 caj = ca + ci[crow]; 1451 /* perform sparse axpy operation. Note cjj includes bj. */ 1452 for (k=0; nextb<bnzi; k++) { 1453 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1454 caj[k] += (*aa)*(*(ba+nextb)); 1455 nextb++; 1456 } 1457 } 1458 flops += 2*bnzi; 1459 aa++; 1460 } 1461 } 1462 1463 /* Assemble the final matrix and clean up */ 1464 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1465 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1466 PetscCall(PetscLogFlops(flops)); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C) 1471 { 1472 PetscFunctionBegin; 1473 PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C)); 1474 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense; 1475 PetscFunctionReturn(0); 1476 } 1477 1478 PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add) 1479 { 1480 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1481 PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4; 1482 const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av; 1483 const PetscInt *aj; 1484 PetscInt cm=C->rmap->n,cn=B->cmap->n,bm,am=A->rmap->n; 1485 PetscInt clda; 1486 PetscInt am4,bm4,col,i,j,n; 1487 1488 PetscFunctionBegin; 1489 if (!cm || !cn) PetscFunctionReturn(0); 1490 PetscCall(MatSeqAIJGetArrayRead(A,&av)); 1491 if (add) { 1492 PetscCall(MatDenseGetArray(C,&c)); 1493 } else { 1494 PetscCall(MatDenseGetArrayWrite(C,&c)); 1495 } 1496 PetscCall(MatDenseGetArrayRead(B,&b)); 1497 PetscCall(MatDenseGetLDA(B,&bm)); 1498 PetscCall(MatDenseGetLDA(C,&clda)); 1499 am4 = 4*clda; 1500 bm4 = 4*bm; 1501 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1502 c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda; 1503 for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */ 1504 for (i=0; i<am; i++) { /* over rows of A in those columns */ 1505 r1 = r2 = r3 = r4 = 0.0; 1506 n = a->i[i+1] - a->i[i]; 1507 aj = a->j + a->i[i]; 1508 aa = av + a->i[i]; 1509 for (j=0; j<n; j++) { 1510 const PetscScalar aatmp = aa[j]; 1511 const PetscInt ajtmp = aj[j]; 1512 r1 += aatmp*b1[ajtmp]; 1513 r2 += aatmp*b2[ajtmp]; 1514 r3 += aatmp*b3[ajtmp]; 1515 r4 += aatmp*b4[ajtmp]; 1516 } 1517 if (add) { 1518 c1[i] += r1; 1519 c2[i] += r2; 1520 c3[i] += r3; 1521 c4[i] += r4; 1522 } else { 1523 c1[i] = r1; 1524 c2[i] = r2; 1525 c3[i] = r3; 1526 c4[i] = r4; 1527 } 1528 } 1529 b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4; 1530 c1 += am4; c2 += am4; c3 += am4; c4 += am4; 1531 } 1532 /* process remaining columns */ 1533 if (col != cn) { 1534 PetscInt rc = cn-col; 1535 1536 if (rc == 1) { 1537 for (i=0; i<am; i++) { 1538 r1 = 0.0; 1539 n = a->i[i+1] - a->i[i]; 1540 aj = a->j + a->i[i]; 1541 aa = av + a->i[i]; 1542 for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]]; 1543 if (add) c1[i] += r1; 1544 else c1[i] = r1; 1545 } 1546 } else if (rc == 2) { 1547 for (i=0; i<am; i++) { 1548 r1 = r2 = 0.0; 1549 n = a->i[i+1] - a->i[i]; 1550 aj = a->j + a->i[i]; 1551 aa = av + a->i[i]; 1552 for (j=0; j<n; j++) { 1553 const PetscScalar aatmp = aa[j]; 1554 const PetscInt ajtmp = aj[j]; 1555 r1 += aatmp*b1[ajtmp]; 1556 r2 += aatmp*b2[ajtmp]; 1557 } 1558 if (add) { 1559 c1[i] += r1; 1560 c2[i] += r2; 1561 } else { 1562 c1[i] = r1; 1563 c2[i] = r2; 1564 } 1565 } 1566 } else { 1567 for (i=0; i<am; i++) { 1568 r1 = r2 = r3 = 0.0; 1569 n = a->i[i+1] - a->i[i]; 1570 aj = a->j + a->i[i]; 1571 aa = av + a->i[i]; 1572 for (j=0; j<n; j++) { 1573 const PetscScalar aatmp = aa[j]; 1574 const PetscInt ajtmp = aj[j]; 1575 r1 += aatmp*b1[ajtmp]; 1576 r2 += aatmp*b2[ajtmp]; 1577 r3 += aatmp*b3[ajtmp]; 1578 } 1579 if (add) { 1580 c1[i] += r1; 1581 c2[i] += r2; 1582 c3[i] += r3; 1583 } else { 1584 c1[i] = r1; 1585 c2[i] = r2; 1586 c3[i] = r3; 1587 } 1588 } 1589 } 1590 } 1591 PetscCall(PetscLogFlops(cn*(2.0*a->nz))); 1592 if (add) { 1593 PetscCall(MatDenseRestoreArray(C,&c)); 1594 } else { 1595 PetscCall(MatDenseRestoreArrayWrite(C,&c)); 1596 } 1597 PetscCall(MatDenseRestoreArrayRead(B,&b)); 1598 PetscCall(MatSeqAIJRestoreArrayRead(A,&av)); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1603 { 1604 PetscFunctionBegin; 1605 PetscCheck(B->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT,A->cmap->n,B->rmap->n); 1606 PetscCheck(A->rmap->n == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT,C->rmap->n,A->rmap->n); 1607 PetscCheck(B->cmap->n == C->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT,B->cmap->n,C->cmap->n); 1608 1609 PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE)); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 /* ------------------------------------------------------- */ 1614 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C) 1615 { 1616 PetscFunctionBegin; 1617 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense; 1618 C->ops->productsymbolic = MatProductSymbolic_AB; 1619 PetscFunctionReturn(0); 1620 } 1621 1622 PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat); 1623 1624 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C) 1625 { 1626 PetscFunctionBegin; 1627 C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1628 C->ops->productsymbolic = MatProductSymbolic_AtB; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C) 1633 { 1634 PetscFunctionBegin; 1635 C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense; 1636 C->ops->productsymbolic = MatProductSymbolic_ABt; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C) 1641 { 1642 Mat_Product *product = C->product; 1643 1644 PetscFunctionBegin; 1645 switch (product->type) { 1646 case MATPRODUCT_AB: 1647 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C)); 1648 break; 1649 case MATPRODUCT_AtB: 1650 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C)); 1651 break; 1652 case MATPRODUCT_ABt: 1653 PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C)); 1654 break; 1655 default: 1656 break; 1657 } 1658 PetscFunctionReturn(0); 1659 } 1660 /* ------------------------------------------------------- */ 1661 static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C) 1662 { 1663 Mat_Product *product = C->product; 1664 Mat A = product->A; 1665 PetscBool baij; 1666 1667 PetscFunctionBegin; 1668 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij)); 1669 if (!baij) { /* A is seqsbaij */ 1670 PetscBool sbaij; 1671 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij)); 1672 PetscCheck(sbaij,PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format"); 1673 1674 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense; 1675 } else { /* A is seqbaij */ 1676 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense; 1677 } 1678 1679 C->ops->productsymbolic = MatProductSymbolic_AB; 1680 PetscFunctionReturn(0); 1681 } 1682 1683 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C) 1684 { 1685 Mat_Product *product = C->product; 1686 1687 PetscFunctionBegin; 1688 MatCheckProduct(C,1); 1689 PetscCheck(product->A,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A"); 1690 if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C)); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 /* ------------------------------------------------------- */ 1695 static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C) 1696 { 1697 PetscFunctionBegin; 1698 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ; 1699 C->ops->productsymbolic = MatProductSymbolic_AB; 1700 PetscFunctionReturn(0); 1701 } 1702 1703 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C) 1704 { 1705 Mat_Product *product = C->product; 1706 1707 PetscFunctionBegin; 1708 if (product->type == MATPRODUCT_AB) { 1709 PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C)); 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 /* ------------------------------------------------------- */ 1714 1715 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1716 { 1717 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1718 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1719 PetscInt *bi = b->i,*bj=b->j; 1720 PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1721 MatScalar *btval,*btval_den,*ba=b->a; 1722 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1723 1724 PetscFunctionBegin; 1725 btval_den=btdense->v; 1726 PetscCall(PetscArrayzero(btval_den,m*n)); 1727 for (k=0; k<ncolors; k++) { 1728 ncolumns = coloring->ncolumns[k]; 1729 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1730 col = *(columns + colorforcol[k] + l); 1731 btcol = bj + bi[col]; 1732 btval = ba + bi[col]; 1733 anz = bi[col+1] - bi[col]; 1734 for (j=0; j<anz; j++) { 1735 brow = btcol[j]; 1736 btval_den[brow] = btval[j]; 1737 } 1738 } 1739 btval_den += m; 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1745 { 1746 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1747 const PetscScalar *ca_den,*ca_den_ptr; 1748 PetscScalar *ca=csp->a; 1749 PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors; 1750 PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp; 1751 PetscInt nrows,*row,*idx; 1752 PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow; 1753 1754 PetscFunctionBegin; 1755 PetscCall(MatDenseGetArrayRead(Cden,&ca_den)); 1756 1757 if (brows > 0) { 1758 PetscInt *lstart,row_end,row_start; 1759 lstart = matcoloring->lstart; 1760 PetscCall(PetscArrayzero(lstart,ncolors)); 1761 1762 row_end = brows; 1763 if (row_end > m) row_end = m; 1764 for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */ 1765 ca_den_ptr = ca_den; 1766 for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */ 1767 nrows = matcoloring->nrows[k]; 1768 row = rows + colorforrow[k]; 1769 idx = den2sp + colorforrow[k]; 1770 for (l=lstart[k]; l<nrows; l++) { 1771 if (row[l] >= row_end) { 1772 lstart[k] = l; 1773 break; 1774 } else { 1775 ca[idx[l]] = ca_den_ptr[row[l]]; 1776 } 1777 } 1778 ca_den_ptr += m; 1779 } 1780 row_end += brows; 1781 if (row_end > m) row_end = m; 1782 } 1783 } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */ 1784 ca_den_ptr = ca_den; 1785 for (k=0; k<ncolors; k++) { 1786 nrows = matcoloring->nrows[k]; 1787 row = rows + colorforrow[k]; 1788 idx = den2sp + colorforrow[k]; 1789 for (l=0; l<nrows; l++) { 1790 ca[idx[l]] = ca_den_ptr[row[l]]; 1791 } 1792 ca_den_ptr += m; 1793 } 1794 } 1795 1796 PetscCall(MatDenseRestoreArrayRead(Cden,&ca_den)); 1797 #if defined(PETSC_USE_INFO) 1798 if (matcoloring->brows > 0) { 1799 PetscCall(PetscInfo(Csp,"Loop over %" PetscInt_FMT " row blocks for den2sp\n",brows)); 1800 } else { 1801 PetscCall(PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n")); 1802 } 1803 #endif 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1808 { 1809 PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm; 1810 const PetscInt *is,*ci,*cj,*row_idx; 1811 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1812 IS *isa; 1813 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1814 PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i; 1815 PetscInt *colorforcol,*columns,*columns_i,brows; 1816 PetscBool flg; 1817 1818 PetscFunctionBegin; 1819 PetscCall(ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa)); 1820 1821 /* bs >1 is not being tested yet! */ 1822 Nbs = mat->cmap->N/bs; 1823 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1824 c->N = Nbs; 1825 c->m = c->M; 1826 c->rstart = 0; 1827 c->brows = 100; 1828 1829 c->ncolors = nis; 1830 PetscCall(PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow)); 1831 PetscCall(PetscMalloc1(csp->nz+1,&rows)); 1832 PetscCall(PetscMalloc1(csp->nz+1,&den2sp)); 1833 1834 brows = c->brows; 1835 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg)); 1836 if (flg) c->brows = brows; 1837 if (brows > 0) { 1838 PetscCall(PetscMalloc1(nis+1,&c->lstart)); 1839 } 1840 1841 colorforrow[0] = 0; 1842 rows_i = rows; 1843 den2sp_i = den2sp; 1844 1845 PetscCall(PetscMalloc1(nis+1,&colorforcol)); 1846 PetscCall(PetscMalloc1(Nbs+1,&columns)); 1847 1848 colorforcol[0] = 0; 1849 columns_i = columns; 1850 1851 /* get column-wise storage of mat */ 1852 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1853 1854 cm = c->m; 1855 PetscCall(PetscMalloc1(cm+1,&rowhit)); 1856 PetscCall(PetscMalloc1(cm+1,&idxhit)); 1857 for (i=0; i<nis; i++) { /* loop over color */ 1858 PetscCall(ISGetLocalSize(isa[i],&n)); 1859 PetscCall(ISGetIndices(isa[i],&is)); 1860 1861 c->ncolumns[i] = n; 1862 if (n) PetscCall(PetscArraycpy(columns_i,is,n)); 1863 colorforcol[i+1] = colorforcol[i] + n; 1864 columns_i += n; 1865 1866 /* fast, crude version requires O(N*N) work */ 1867 PetscCall(PetscArrayzero(rowhit,cm)); 1868 1869 for (j=0; j<n; j++) { /* loop over columns*/ 1870 col = is[j]; 1871 row_idx = cj + ci[col]; 1872 m = ci[col+1] - ci[col]; 1873 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 1874 idxhit[*row_idx] = spidx[ci[col] + k]; 1875 rowhit[*row_idx++] = col + 1; 1876 } 1877 } 1878 /* count the number of hits */ 1879 nrows = 0; 1880 for (j=0; j<cm; j++) { 1881 if (rowhit[j]) nrows++; 1882 } 1883 c->nrows[i] = nrows; 1884 colorforrow[i+1] = colorforrow[i] + nrows; 1885 1886 nrows = 0; 1887 for (j=0; j<cm; j++) { /* loop over rows */ 1888 if (rowhit[j]) { 1889 rows_i[nrows] = j; 1890 den2sp_i[nrows] = idxhit[j]; 1891 nrows++; 1892 } 1893 } 1894 den2sp_i += nrows; 1895 1896 PetscCall(ISRestoreIndices(isa[i],&is)); 1897 rows_i += nrows; 1898 } 1899 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL)); 1900 PetscCall(PetscFree(rowhit)); 1901 PetscCall(ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa)); 1902 PetscCheck(csp->nz == colorforrow[nis],PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT,csp->nz,colorforrow[nis]); 1903 1904 c->colorforrow = colorforrow; 1905 c->rows = rows; 1906 c->den2sp = den2sp; 1907 c->colorforcol = colorforcol; 1908 c->columns = columns; 1909 1910 PetscCall(PetscFree(idxhit)); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 /* --------------------------------------------------------------- */ 1915 static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C) 1916 { 1917 Mat_Product *product = C->product; 1918 Mat A=product->A,B=product->B; 1919 1920 PetscFunctionBegin; 1921 if (C->ops->mattransposemultnumeric) { 1922 /* Alg: "outerproduct" */ 1923 PetscCall((*C->ops->mattransposemultnumeric)(A,B,C)); 1924 } else { 1925 /* Alg: "matmatmult" -- C = At*B */ 1926 Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data; 1927 1928 PetscCheck(atb,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct"); 1929 if (atb->At) { 1930 /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(); 1931 user may have called MatProductReplaceMats() to get this A=product->A */ 1932 PetscCall(MatTransposeSetPrecursor(A,atb->At)); 1933 PetscCall(MatTranspose(A,MAT_REUSE_MATRIX,&atb->At)); 1934 } 1935 PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A,B,C)); 1936 } 1937 PetscFunctionReturn(0); 1938 } 1939 1940 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C) 1941 { 1942 Mat_Product *product = C->product; 1943 Mat A=product->A,B=product->B; 1944 PetscReal fill=product->fill; 1945 1946 PetscFunctionBegin; 1947 PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C)); 1948 1949 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 /* --------------------------------------------------------------- */ 1954 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C) 1955 { 1956 Mat_Product *product = C->product; 1957 PetscInt alg = 0; /* default algorithm */ 1958 PetscBool flg = PETSC_FALSE; 1959 #if !defined(PETSC_HAVE_HYPRE) 1960 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 1961 PetscInt nalg = 7; 1962 #else 1963 const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"}; 1964 PetscInt nalg = 8; 1965 #endif 1966 1967 PetscFunctionBegin; 1968 /* Set default algorithm */ 1969 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 1970 if (flg) { 1971 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1972 } 1973 1974 /* Get runtime option */ 1975 if (product->api_user) { 1976 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 1977 PetscCall(PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg)); 1978 PetscOptionsEnd(); 1979 } else { 1980 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 1981 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg)); 1982 PetscOptionsEnd(); 1983 } 1984 if (flg) { 1985 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 1986 } 1987 1988 C->ops->productsymbolic = MatProductSymbolic_AB; 1989 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 1990 PetscFunctionReturn(0); 1991 } 1992 1993 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C) 1994 { 1995 Mat_Product *product = C->product; 1996 PetscInt alg = 0; /* default algorithm */ 1997 PetscBool flg = PETSC_FALSE; 1998 const char *algTypes[3] = {"default","at*b","outerproduct"}; 1999 PetscInt nalg = 3; 2000 2001 PetscFunctionBegin; 2002 /* Get runtime option */ 2003 if (product->api_user) { 2004 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat"); 2005 PetscCall(PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2006 PetscOptionsEnd(); 2007 } else { 2008 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat"); 2009 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg)); 2010 PetscOptionsEnd(); 2011 } 2012 if (flg) { 2013 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2014 } 2015 2016 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ; 2017 PetscFunctionReturn(0); 2018 } 2019 2020 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C) 2021 { 2022 Mat_Product *product = C->product; 2023 PetscInt alg = 0; /* default algorithm */ 2024 PetscBool flg = PETSC_FALSE; 2025 const char *algTypes[2] = {"default","color"}; 2026 PetscInt nalg = 2; 2027 2028 PetscFunctionBegin; 2029 /* Set default algorithm */ 2030 PetscCall(PetscStrcmp(C->product->alg,"default",&flg)); 2031 if (!flg) { 2032 alg = 1; 2033 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2034 } 2035 2036 /* Get runtime option */ 2037 if (product->api_user) { 2038 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2039 PetscCall(PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2040 PetscOptionsEnd(); 2041 } else { 2042 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2043 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2044 PetscOptionsEnd(); 2045 } 2046 if (flg) { 2047 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2048 } 2049 2050 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 2051 C->ops->productsymbolic = MatProductSymbolic_ABt; 2052 PetscFunctionReturn(0); 2053 } 2054 2055 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C) 2056 { 2057 Mat_Product *product = C->product; 2058 PetscBool flg = PETSC_FALSE; 2059 PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */ 2060 #if !defined(PETSC_HAVE_HYPRE) 2061 const char *algTypes[2] = {"scalable","rap"}; 2062 PetscInt nalg = 2; 2063 #else 2064 const char *algTypes[3] = {"scalable","rap","hypre"}; 2065 PetscInt nalg = 3; 2066 #endif 2067 2068 PetscFunctionBegin; 2069 /* Set default algorithm */ 2070 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2071 if (flg) { 2072 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2073 } 2074 2075 /* Get runtime option */ 2076 if (product->api_user) { 2077 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 2078 PetscCall(PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2079 PetscOptionsEnd(); 2080 } else { 2081 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 2082 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg)); 2083 PetscOptionsEnd(); 2084 } 2085 if (flg) { 2086 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2087 } 2088 2089 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ; 2090 PetscFunctionReturn(0); 2091 } 2092 2093 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C) 2094 { 2095 Mat_Product *product = C->product; 2096 PetscBool flg = PETSC_FALSE; 2097 PetscInt alg = 0; /* default algorithm */ 2098 const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"}; 2099 PetscInt nalg = 3; 2100 2101 PetscFunctionBegin; 2102 /* Set default algorithm */ 2103 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2104 if (flg) { 2105 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2106 } 2107 2108 /* Get runtime option */ 2109 if (product->api_user) { 2110 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat"); 2111 PetscCall(PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2112 PetscOptionsEnd(); 2113 } else { 2114 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat"); 2115 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg)); 2116 PetscOptionsEnd(); 2117 } 2118 if (flg) { 2119 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2120 } 2121 2122 C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ; 2123 PetscFunctionReturn(0); 2124 } 2125 2126 /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */ 2127 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C) 2128 { 2129 Mat_Product *product = C->product; 2130 PetscInt alg = 0; /* default algorithm */ 2131 PetscBool flg = PETSC_FALSE; 2132 const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"}; 2133 PetscInt nalg = 7; 2134 2135 PetscFunctionBegin; 2136 /* Set default algorithm */ 2137 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2138 if (flg) { 2139 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2140 } 2141 2142 /* Get runtime option */ 2143 if (product->api_user) { 2144 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat"); 2145 PetscCall(PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2146 PetscOptionsEnd(); 2147 } else { 2148 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat"); 2149 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg)); 2150 PetscOptionsEnd(); 2151 } 2152 if (flg) { 2153 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2154 } 2155 2156 C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ; 2157 C->ops->productsymbolic = MatProductSymbolic_ABC; 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C) 2162 { 2163 Mat_Product *product = C->product; 2164 2165 PetscFunctionBegin; 2166 switch (product->type) { 2167 case MATPRODUCT_AB: 2168 PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C)); 2169 break; 2170 case MATPRODUCT_AtB: 2171 PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C)); 2172 break; 2173 case MATPRODUCT_ABt: 2174 PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C)); 2175 break; 2176 case MATPRODUCT_PtAP: 2177 PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C)); 2178 break; 2179 case MATPRODUCT_RARt: 2180 PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C)); 2181 break; 2182 case MATPRODUCT_ABC: 2183 PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C)); 2184 break; 2185 default: 2186 break; 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190