1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 if (scall == MAT_INITIAL_MATRIX){ 23 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 24 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 25 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 26 } 27 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 28 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 29 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 /* 35 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 36 Input Parameter: 37 . am, Ai, Aj - number of rows and structure of A 38 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 39 . fill - filll ratio See MatMatMult() 40 41 Output Parameter: 42 . Ci, Cj - structure of C = A*B 43 . nspacedouble - number of extra mallocs 44 */ 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 47 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 48 { 49 PetscErrorCode ierr; 50 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 51 PetscInt *ai=Ai,*aj=Aj,*bi=Bi,*bj=Bj,*bjj,*ci,*cj; 52 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0; 53 PetscBT lnkbt; 54 55 PetscFunctionBegin; 56 /* Allocate ci array, arrays for fill computation and */ 57 /* free space for accumulating nonzero column info */ 58 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 59 ci[0] = 0; 60 61 /* create and initialize a linked list */ 62 nlnk = bn+1; 63 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 64 65 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 66 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 67 current_space = free_space; 68 69 /* Determine symbolic info for each row of the product: */ 70 for (i=0; i<am; i++) { 71 anzi = ai[i+1] - ai[i]; 72 cnzi = 0; 73 aj = Aj + ai[i]; 74 for (j=0; j<anzi; j++){ 75 brow = aj[j]; 76 bnzj = bi[brow+1] - bi[brow]; 77 bjj = bj + bi[brow]; 78 /* add non-zero cols of B into the sorted linked list lnk */ 79 ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 80 cnzi += nlnk; 81 } 82 83 /* If free space is not available, make more free space */ 84 /* Double the amount of total space in the list */ 85 if (current_space->local_remaining<cnzi) { 86 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 87 ndouble++; 88 } 89 90 /* Copy data into free space, then initialize lnk */ 91 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 92 current_space->array += cnzi; 93 current_space->local_used += cnzi; 94 current_space->local_remaining -= cnzi; 95 ci[i+1] = ci[i] + cnzi; 96 } 97 98 /* Column indices are in the list of free space */ 99 /* Allocate space for cj, initialize cj, and */ 100 /* destroy list of free space and other temporary array(s) */ 101 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 102 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 103 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 104 105 *Ci = ci; 106 *Cj = cj; 107 *nspacedouble = ndouble; 108 PetscFunctionReturn(0); 109 } 110 111 /* 112 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy - same as MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ() 113 but replaces O(bn) array 'lnkbt' with a scalable array 'abj' of size Crmax. 114 */ 115 #undef __FUNCT__ 116 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy" 117 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 118 { 119 PetscErrorCode ierr; 120 PetscInt *aj=Aj,*bi=Bi,*bj,*ci,*cj,rmax=0,*abj,*cj_tmp,nextabj; 121 PetscInt i,j,anzi,brow,bnzj,cnzi,k; 122 PetscBT bt; 123 124 PetscFunctionBegin; 125 /* Allocate ci array and bt */ 126 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 127 ci[0] = 0; 128 129 /* Get ci and count rmax of C */ 130 ierr = PetscBTCreate(bn,bt);CHKERRQ(ierr); 131 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 132 for (i=0; i<am; i++) { 133 anzi = Ai[i+1] - Ai[i]; 134 cnzi = 0; 135 aj = Aj + Ai[i]; 136 for (j=0; j<anzi; j++){ 137 brow = aj[j]; 138 bnzj = bi[brow+1] - bi[brow]; 139 bj = Bj + bi[brow]; 140 for (k=0; k<bnzj; k++){ 141 if (!PetscBTLookupSet(bt,bj[k])) cnzi++; /* new entry */ 142 } 143 } 144 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 145 ci[i+1] = ci[i] + cnzi; 146 if (rmax < cnzi) rmax = cnzi; 147 } 148 149 /* Allocate space for cj */ 150 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 151 152 /* Allocate a temp array for storing column indices of A*B */ 153 ierr = PetscMalloc((rmax+1)*sizeof(PetscInt),&abj);CHKERRQ(ierr); 154 155 /* Determine cj */ 156 for (i=0; i<am; i++) { 157 anzi = Ai[i+1] - Ai[i]; 158 cnzi = 0; 159 nextabj = 0; 160 aj = Aj + Ai[i]; 161 for (j=0; j<anzi; j++){ 162 brow = aj[j]; 163 bnzj = bi[brow+1] - bi[brow]; 164 bj = Bj + bi[brow]; 165 for (k=0; k<bnzj; k++){ 166 if (!PetscBTLookupSet(bt,bj[k])){ /* new entry */ 167 abj[nextabj] = bj[k]; nextabj++; 168 } 169 } 170 } 171 172 /* Sort abj, then copy it to cj */ 173 cnzi = ci[i+1] - ci[i]; 174 ierr = PetscSortInt(cnzi,abj);CHKERRQ(ierr); 175 cj_tmp = cj + ci[i]; 176 for (k=0; k< cnzi; k++){ 177 cj_tmp[k] = abj[k]; 178 ierr = PetscBTClear(bt,abj[k]);CHKERRQ(ierr); 179 } 180 } 181 182 ierr = PetscBTDestroy(bt);CHKERRQ(ierr); 183 ierr = PetscFree(abj);CHKERRQ(ierr); 184 185 *Ci = ci; 186 *Cj = cj; 187 *nspacedouble = 0; 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 193 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 194 { 195 PetscErrorCode ierr; 196 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 197 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj; 198 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 199 MatScalar *ca; 200 PetscReal afill; 201 PetscBool dense_axpy; /* false: use sparse axpy; otherwise use dense axpy in MatMatMultNumeric_SeqAIJ_SeqAIJ() */ 202 203 PetscFunctionBegin; 204 /* Get ci and cj */ 205 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 206 #if defined(DEBUG_MATMATMULT) 207 ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ() is done \n");CHKERRQ(ierr); 208 #endif 209 210 /* Allocate space for ca */ 211 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 212 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 213 214 /* put together the new symbolic matrix */ 215 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 216 217 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 218 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 219 c = (Mat_SeqAIJ *)((*C)->data); 220 c->free_a = PETSC_TRUE; 221 c->free_ij = PETSC_TRUE; 222 c->nonew = 0; 223 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 224 225 /* Determine which MatMatMultNumeric_SeqAIJ_SeqAIJ() to be used */ 226 dense_axpy = PETSC_TRUE; 227 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_denseaxpy",&dense_axpy,PETSC_NULL);CHKERRQ(ierr); 228 if (dense_axpy){ 229 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 230 ierr = PetscMemzero(c->matmult_abdense,dense_axpy*bn*sizeof(PetscScalar));CHKERRQ(ierr); 231 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, takes additional dense_axpy*bn*sizeof(PetscScalar) space */ 232 } else { /* slower, but use less memory */ 233 #if defined(DEBUG_MATMATMULT) 234 ierr = PetscPrintf(PETSC_COMM_SELF,"call MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy \n"); 235 #endif 236 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy; /* slower, less memory */ 237 } 238 239 /* set MatInfo */ 240 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 241 if (afill < 1.0) afill = 1.0; 242 c->maxnz = ci[am]; 243 c->nz = ci[am]; 244 (*C)->info.mallocs = nspacedouble; 245 (*C)->info.fill_ratio_given = fill; 246 (*C)->info.fill_ratio_needed = afill; 247 248 #if defined(PETSC_USE_INFO) 249 if (ci[am]) { 250 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 251 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 252 } else { 253 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 254 } 255 #endif 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 261 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 262 { 263 PetscErrorCode ierr; 264 PetscLogDouble flops=0.0; 265 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 266 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 267 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 268 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 269 PetscInt am=A->rmap->n,cm=C->rmap->n; 270 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 271 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 272 PetscScalar *ab_dense=c->matmult_abdense; 273 274 PetscFunctionBegin; 275 /* clean old values in C */ 276 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 277 /* Traverse A row-wise. */ 278 /* Build the ith row in C by summing over nonzero columns in A, */ 279 /* the rows of B corresponding to nonzeros of A. */ 280 for (i=0; i<am; i++) { 281 anzi = ai[i+1] - ai[i]; 282 for (j=0; j<anzi; j++) { 283 brow = aj[j]; 284 bnzi = bi[brow+1] - bi[brow]; 285 bjj = bj + bi[brow]; 286 baj = ba + bi[brow]; 287 /* perform dense axpy */ 288 valtmp = aa[j]; 289 for (k=0; k<bnzi; k++) { 290 ab_dense[bjj[k]] += valtmp*baj[k]; 291 } 292 flops += 2*bnzi; 293 } 294 aj += anzi; aa += anzi; 295 296 cnzi = ci[i+1] - ci[i]; 297 for (k=0; k<cnzi; k++) { 298 ca[k] += ab_dense[cj[k]]; 299 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 300 } 301 flops += cnzi; 302 cj += cnzi; ca += cnzi; 303 } 304 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 305 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 306 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 307 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy" 313 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat B,Mat C) 314 { 315 PetscErrorCode ierr; 316 PetscLogDouble flops=0.0; 317 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 318 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 319 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 320 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 321 PetscInt am=A->rmap->N,cm=C->rmap->N; 322 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 323 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 324 PetscInt nextb; 325 326 PetscFunctionBegin; 327 #if defined(DEBUG_MATMATMULT) 328 //ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy...\n");CHKERRQ(ierr); 329 #endif 330 /* clean old values in C */ 331 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 332 /* Traverse A row-wise. */ 333 /* Build the ith row in C by summing over nonzero columns in A, */ 334 /* the rows of B corresponding to nonzeros of A. */ 335 for (i=0;i<am;i++) { 336 anzi = ai[i+1] - ai[i]; 337 cnzi = ci[i+1] - ci[i]; 338 for (j=0;j<anzi;j++) { 339 brow = aj[j]; 340 bnzi = bi[brow+1] - bi[brow]; 341 bjj = bj + bi[brow]; 342 baj = ba + bi[brow]; 343 /* perform sparse axpy */ 344 valtmp = aa[j]; 345 nextb = 0; 346 for (k=0; nextb<bnzi; k++) { 347 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 348 ca[k] += valtmp*baj[nextb++]; 349 } 350 } 351 flops += 2*bnzi; 352 } 353 aj += anzi; aa += anzi; 354 cj += cnzi; ca += cnzi; 355 } 356 357 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy" 365 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat B,PetscReal fill,Mat *C) 366 { 367 PetscErrorCode ierr; 368 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 369 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj; 370 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 371 MatScalar *ca; 372 PetscReal afill; 373 374 PetscFunctionBegin; 375 #if defined(DEBUG_MATMATMULT) 376 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy \n");CHKERRQ(ierr); 377 #endif 378 /* Get ci and cj */ 379 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 380 #if defined(DEBUG_MATMATMULT) 381 ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy() is done \n");CHKERRQ(ierr); 382 #endif 383 384 /* Allocate space for ca */ 385 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 386 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 387 388 /* put together the new symbolic matrix */ 389 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 390 391 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 392 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 393 c = (Mat_SeqAIJ *)((*C)->data); 394 c->free_a = PETSC_TRUE; 395 c->free_ij = PETSC_TRUE; 396 c->nonew = 0; 397 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy; /* slower, less memory */ 398 399 /* set MatInfo */ 400 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 401 if (afill < 1.0) afill = 1.0; 402 c->maxnz = ci[am]; 403 c->nz = ci[am]; 404 (*C)->info.mallocs = nspacedouble; 405 (*C)->info.fill_ratio_given = fill; 406 (*C)->info.fill_ratio_needed = afill; 407 408 #if defined(PETSC_USE_INFO) 409 if (ci[am]) { 410 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 411 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 412 } else { 413 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 414 } 415 #endif 416 PetscFunctionReturn(0); 417 } 418 419 420 /* This routine is not used. Should be removed! */ 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 423 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 424 { 425 PetscErrorCode ierr; 426 427 PetscFunctionBegin; 428 if (scall == MAT_INITIAL_MATRIX){ 429 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 430 } 431 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 435 #undef __FUNCT__ 436 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 437 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 438 { 439 PetscErrorCode ierr; 440 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 441 442 PetscFunctionBegin; 443 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 444 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 445 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 446 ierr = PetscFree(multtrans);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 452 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 453 { 454 PetscErrorCode ierr; 455 PetscContainer container; 456 Mat_MatMatTransMult *multtrans=PETSC_NULL; 457 458 PetscFunctionBegin; 459 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 460 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 461 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 462 A->ops->destroy = multtrans->destroy; 463 if (A->ops->destroy) { 464 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 465 } 466 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 472 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 473 { 474 PetscErrorCode ierr; 475 Mat Bt; 476 PetscInt *bti,*btj; 477 Mat_MatMatTransMult *multtrans; 478 PetscContainer container; 479 PetscLogDouble t0,tf,etime2=0.0; 480 481 PetscFunctionBegin; 482 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 483 /* create symbolic Bt */ 484 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 485 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 486 487 /* get symbolic C=A*Bt */ 488 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 489 490 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 491 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 492 493 /* attach the supporting struct to C */ 494 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 495 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 496 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 497 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 498 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 499 500 multtrans->usecoloring = PETSC_FALSE; 501 multtrans->destroy = (*C)->ops->destroy; 502 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 503 504 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 505 etime2 += tf - t0; 506 507 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 508 if (multtrans->usecoloring){ 509 /* Create MatTransposeColoring from symbolic C=A*B^T */ 510 MatTransposeColoring matcoloring; 511 ISColoring iscoloring; 512 Mat Bt_dense,C_dense; 513 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 514 515 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 516 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 517 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 518 etime0 += tf - t0; 519 520 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 521 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 522 multtrans->matcoloring = matcoloring; 523 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 524 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 525 etime01 += tf - t0; 526 527 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 528 /* Create Bt_dense and C_dense = A*Bt_dense */ 529 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 530 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 531 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 532 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 533 Bt_dense->assembled = PETSC_TRUE; 534 multtrans->Bt_den = Bt_dense; 535 536 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 537 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 538 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 540 Bt_dense->assembled = PETSC_TRUE; 541 multtrans->ABt_den = C_dense; 542 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 543 etime1 += tf - t0; 544 545 #if defined(PETSC_USE_INFO) 546 { 547 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 548 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 549 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 550 } 551 #endif 552 } 553 /* clean up */ 554 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 555 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 556 557 558 559 #if defined(INEFFICIENT_ALGORITHM) 560 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 561 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 562 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 563 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 564 PetscInt am=A->rmap->N,bm=B->rmap->N; 565 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 566 MatScalar *ca; 567 PetscBT lnkbt; 568 PetscReal afill; 569 570 /* Allocate row pointer array ci */ 571 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 572 ci[0] = 0; 573 574 /* Create and initialize a linked list for C columns */ 575 nlnk = bm+1; 576 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 577 578 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 579 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 580 current_space = free_space; 581 582 /* Determine symbolic info for each row of the product A*B^T: */ 583 for (i=0; i<am; i++) { 584 anzi = ai[i+1] - ai[i]; 585 cnzi = 0; 586 acol = aj + ai[i]; 587 for (j=0; j<bm; j++){ 588 bnzj = bi[j+1] - bi[j]; 589 bcol= bj + bi[j]; 590 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 591 ka = 0; kb = 0; 592 while (ka < anzi && kb < bnzj){ 593 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 594 if (ka == anzi) break; 595 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 596 if (kb == bnzj) break; 597 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 598 index[0] = j; 599 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 600 cnzi++; 601 break; 602 } 603 } 604 } 605 606 /* If free space is not available, make more free space */ 607 /* Double the amount of total space in the list */ 608 if (current_space->local_remaining<cnzi) { 609 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 610 nspacedouble++; 611 } 612 613 /* Copy data into free space, then initialize lnk */ 614 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 615 current_space->array += cnzi; 616 current_space->local_used += cnzi; 617 current_space->local_remaining -= cnzi; 618 619 ci[i+1] = ci[i] + cnzi; 620 } 621 622 623 /* Column indices are in the list of free space. 624 Allocate array cj, copy column indices to cj, and destroy list of free space */ 625 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 626 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 627 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 628 629 /* Allocate space for ca */ 630 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 631 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 632 633 /* put together the new symbolic matrix */ 634 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 635 636 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 637 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 638 c = (Mat_SeqAIJ *)((*C)->data); 639 c->free_a = PETSC_TRUE; 640 c->free_ij = PETSC_TRUE; 641 c->nonew = 0; 642 643 /* set MatInfo */ 644 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 645 if (afill < 1.0) afill = 1.0; 646 c->maxnz = ci[am]; 647 c->nz = ci[am]; 648 (*C)->info.mallocs = nspacedouble; 649 (*C)->info.fill_ratio_given = fill; 650 (*C)->info.fill_ratio_needed = afill; 651 652 #if defined(PETSC_USE_INFO) 653 if (ci[am]) { 654 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 655 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 656 } else { 657 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 658 } 659 #endif 660 #endif 661 PetscFunctionReturn(0); 662 } 663 664 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 667 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 668 { 669 PetscErrorCode ierr; 670 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 671 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 672 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 673 PetscLogDouble flops=0.0; 674 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 675 Mat_MatMatTransMult *multtrans; 676 PetscContainer container; 677 #if defined(USE_ARRAY) 678 MatScalar *spdot; 679 #endif 680 681 PetscFunctionBegin; 682 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 683 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 684 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 685 if (multtrans->usecoloring){ 686 MatTransposeColoring matcoloring = multtrans->matcoloring; 687 Mat Bt_dense; 688 PetscInt m,n; 689 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 690 Mat C_dense = multtrans->ABt_den; 691 692 Bt_dense = multtrans->Bt_den; 693 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 694 695 /* Get Bt_dense by Apply MatTransposeColoring to B */ 696 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 697 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 698 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 699 etime0 += tf - t0; 700 701 /* C_dense = A*Bt_dense */ 702 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 703 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 704 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 705 etime2 += tf - t0; 706 707 /* Recover C from C_dense */ 708 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 709 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 710 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 711 etime1 += tf - t0; 712 #if defined(PETSC_USE_INFO) 713 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 714 #endif 715 PetscFunctionReturn(0); 716 } 717 718 #if defined(USE_ARRAY) 719 /* allocate an array for implementing sparse inner-product */ 720 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 721 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 722 #endif 723 724 /* clear old values in C */ 725 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 726 727 for (i=0; i<cm; i++) { 728 anzi = ai[i+1] - ai[i]; 729 acol = aj + ai[i]; 730 aval = aa + ai[i]; 731 cnzi = ci[i+1] - ci[i]; 732 ccol = cj + ci[i]; 733 cval = ca + ci[i]; 734 for (j=0; j<cnzi; j++){ 735 brow = ccol[j]; 736 bnzj = bi[brow+1] - bi[brow]; 737 bcol = bj + bi[brow]; 738 bval = ba + bi[brow]; 739 740 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 741 #if defined(USE_ARRAY) 742 /* put ba to spdot array */ 743 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 744 /* c(i,j)=A[i,:]*B[j,:]^T */ 745 for (nexta=0; nexta<anzi; nexta++){ 746 cval[j] += spdot[acol[nexta]]*aval[nexta]; 747 } 748 /* zero spdot array */ 749 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 750 #else 751 nexta = 0; nextb = 0; 752 while (nexta<anzi && nextb<bnzj){ 753 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 754 if (nexta == anzi) break; 755 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 756 if (nextb == bnzj) break; 757 if (acol[nexta] == bcol[nextb]){ 758 cval[j] += aval[nexta]*bval[nextb]; 759 nexta++; nextb++; 760 flops += 2; 761 } 762 } 763 #endif 764 } 765 } 766 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 769 #if defined(USE_ARRAY) 770 ierr = PetscFree(spdot); 771 #endif 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 777 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 778 PetscErrorCode ierr; 779 780 PetscFunctionBegin; 781 if (scall == MAT_INITIAL_MATRIX){ 782 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 783 } 784 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 785 PetscFunctionReturn(0); 786 } 787 788 #undef __FUNCT__ 789 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 790 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 791 { 792 PetscErrorCode ierr; 793 Mat At; 794 PetscInt *ati,*atj; 795 796 PetscFunctionBegin; 797 /* create symbolic At */ 798 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 799 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 800 801 /* get symbolic C=At*B */ 802 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 803 804 /* clean up */ 805 ierr = MatDestroy(&At);CHKERRQ(ierr); 806 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 812 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 813 { 814 PetscErrorCode ierr; 815 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 816 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 817 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 818 PetscLogDouble flops=0.0; 819 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 820 821 PetscFunctionBegin; 822 /* clear old values in C */ 823 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 824 825 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 826 for (i=0;i<am;i++) { 827 bj = b->j + bi[i]; 828 ba = b->a + bi[i]; 829 bnzi = bi[i+1] - bi[i]; 830 anzi = ai[i+1] - ai[i]; 831 for (j=0; j<anzi; j++) { 832 nextb = 0; 833 crow = *aj++; 834 cjj = cj + ci[crow]; 835 caj = ca + ci[crow]; 836 /* perform sparse axpy operation. Note cjj includes bj. */ 837 for (k=0; nextb<bnzi; k++) { 838 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 839 caj[k] += (*aa)*(*(ba+nextb)); 840 nextb++; 841 } 842 } 843 flops += 2*bnzi; 844 aa++; 845 } 846 } 847 848 /* Assemble the final matrix and clean up */ 849 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 850 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 851 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 852 PetscFunctionReturn(0); 853 } 854 855 EXTERN_C_BEGIN 856 #undef __FUNCT__ 857 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 858 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 859 { 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 if (scall == MAT_INITIAL_MATRIX){ 864 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 865 } 866 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 EXTERN_C_END 870 871 #undef __FUNCT__ 872 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 873 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 874 { 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 879 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 880 PetscFunctionReturn(0); 881 } 882 883 #undef __FUNCT__ 884 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 885 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 886 { 887 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 888 PetscErrorCode ierr; 889 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 890 MatScalar *aa; 891 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 892 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 893 894 PetscFunctionBegin; 895 if (!cm || !cn) PetscFunctionReturn(0); 896 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 897 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 898 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 899 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 900 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 901 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 902 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 903 colam = col*am; 904 for (i=0; i<am; i++) { /* over rows of C in those columns */ 905 r1 = r2 = r3 = r4 = 0.0; 906 n = a->i[i+1] - a->i[i]; 907 aj = a->j + a->i[i]; 908 aa = a->a + a->i[i]; 909 for (j=0; j<n; j++) { 910 r1 += (*aa)*b1[*aj]; 911 r2 += (*aa)*b2[*aj]; 912 r3 += (*aa)*b3[*aj]; 913 r4 += (*aa++)*b4[*aj++]; 914 } 915 c[colam + i] = r1; 916 c[colam + am + i] = r2; 917 c[colam + am2 + i] = r3; 918 c[colam + am3 + i] = r4; 919 } 920 b1 += bm4; 921 b2 += bm4; 922 b3 += bm4; 923 b4 += bm4; 924 } 925 for (;col<cn; col++){ /* over extra columns of C */ 926 for (i=0; i<am; i++) { /* over rows of C in those columns */ 927 r1 = 0.0; 928 n = a->i[i+1] - a->i[i]; 929 aj = a->j + a->i[i]; 930 aa = a->a + a->i[i]; 931 932 for (j=0; j<n; j++) { 933 r1 += (*aa++)*b1[*aj++]; 934 } 935 c[col*am + i] = r1; 936 } 937 b1 += bm; 938 } 939 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 940 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 941 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 942 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 943 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 944 PetscFunctionReturn(0); 945 } 946 947 /* 948 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 949 */ 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 952 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 953 { 954 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 955 PetscErrorCode ierr; 956 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 957 MatScalar *aa; 958 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 959 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 960 961 PetscFunctionBegin; 962 if (!cm || !cn) PetscFunctionReturn(0); 963 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 964 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 965 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 966 967 if (a->compressedrow.use){ /* use compressed row format */ 968 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 969 colam = col*am; 970 arm = a->compressedrow.nrows; 971 ii = a->compressedrow.i; 972 ridx = a->compressedrow.rindex; 973 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 974 r1 = r2 = r3 = r4 = 0.0; 975 n = ii[i+1] - ii[i]; 976 aj = a->j + ii[i]; 977 aa = a->a + ii[i]; 978 for (j=0; j<n; j++) { 979 r1 += (*aa)*b1[*aj]; 980 r2 += (*aa)*b2[*aj]; 981 r3 += (*aa)*b3[*aj]; 982 r4 += (*aa++)*b4[*aj++]; 983 } 984 c[colam + ridx[i]] += r1; 985 c[colam + am + ridx[i]] += r2; 986 c[colam + am2 + ridx[i]] += r3; 987 c[colam + am3 + ridx[i]] += r4; 988 } 989 b1 += bm4; 990 b2 += bm4; 991 b3 += bm4; 992 b4 += bm4; 993 } 994 for (;col<cn; col++){ /* over extra columns of C */ 995 colam = col*am; 996 arm = a->compressedrow.nrows; 997 ii = a->compressedrow.i; 998 ridx = a->compressedrow.rindex; 999 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1000 r1 = 0.0; 1001 n = ii[i+1] - ii[i]; 1002 aj = a->j + ii[i]; 1003 aa = a->a + ii[i]; 1004 1005 for (j=0; j<n; j++) { 1006 r1 += (*aa++)*b1[*aj++]; 1007 } 1008 c[col*am + ridx[i]] += r1; 1009 } 1010 b1 += bm; 1011 } 1012 } else { 1013 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1014 colam = col*am; 1015 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1016 r1 = r2 = r3 = r4 = 0.0; 1017 n = a->i[i+1] - a->i[i]; 1018 aj = a->j + a->i[i]; 1019 aa = a->a + a->i[i]; 1020 for (j=0; j<n; j++) { 1021 r1 += (*aa)*b1[*aj]; 1022 r2 += (*aa)*b2[*aj]; 1023 r3 += (*aa)*b3[*aj]; 1024 r4 += (*aa++)*b4[*aj++]; 1025 } 1026 c[colam + i] += r1; 1027 c[colam + am + i] += r2; 1028 c[colam + am2 + i] += r3; 1029 c[colam + am3 + i] += r4; 1030 } 1031 b1 += bm4; 1032 b2 += bm4; 1033 b3 += bm4; 1034 b4 += bm4; 1035 } 1036 for (;col<cn; col++){ /* over extra columns of C */ 1037 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1038 r1 = 0.0; 1039 n = a->i[i+1] - a->i[i]; 1040 aj = a->j + a->i[i]; 1041 aa = a->a + a->i[i]; 1042 1043 for (j=0; j<n; j++) { 1044 r1 += (*aa++)*b1[*aj++]; 1045 } 1046 c[col*am + i] += r1; 1047 } 1048 b1 += bm; 1049 } 1050 } 1051 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1052 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1053 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1059 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1060 { 1061 PetscErrorCode ierr; 1062 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1063 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1064 PetscInt *bi=b->i,*bj=b->j; 1065 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1066 MatScalar *btval,*btval_den,*ba=b->a; 1067 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1068 1069 PetscFunctionBegin; 1070 btval_den=btdense->v; 1071 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1072 for (k=0; k<ncolors; k++) { 1073 ncolumns = coloring->ncolumns[k]; 1074 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1075 col = *(columns + colorforcol[k] + l); 1076 btcol = bj + bi[col]; 1077 btval = ba + bi[col]; 1078 anz = bi[col+1] - bi[col]; 1079 for (j=0; j<anz; j++){ 1080 brow = btcol[j]; 1081 btval_den[brow] = btval[j]; 1082 } 1083 } 1084 btval_den += m; 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1091 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1092 { 1093 PetscErrorCode ierr; 1094 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1095 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1096 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1097 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1098 1099 PetscFunctionBegin; 1100 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1101 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1102 cp_den = ca_den; 1103 for (k=0; k<ncolors; k++) { 1104 nrows = matcoloring->nrows[k]; 1105 row = rows + colorforrow[k]; 1106 idx = spidx + colorforrow[k]; 1107 for (l=0; l<nrows; l++){ 1108 ca[idx[l]] = cp_den[row[l]]; 1109 } 1110 cp_den += m; 1111 } 1112 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 /* 1117 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1118 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1119 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1120 */ 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1123 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1128 PetscInt nz = a->i[m],row,*jj,mr,col; 1129 PetscInt *cspidx; 1130 1131 PetscFunctionBegin; 1132 *nn = n; 1133 if (!ia) PetscFunctionReturn(0); 1134 if (symmetric) { 1135 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1136 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1139 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1140 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1141 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1142 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1143 jj = a->j; 1144 for (i=0; i<nz; i++) { 1145 collengths[jj[i]]++; 1146 } 1147 cia[0] = oshift; 1148 for (i=0; i<n; i++) { 1149 cia[i+1] = cia[i] + collengths[i]; 1150 } 1151 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1152 jj = a->j; 1153 for (row=0; row<m; row++) { 1154 mr = a->i[row+1] - a->i[row]; 1155 for (i=0; i<mr; i++) { 1156 col = *jj++; 1157 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1158 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1159 } 1160 } 1161 ierr = PetscFree(collengths);CHKERRQ(ierr); 1162 *ia = cia; *ja = cja; 1163 *spidx = cspidx; 1164 } 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1170 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1171 { 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 if (!ia) PetscFunctionReturn(0); 1176 1177 ierr = PetscFree(*ia);CHKERRQ(ierr); 1178 ierr = PetscFree(*ja);CHKERRQ(ierr); 1179 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1185 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1186 { 1187 PetscErrorCode ierr; 1188 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1189 const PetscInt *is; 1190 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1191 IS *isa; 1192 PetscBool done; 1193 PetscBool flg1,flg2; 1194 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1195 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1196 PetscInt *colorforcol,*columns,*columns_i; 1197 1198 PetscFunctionBegin; 1199 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1200 1201 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1202 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1203 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1204 if (flg1 || flg2) { 1205 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1206 } 1207 1208 N = mat->cmap->N/bs; 1209 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1210 c->N = mat->cmap->N/bs; 1211 c->m = mat->rmap->N/bs; 1212 c->rstart = 0; 1213 1214 c->ncolors = nis; 1215 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1216 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1217 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1218 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1219 colorforrow[0] = 0; 1220 rows_i = rows; 1221 columnsforspidx_i = columnsforspidx; 1222 1223 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1224 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1225 colorforcol[0] = 0; 1226 columns_i = columns; 1227 1228 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1229 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1230 1231 cm = c->m; 1232 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1233 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1234 for (i=0; i<nis; i++) { 1235 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1236 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1237 c->ncolumns[i] = n; 1238 if (n) { 1239 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1240 } 1241 colorforcol[i+1] = colorforcol[i] + n; 1242 columns_i += n; 1243 1244 /* fast, crude version requires O(N*N) work */ 1245 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1246 1247 /* loop over columns*/ 1248 for (j=0; j<n; j++) { 1249 col = is[j]; 1250 row_idx = cj + ci[col]; 1251 m = ci[col+1] - ci[col]; 1252 /* loop over columns marking them in rowhit */ 1253 for (k=0; k<m; k++) { 1254 idxhit[*row_idx] = spidx[ci[col] + k]; 1255 rowhit[*row_idx++] = col + 1; 1256 } 1257 } 1258 /* count the number of hits */ 1259 nrows = 0; 1260 for (j=0; j<cm; j++) { 1261 if (rowhit[j]) nrows++; 1262 } 1263 c->nrows[i] = nrows; 1264 colorforrow[i+1] = colorforrow[i] + nrows; 1265 1266 nrows = 0; 1267 for (j=0; j<cm; j++) { 1268 if (rowhit[j]) { 1269 rows_i[nrows] = j; 1270 columnsforspidx_i[nrows] = idxhit[j]; 1271 nrows++; 1272 } 1273 } 1274 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1275 rows_i += nrows; columnsforspidx_i += nrows; 1276 } 1277 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1278 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1279 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1280 #if defined(PETSC_USE_DEBUG) 1281 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1282 #endif 1283 1284 c->colorforrow = colorforrow; 1285 c->rows = rows; 1286 c->columnsforspidx = columnsforspidx; 1287 c->colorforcol = colorforcol; 1288 c->columns = columns; 1289 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292