1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 if (scall == MAT_INITIAL_MATRIX){ 23 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 24 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 25 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 26 } 27 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 28 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 29 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 EXTERN_C_END 33 34 /* 35 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 36 Input Parameter: 37 . am, Ai, Aj - number of rows and structure of A 38 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 39 . fill - filll ratio See MatMatMult() 40 41 Output Parameter: 42 . Ci, Cj - structure of C = A*B 43 . nspacedouble - number of extra mallocs 44 */ 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 47 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 48 { 49 PetscErrorCode ierr; 50 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 51 PetscInt *ai=Ai,*aj=Aj,*bi=Bi,*bj=Bj,*bjj,*ci,*cj; 52 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0; 53 PetscBT lnkbt; 54 55 PetscFunctionBegin; 56 /* Allocate ci array, arrays for fill computation and */ 57 /* free space for accumulating nonzero column info */ 58 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 59 ci[0] = 0; 60 61 /* create and initialize a linked list */ 62 nlnk = bn+1; 63 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 64 65 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 66 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 67 current_space = free_space; 68 69 /* Determine symbolic info for each row of the product: */ 70 for (i=0; i<am; i++) { 71 anzi = ai[i+1] - ai[i]; 72 cnzi = 0; 73 aj = Aj + ai[i]; 74 for (j=0; j<anzi; j++){ 75 brow = aj[j]; 76 bnzj = bi[brow+1] - bi[brow]; 77 bjj = bj + bi[brow]; 78 /* add non-zero cols of B into the sorted linked list lnk */ 79 ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 80 cnzi += nlnk; 81 } 82 83 /* If free space is not available, make more free space */ 84 /* Double the amount of total space in the list */ 85 if (current_space->local_remaining<cnzi) { 86 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 87 ndouble++; 88 } 89 90 /* Copy data into free space, then initialize lnk */ 91 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 92 current_space->array += cnzi; 93 current_space->local_used += cnzi; 94 current_space->local_remaining -= cnzi; 95 ci[i+1] = ci[i] + cnzi; 96 } 97 98 /* Column indices are in the list of free space */ 99 /* Allocate space for cj, initialize cj, and */ 100 /* destroy list of free space and other temporary array(s) */ 101 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 102 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 103 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 104 105 *Ci = ci; 106 *Cj = cj; 107 *nspacedouble = ndouble; 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy" 113 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy(PetscInt am,PetscInt *Ai,PetscInt *Aj,PetscInt bm,PetscInt bn,PetscInt *Bi,PetscInt *Bj,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 114 { 115 PetscErrorCode ierr; 116 PetscInt *ai=Ai,*aj=Aj,*bi=Bi,*bj=Bj,*bjj,*ci,*cj,rmax=0,*abj,*cj_tmp,nextabj; 117 PetscInt i,j,anzi,brow,bnzj,cnzi,k; 118 PetscBT bt; 119 120 PetscFunctionBegin; 121 /* Allocate ci array, arrays for fill computation and */ 122 /* free space for accumulating nonzero column info */ 123 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 124 ci[0] = 0; 125 126 /* Get ci and rmax for C */ 127 ierr = PetscBTCreate(bn,bt);CHKERRQ(ierr); 128 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 129 for (i=0; i<am; i++) { 130 anzi = ai[i+1] - ai[i]; 131 cnzi = 0; 132 aj = Aj + ai[i]; 133 for (j=0; j<anzi; j++){ 134 brow = aj[j]; 135 bnzj = bi[brow+1] - bi[brow]; 136 bjj = bj + bi[brow]; 137 for (k=0; k<bnzj; k++){ 138 if (!PetscBTLookupSet(bt,bjj[k])){ /* new entry */ 139 cnzi++; 140 } 141 } 142 } 143 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); /* optimize this? */ 144 ci[i+1] = ci[i] + cnzi; 145 if (rmax < cnzi) rmax = cnzi; 146 } 147 148 /* Allocate space for cj */ 149 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 150 151 /* allocate a temp array for storing column indices of A*B */ 152 ierr = PetscMalloc((rmax+1)*sizeof(PetscInt),&abj);CHKERRQ(ierr); 153 154 /* Determine cj */ 155 for (i=0; i<am; i++) { 156 anzi = ai[i+1] - ai[i]; 157 cnzi = 0; 158 nextabj=0; 159 aj = Aj + ai[i]; 160 for (j=0; j<anzi; j++){ 161 brow = aj[j]; 162 bnzj = bi[brow+1] - bi[brow]; 163 bjj = bj + bi[brow]; 164 for (k=0; k<bnzj; k++){ 165 if (!PetscBTLookupSet(bt,bjj[k])){ /* new entry */ 166 abj[nextabj] = bjj[k]; nextabj++; 167 } 168 } 169 } 170 171 /* sort abj, then copy it to cj */ 172 cnzi = ci[i+1] - ci[i]; 173 ierr = PetscSortInt(cnzi,abj);CHKERRQ(ierr); 174 ierr = PetscBTMemzero(bn,bt);CHKERRQ(ierr); 175 cj_tmp = cj + ci[i]; 176 for (k=0; k< cnzi; k++){ 177 cj_tmp[k] = abj[k]; 178 } 179 } 180 181 ierr = PetscBTDestroy(bt);CHKERRQ(ierr); 182 ierr = PetscFree(abj);CHKERRQ(ierr); 183 184 *Ci = ci; 185 *Cj = cj; 186 *nspacedouble = 0; 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 192 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 193 { 194 PetscErrorCode ierr; 195 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 196 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj; 197 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 198 MatScalar *ca; 199 PetscReal afill; 200 PetscBool dense_axpy; /* false: use sparse axpy; otherwise use dense axpy in MatMatMultNumeric_SeqAIJ_SeqAIJ() */ 201 202 PetscFunctionBegin; 203 /* Get ci and cj */ 204 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 205 #if defined(DEBUG_MATMATMULT) 206 ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ() is done \n");CHKERRQ(ierr); 207 #endif 208 209 /* Allocate space for ca */ 210 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 211 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 212 213 /* put together the new symbolic matrix */ 214 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 215 216 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 217 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 218 c = (Mat_SeqAIJ *)((*C)->data); 219 c->free_a = PETSC_TRUE; 220 c->free_ij = PETSC_TRUE; 221 c->nonew = 0; 222 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqAIJ; 223 224 /* Determine which MatMatMultNumeric_SeqAIJ_SeqAIJ() to be used */ 225 dense_axpy = PETSC_TRUE; 226 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_denseaxpy",&dense_axpy,PETSC_NULL);CHKERRQ(ierr); 227 if (dense_axpy){ 228 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 229 ierr = PetscMemzero(c->matmult_abdense,dense_axpy*bn*sizeof(PetscScalar));CHKERRQ(ierr); 230 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, takes additional dense_axpy*bn*sizeof(PetscScalar) space */ 231 } else { /* slower, but use less memory */ 232 #if defined(DEBUG_MATMATMULT) 233 ierr = PetscPrintf(PETSC_COMM_SELF,"call MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy \n"); 234 #endif 235 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy; /* slower, less memory */ 236 } 237 238 /* set MatInfo */ 239 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 240 if (afill < 1.0) afill = 1.0; 241 c->maxnz = ci[am]; 242 c->nz = ci[am]; 243 (*C)->info.mallocs = nspacedouble; 244 (*C)->info.fill_ratio_given = fill; 245 (*C)->info.fill_ratio_needed = afill; 246 247 #if defined(PETSC_USE_INFO) 248 if (ci[am]) { 249 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 250 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 251 } else { 252 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 253 } 254 #endif 255 PetscFunctionReturn(0); 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 260 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 261 { 262 PetscErrorCode ierr; 263 PetscLogDouble flops=0.0; 264 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 265 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 266 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 267 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 268 PetscInt am=A->rmap->n,cm=C->rmap->n; 269 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 270 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 271 PetscScalar *ab_dense=c->matmult_abdense; 272 273 PetscFunctionBegin; 274 /* clean old values in C */ 275 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 276 /* Traverse A row-wise. */ 277 /* Build the ith row in C by summing over nonzero columns in A, */ 278 /* the rows of B corresponding to nonzeros of A. */ 279 for (i=0; i<am; i++) { 280 anzi = ai[i+1] - ai[i]; 281 for (j=0; j<anzi; j++) { 282 brow = aj[j]; 283 bnzi = bi[brow+1] - bi[brow]; 284 bjj = bj + bi[brow]; 285 baj = ba + bi[brow]; 286 /* perform dense axpy */ 287 valtmp = aa[j]; 288 for (k=0; k<bnzi; k++) { 289 ab_dense[bjj[k]] += valtmp*baj[k]; 290 } 291 flops += 2*bnzi; 292 } 293 aj += anzi; aa += anzi; 294 295 cnzi = ci[i+1] - ci[i]; 296 for (k=0; k<cnzi; k++) { 297 ca[k] += ab_dense[cj[k]]; 298 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 299 } 300 flops += cnzi; 301 cj += cnzi; ca += cnzi; 302 } 303 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 305 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 306 C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy" 312 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat B,Mat C) 313 { 314 PetscErrorCode ierr; 315 PetscLogDouble flops=0.0; 316 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 317 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 318 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 319 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 320 PetscInt am=A->rmap->N,cm=C->rmap->N; 321 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 322 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 323 PetscInt nextb; 324 325 PetscFunctionBegin; 326 #if defined(DEBUG_MATMATMULT) 327 //ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy...\n");CHKERRQ(ierr); 328 #endif 329 /* clean old values in C */ 330 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 331 /* Traverse A row-wise. */ 332 /* Build the ith row in C by summing over nonzero columns in A, */ 333 /* the rows of B corresponding to nonzeros of A. */ 334 for (i=0;i<am;i++) { 335 anzi = ai[i+1] - ai[i]; 336 cnzi = ci[i+1] - ci[i]; 337 for (j=0;j<anzi;j++) { 338 brow = aj[j]; 339 bnzi = bi[brow+1] - bi[brow]; 340 bjj = bj + bi[brow]; 341 baj = ba + bi[brow]; 342 /* perform sparse axpy */ 343 valtmp = aa[j]; 344 nextb = 0; 345 for (k=0; nextb<bnzi; k++) { 346 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 347 ca[k] += valtmp*baj[nextb++]; 348 } 349 } 350 flops += 2*bnzi; 351 } 352 aj += anzi; aa += anzi; 353 cj += cnzi; ca += cnzi; 354 } 355 356 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy" 364 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy(Mat A,Mat B,PetscReal fill,Mat *C) 365 { 366 PetscErrorCode ierr; 367 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 368 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj; 369 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 370 MatScalar *ca; 371 PetscReal afill; 372 373 PetscFunctionBegin; 374 #if defined(DEBUG_MATMATMULT) 375 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_SparseAxpy \n");CHKERRQ(ierr); 376 #endif 377 /* Get ci and cj */ 378 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy(am,ai,aj,bm,bn,bi,bj,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 379 #if defined(DEBUG_MATMATMULT) 380 ierr = PetscPrintf(PETSC_COMM_SELF,"MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_SparseAxpy() is done \n");CHKERRQ(ierr); 381 #endif 382 383 /* Allocate space for ca */ 384 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 385 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 386 387 /* put together the new symbolic matrix */ 388 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 389 390 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 391 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 392 c = (Mat_SeqAIJ *)((*C)->data); 393 c->free_a = PETSC_TRUE; 394 c->free_ij = PETSC_TRUE; 395 c->nonew = 0; 396 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_SparseAxpy; /* slower, less memory */ 397 398 /* set MatInfo */ 399 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 400 if (afill < 1.0) afill = 1.0; 401 c->maxnz = ci[am]; 402 c->nz = ci[am]; 403 (*C)->info.mallocs = nspacedouble; 404 (*C)->info.fill_ratio_given = fill; 405 (*C)->info.fill_ratio_needed = afill; 406 407 #if defined(PETSC_USE_INFO) 408 if (ci[am]) { 409 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 410 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 411 } else { 412 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 413 } 414 #endif 415 PetscFunctionReturn(0); 416 } 417 418 419 /* This routine is not used. Should be removed! */ 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 422 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 423 { 424 PetscErrorCode ierr; 425 426 PetscFunctionBegin; 427 if (scall == MAT_INITIAL_MATRIX){ 428 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 429 } 430 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 436 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 437 { 438 PetscErrorCode ierr; 439 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 440 441 PetscFunctionBegin; 442 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 443 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 444 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 445 ierr = PetscFree(multtrans);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 451 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 452 { 453 PetscErrorCode ierr; 454 PetscContainer container; 455 Mat_MatMatTransMult *multtrans=PETSC_NULL; 456 457 PetscFunctionBegin; 458 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 459 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 460 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 461 A->ops->destroy = multtrans->destroy; 462 if (A->ops->destroy) { 463 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 464 } 465 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 471 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 472 { 473 PetscErrorCode ierr; 474 Mat Bt; 475 PetscInt *bti,*btj; 476 Mat_MatMatTransMult *multtrans; 477 PetscContainer container; 478 PetscLogDouble t0,tf,etime2=0.0; 479 480 PetscFunctionBegin; 481 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 482 /* create symbolic Bt */ 483 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 484 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 485 486 /* get symbolic C=A*Bt */ 487 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 488 489 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 490 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 491 492 /* attach the supporting struct to C */ 493 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 494 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 495 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 496 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 497 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 498 499 multtrans->usecoloring = PETSC_FALSE; 500 multtrans->destroy = (*C)->ops->destroy; 501 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 502 503 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 504 etime2 += tf - t0; 505 506 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 507 if (multtrans->usecoloring){ 508 /* Create MatTransposeColoring from symbolic C=A*B^T */ 509 MatTransposeColoring matcoloring; 510 ISColoring iscoloring; 511 Mat Bt_dense,C_dense; 512 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 513 514 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 515 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 516 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 517 etime0 += tf - t0; 518 519 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 520 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 521 multtrans->matcoloring = matcoloring; 522 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 523 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 524 etime01 += tf - t0; 525 526 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 527 /* Create Bt_dense and C_dense = A*Bt_dense */ 528 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 529 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 530 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 531 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 532 Bt_dense->assembled = PETSC_TRUE; 533 multtrans->Bt_den = Bt_dense; 534 535 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 536 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 537 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 538 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 539 Bt_dense->assembled = PETSC_TRUE; 540 multtrans->ABt_den = C_dense; 541 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 542 etime1 += tf - t0; 543 544 #if defined(PETSC_USE_INFO) 545 { 546 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 547 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 548 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 549 } 550 #endif 551 } 552 /* clean up */ 553 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 554 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 555 556 557 558 #if defined(INEFFICIENT_ALGORITHM) 559 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 560 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 561 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 562 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 563 PetscInt am=A->rmap->N,bm=B->rmap->N; 564 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 565 MatScalar *ca; 566 PetscBT lnkbt; 567 PetscReal afill; 568 569 /* Allocate row pointer array ci */ 570 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 571 ci[0] = 0; 572 573 /* Create and initialize a linked list for C columns */ 574 nlnk = bm+1; 575 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 576 577 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 578 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 579 current_space = free_space; 580 581 /* Determine symbolic info for each row of the product A*B^T: */ 582 for (i=0; i<am; i++) { 583 anzi = ai[i+1] - ai[i]; 584 cnzi = 0; 585 acol = aj + ai[i]; 586 for (j=0; j<bm; j++){ 587 bnzj = bi[j+1] - bi[j]; 588 bcol= bj + bi[j]; 589 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 590 ka = 0; kb = 0; 591 while (ka < anzi && kb < bnzj){ 592 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 593 if (ka == anzi) break; 594 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 595 if (kb == bnzj) break; 596 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 597 index[0] = j; 598 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 599 cnzi++; 600 break; 601 } 602 } 603 } 604 605 /* If free space is not available, make more free space */ 606 /* Double the amount of total space in the list */ 607 if (current_space->local_remaining<cnzi) { 608 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 609 nspacedouble++; 610 } 611 612 /* Copy data into free space, then initialize lnk */ 613 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 614 current_space->array += cnzi; 615 current_space->local_used += cnzi; 616 current_space->local_remaining -= cnzi; 617 618 ci[i+1] = ci[i] + cnzi; 619 } 620 621 622 /* Column indices are in the list of free space. 623 Allocate array cj, copy column indices to cj, and destroy list of free space */ 624 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 625 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 626 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 627 628 /* Allocate space for ca */ 629 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 630 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 631 632 /* put together the new symbolic matrix */ 633 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 634 635 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 636 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 637 c = (Mat_SeqAIJ *)((*C)->data); 638 c->free_a = PETSC_TRUE; 639 c->free_ij = PETSC_TRUE; 640 c->nonew = 0; 641 642 /* set MatInfo */ 643 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 644 if (afill < 1.0) afill = 1.0; 645 c->maxnz = ci[am]; 646 c->nz = ci[am]; 647 (*C)->info.mallocs = nspacedouble; 648 (*C)->info.fill_ratio_given = fill; 649 (*C)->info.fill_ratio_needed = afill; 650 651 #if defined(PETSC_USE_INFO) 652 if (ci[am]) { 653 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 654 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 655 } else { 656 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 657 } 658 #endif 659 #endif 660 PetscFunctionReturn(0); 661 } 662 663 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 666 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 667 { 668 PetscErrorCode ierr; 669 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 670 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 671 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 672 PetscLogDouble flops=0.0; 673 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 674 Mat_MatMatTransMult *multtrans; 675 PetscContainer container; 676 #if defined(USE_ARRAY) 677 MatScalar *spdot; 678 #endif 679 680 PetscFunctionBegin; 681 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 682 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 683 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 684 if (multtrans->usecoloring){ 685 MatTransposeColoring matcoloring = multtrans->matcoloring; 686 Mat Bt_dense; 687 PetscInt m,n; 688 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 689 Mat C_dense = multtrans->ABt_den; 690 691 Bt_dense = multtrans->Bt_den; 692 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 693 694 /* Get Bt_dense by Apply MatTransposeColoring to B */ 695 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 696 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 697 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 698 etime0 += tf - t0; 699 700 /* C_dense = A*Bt_dense */ 701 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 702 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 703 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 704 etime2 += tf - t0; 705 706 /* Recover C from C_dense */ 707 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 708 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 709 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 710 etime1 += tf - t0; 711 #if defined(PETSC_USE_INFO) 712 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 713 #endif 714 PetscFunctionReturn(0); 715 } 716 717 #if defined(USE_ARRAY) 718 /* allocate an array for implementing sparse inner-product */ 719 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 720 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 721 #endif 722 723 /* clear old values in C */ 724 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 725 726 for (i=0; i<cm; i++) { 727 anzi = ai[i+1] - ai[i]; 728 acol = aj + ai[i]; 729 aval = aa + ai[i]; 730 cnzi = ci[i+1] - ci[i]; 731 ccol = cj + ci[i]; 732 cval = ca + ci[i]; 733 for (j=0; j<cnzi; j++){ 734 brow = ccol[j]; 735 bnzj = bi[brow+1] - bi[brow]; 736 bcol = bj + bi[brow]; 737 bval = ba + bi[brow]; 738 739 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 740 #if defined(USE_ARRAY) 741 /* put ba to spdot array */ 742 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 743 /* c(i,j)=A[i,:]*B[j,:]^T */ 744 for (nexta=0; nexta<anzi; nexta++){ 745 cval[j] += spdot[acol[nexta]]*aval[nexta]; 746 } 747 /* zero spdot array */ 748 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 749 #else 750 nexta = 0; nextb = 0; 751 while (nexta<anzi && nextb<bnzj){ 752 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 753 if (nexta == anzi) break; 754 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 755 if (nextb == bnzj) break; 756 if (acol[nexta] == bcol[nextb]){ 757 cval[j] += aval[nexta]*bval[nextb]; 758 nexta++; nextb++; 759 flops += 2; 760 } 761 } 762 #endif 763 } 764 } 765 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 768 #if defined(USE_ARRAY) 769 ierr = PetscFree(spdot); 770 #endif 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 776 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 777 PetscErrorCode ierr; 778 779 PetscFunctionBegin; 780 if (scall == MAT_INITIAL_MATRIX){ 781 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 782 } 783 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 789 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 790 { 791 PetscErrorCode ierr; 792 Mat At; 793 PetscInt *ati,*atj; 794 795 PetscFunctionBegin; 796 /* create symbolic At */ 797 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 798 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 799 800 /* get symbolic C=At*B */ 801 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 802 803 /* clean up */ 804 ierr = MatDestroy(&At);CHKERRQ(ierr); 805 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 811 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 812 { 813 PetscErrorCode ierr; 814 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 815 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 816 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 817 PetscLogDouble flops=0.0; 818 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 819 820 PetscFunctionBegin; 821 /* clear old values in C */ 822 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 823 824 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 825 for (i=0;i<am;i++) { 826 bj = b->j + bi[i]; 827 ba = b->a + bi[i]; 828 bnzi = bi[i+1] - bi[i]; 829 anzi = ai[i+1] - ai[i]; 830 for (j=0; j<anzi; j++) { 831 nextb = 0; 832 crow = *aj++; 833 cjj = cj + ci[crow]; 834 caj = ca + ci[crow]; 835 /* perform sparse axpy operation. Note cjj includes bj. */ 836 for (k=0; nextb<bnzi; k++) { 837 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 838 caj[k] += (*aa)*(*(ba+nextb)); 839 nextb++; 840 } 841 } 842 flops += 2*bnzi; 843 aa++; 844 } 845 } 846 847 /* Assemble the final matrix and clean up */ 848 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 849 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 850 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 EXTERN_C_BEGIN 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 857 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 858 { 859 PetscErrorCode ierr; 860 861 PetscFunctionBegin; 862 if (scall == MAT_INITIAL_MATRIX){ 863 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 864 } 865 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 EXTERN_C_END 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 872 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 873 { 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 878 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 884 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 885 { 886 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 887 PetscErrorCode ierr; 888 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 889 MatScalar *aa; 890 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 891 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 892 893 PetscFunctionBegin; 894 if (!cm || !cn) PetscFunctionReturn(0); 895 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 896 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 897 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 898 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 899 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 900 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 901 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 902 colam = col*am; 903 for (i=0; i<am; i++) { /* over rows of C in those columns */ 904 r1 = r2 = r3 = r4 = 0.0; 905 n = a->i[i+1] - a->i[i]; 906 aj = a->j + a->i[i]; 907 aa = a->a + a->i[i]; 908 for (j=0; j<n; j++) { 909 r1 += (*aa)*b1[*aj]; 910 r2 += (*aa)*b2[*aj]; 911 r3 += (*aa)*b3[*aj]; 912 r4 += (*aa++)*b4[*aj++]; 913 } 914 c[colam + i] = r1; 915 c[colam + am + i] = r2; 916 c[colam + am2 + i] = r3; 917 c[colam + am3 + i] = r4; 918 } 919 b1 += bm4; 920 b2 += bm4; 921 b3 += bm4; 922 b4 += bm4; 923 } 924 for (;col<cn; col++){ /* over extra columns of C */ 925 for (i=0; i<am; i++) { /* over rows of C in those columns */ 926 r1 = 0.0; 927 n = a->i[i+1] - a->i[i]; 928 aj = a->j + a->i[i]; 929 aa = a->a + a->i[i]; 930 931 for (j=0; j<n; j++) { 932 r1 += (*aa++)*b1[*aj++]; 933 } 934 c[col*am + i] = r1; 935 } 936 b1 += bm; 937 } 938 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 939 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 940 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 941 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 942 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 /* 947 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 948 */ 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 951 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 952 { 953 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 954 PetscErrorCode ierr; 955 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 956 MatScalar *aa; 957 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 958 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 959 960 PetscFunctionBegin; 961 if (!cm || !cn) PetscFunctionReturn(0); 962 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 963 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 964 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 965 966 if (a->compressedrow.use){ /* use compressed row format */ 967 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 968 colam = col*am; 969 arm = a->compressedrow.nrows; 970 ii = a->compressedrow.i; 971 ridx = a->compressedrow.rindex; 972 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 973 r1 = r2 = r3 = r4 = 0.0; 974 n = ii[i+1] - ii[i]; 975 aj = a->j + ii[i]; 976 aa = a->a + ii[i]; 977 for (j=0; j<n; j++) { 978 r1 += (*aa)*b1[*aj]; 979 r2 += (*aa)*b2[*aj]; 980 r3 += (*aa)*b3[*aj]; 981 r4 += (*aa++)*b4[*aj++]; 982 } 983 c[colam + ridx[i]] += r1; 984 c[colam + am + ridx[i]] += r2; 985 c[colam + am2 + ridx[i]] += r3; 986 c[colam + am3 + ridx[i]] += r4; 987 } 988 b1 += bm4; 989 b2 += bm4; 990 b3 += bm4; 991 b4 += bm4; 992 } 993 for (;col<cn; col++){ /* over extra columns of C */ 994 colam = col*am; 995 arm = a->compressedrow.nrows; 996 ii = a->compressedrow.i; 997 ridx = a->compressedrow.rindex; 998 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 999 r1 = 0.0; 1000 n = ii[i+1] - ii[i]; 1001 aj = a->j + ii[i]; 1002 aa = a->a + ii[i]; 1003 1004 for (j=0; j<n; j++) { 1005 r1 += (*aa++)*b1[*aj++]; 1006 } 1007 c[col*am + ridx[i]] += r1; 1008 } 1009 b1 += bm; 1010 } 1011 } else { 1012 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1013 colam = col*am; 1014 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1015 r1 = r2 = r3 = r4 = 0.0; 1016 n = a->i[i+1] - a->i[i]; 1017 aj = a->j + a->i[i]; 1018 aa = a->a + a->i[i]; 1019 for (j=0; j<n; j++) { 1020 r1 += (*aa)*b1[*aj]; 1021 r2 += (*aa)*b2[*aj]; 1022 r3 += (*aa)*b3[*aj]; 1023 r4 += (*aa++)*b4[*aj++]; 1024 } 1025 c[colam + i] += r1; 1026 c[colam + am + i] += r2; 1027 c[colam + am2 + i] += r3; 1028 c[colam + am3 + i] += r4; 1029 } 1030 b1 += bm4; 1031 b2 += bm4; 1032 b3 += bm4; 1033 b4 += bm4; 1034 } 1035 for (;col<cn; col++){ /* over extra columns of C */ 1036 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1037 r1 = 0.0; 1038 n = a->i[i+1] - a->i[i]; 1039 aj = a->j + a->i[i]; 1040 aa = a->a + a->i[i]; 1041 1042 for (j=0; j<n; j++) { 1043 r1 += (*aa++)*b1[*aj++]; 1044 } 1045 c[col*am + i] += r1; 1046 } 1047 b1 += bm; 1048 } 1049 } 1050 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1051 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1052 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNCT__ 1057 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1058 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1059 { 1060 PetscErrorCode ierr; 1061 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1062 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1063 PetscInt *bi=b->i,*bj=b->j; 1064 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1065 MatScalar *btval,*btval_den,*ba=b->a; 1066 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1067 1068 PetscFunctionBegin; 1069 btval_den=btdense->v; 1070 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1071 for (k=0; k<ncolors; k++) { 1072 ncolumns = coloring->ncolumns[k]; 1073 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1074 col = *(columns + colorforcol[k] + l); 1075 btcol = bj + bi[col]; 1076 btval = ba + bi[col]; 1077 anz = bi[col+1] - bi[col]; 1078 for (j=0; j<anz; j++){ 1079 brow = btcol[j]; 1080 btval_den[brow] = btval[j]; 1081 } 1082 } 1083 btval_den += m; 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1090 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1091 { 1092 PetscErrorCode ierr; 1093 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1094 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1095 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1096 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1097 1098 PetscFunctionBegin; 1099 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1100 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1101 cp_den = ca_den; 1102 for (k=0; k<ncolors; k++) { 1103 nrows = matcoloring->nrows[k]; 1104 row = rows + colorforrow[k]; 1105 idx = spidx + colorforrow[k]; 1106 for (l=0; l<nrows; l++){ 1107 ca[idx[l]] = cp_den[row[l]]; 1108 } 1109 cp_den += m; 1110 } 1111 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 /* 1116 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1117 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1118 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1119 */ 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1122 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1123 { 1124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1127 PetscInt nz = a->i[m],row,*jj,mr,col; 1128 PetscInt *cspidx; 1129 1130 PetscFunctionBegin; 1131 *nn = n; 1132 if (!ia) PetscFunctionReturn(0); 1133 if (symmetric) { 1134 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1135 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1136 } else { 1137 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1138 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1139 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1140 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1141 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1142 jj = a->j; 1143 for (i=0; i<nz; i++) { 1144 collengths[jj[i]]++; 1145 } 1146 cia[0] = oshift; 1147 for (i=0; i<n; i++) { 1148 cia[i+1] = cia[i] + collengths[i]; 1149 } 1150 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1151 jj = a->j; 1152 for (row=0; row<m; row++) { 1153 mr = a->i[row+1] - a->i[row]; 1154 for (i=0; i<mr; i++) { 1155 col = *jj++; 1156 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1157 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1158 } 1159 } 1160 ierr = PetscFree(collengths);CHKERRQ(ierr); 1161 *ia = cia; *ja = cja; 1162 *spidx = cspidx; 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1169 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1170 { 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 if (!ia) PetscFunctionReturn(0); 1175 1176 ierr = PetscFree(*ia);CHKERRQ(ierr); 1177 ierr = PetscFree(*ja);CHKERRQ(ierr); 1178 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1184 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1185 { 1186 PetscErrorCode ierr; 1187 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1188 const PetscInt *is; 1189 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1190 IS *isa; 1191 PetscBool done; 1192 PetscBool flg1,flg2; 1193 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1194 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1195 PetscInt *colorforcol,*columns,*columns_i; 1196 1197 PetscFunctionBegin; 1198 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1199 1200 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1201 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1202 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1203 if (flg1 || flg2) { 1204 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1205 } 1206 1207 N = mat->cmap->N/bs; 1208 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1209 c->N = mat->cmap->N/bs; 1210 c->m = mat->rmap->N/bs; 1211 c->rstart = 0; 1212 1213 c->ncolors = nis; 1214 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1215 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1216 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1217 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1218 colorforrow[0] = 0; 1219 rows_i = rows; 1220 columnsforspidx_i = columnsforspidx; 1221 1222 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1223 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1224 colorforcol[0] = 0; 1225 columns_i = columns; 1226 1227 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1228 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1229 1230 cm = c->m; 1231 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1232 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1233 for (i=0; i<nis; i++) { 1234 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1235 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1236 c->ncolumns[i] = n; 1237 if (n) { 1238 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1239 } 1240 colorforcol[i+1] = colorforcol[i] + n; 1241 columns_i += n; 1242 1243 /* fast, crude version requires O(N*N) work */ 1244 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1245 1246 /* loop over columns*/ 1247 for (j=0; j<n; j++) { 1248 col = is[j]; 1249 row_idx = cj + ci[col]; 1250 m = ci[col+1] - ci[col]; 1251 /* loop over columns marking them in rowhit */ 1252 for (k=0; k<m; k++) { 1253 idxhit[*row_idx] = spidx[ci[col] + k]; 1254 rowhit[*row_idx++] = col + 1; 1255 } 1256 } 1257 /* count the number of hits */ 1258 nrows = 0; 1259 for (j=0; j<cm; j++) { 1260 if (rowhit[j]) nrows++; 1261 } 1262 c->nrows[i] = nrows; 1263 colorforrow[i+1] = colorforrow[i] + nrows; 1264 1265 nrows = 0; 1266 for (j=0; j<cm; j++) { 1267 if (rowhit[j]) { 1268 rows_i[nrows] = j; 1269 columnsforspidx_i[nrows] = idxhit[j]; 1270 nrows++; 1271 } 1272 } 1273 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1274 rows_i += nrows; columnsforspidx_i += nrows; 1275 } 1276 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1277 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1278 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1279 #if defined(PETSC_USE_DEBUG) 1280 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1281 #endif 1282 1283 c->colorforrow = colorforrow; 1284 c->rows = rows; 1285 c->columnsforspidx = columnsforspidx; 1286 c->colorforcol = colorforcol; 1287 c->columns = columns; 1288 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1289 PetscFunctionReturn(0); 1290 } 1291