1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 C = P * A * P^T 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 #include "src/mat/utils/freespace.h" 10 11 static int logkey_matmatmult = 0; 12 static int logkey_matmatmult_symbolic = 0; 13 static int logkey_matmatmult_numeric = 0; 14 15 static int logkey_matapplypapt = 0; 16 static int logkey_matapplypapt_symbolic = 0; 17 static int logkey_matapplypapt_numeric = 0; 18 19 /* 20 MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 21 C = A * B; 22 23 Note: C is assumed to be uncreated. 24 If this is not the case, Destroy C before calling this routine. 25 */ 26 #ifdef USE_INTSORT 27 /* 28 This roution is modified by the one below for better performance. 29 The changes are: 30 -- PetscSortInt() is replace by a linked list 31 -- malloc larger Initial FreeSpace 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 35 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 36 { 37 int ierr; 38 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 39 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 40 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 41 int *ci,*cj,*denserow,*sparserow; 42 int an=A->N,am=A->M,bn=B->N,bm=B->M; 43 int i,j,k,anzi,brow,bnzj,cnzi; 44 MatScalar *ca; 45 46 PetscFunctionBegin; 47 /* some error checking which could be moved into interface layer */ 48 if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 49 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 50 51 /* Set up timers */ 52 if (!logkey_matmatmult_symbolic) { 53 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 54 } 55 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 56 57 /* Set up */ 58 /* Allocate ci array, arrays for fill computation and */ 59 /* free space for accumulating nonzero column info */ 60 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 61 ci[0] = 0; 62 63 ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 64 ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 65 sparserow = denserow + bn; 66 67 /* Initial FreeSpace size is nnz(B)=bi[bm] */ 68 ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 69 current_space = free_space; 70 71 /* Determine symbolic info for each row of the product: */ 72 for (i=0;i<am;i++) { 73 anzi = ai[i+1] - ai[i]; 74 cnzi = 0; 75 for (j=0;j<anzi;j++) { 76 brow = *aj++; 77 bnzj = bi[brow+1] - bi[brow]; 78 bjj = bj + bi[brow]; 79 for (k=0;k<bnzj;k++) { 80 /* If column is not marked, mark it in compressed and uncompressed locations. */ 81 /* For simplicity, leave uncompressed row unsorted until finished with row, */ 82 /* and increment nonzero count for this row. */ 83 if (!denserow[bjj[k]]) { 84 denserow[bjj[k]] = -1; 85 sparserow[cnzi++] = bjj[k]; 86 } 87 } 88 } 89 90 /* sort sparserow */ 91 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 92 93 /* If free space is not available, make more free space */ 94 /* Double the amount of total space in the list */ 95 if (current_space->local_remaining<cnzi) { 96 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 97 } 98 99 /* Copy data into free space, and zero out denserow */ 100 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 101 current_space->array += cnzi; 102 current_space->local_used += cnzi; 103 current_space->local_remaining -= cnzi; 104 for (j=0;j<cnzi;j++) { 105 denserow[sparserow[j]] = 0; 106 } 107 ci[i+1] = ci[i] + cnzi; 108 } 109 110 /* Column indices are in the list of free space */ 111 /* Allocate space for cj, initialize cj, and */ 112 /* destroy list of free space and other temporary array(s) */ 113 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 114 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 115 ierr = PetscFree(denserow);CHKERRQ(ierr); 116 117 /* Allocate space for ca */ 118 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 119 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 120 121 /* put together the new matrix */ 122 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 123 124 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 125 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 126 c = (Mat_SeqAIJ *)((*C)->data); 127 c->freedata = PETSC_TRUE; 128 c->nonew = 0; 129 130 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 #endif /* USE_INTSORT */ 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 137 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 138 { 139 int ierr; 140 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 141 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 142 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 143 int *ci,*cj,*lnk,idx0,idx,bcol; 144 int an=A->N,am=A->M,bn=B->N,bm=B->M; 145 int i,j,k,anzi,brow,bnzj,cnzi; 146 MatScalar *ca; 147 148 PetscFunctionBegin; 149 /* some error checking which could be moved into interface layer */ 150 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 151 152 /* Set up timers */ 153 if (!logkey_matmatmult_symbolic) { 154 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 155 } 156 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 157 158 /* Set up */ 159 /* Allocate ci array, arrays for fill computation and */ 160 /* free space for accumulating nonzero column info */ 161 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 162 ci[0] = 0; 163 164 ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 165 for (i=0; i<bn; i++) lnk[i] = -1; 166 167 /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 168 ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 169 current_space = free_space; 170 171 /* Determine symbolic info for each row of the product: */ 172 for (i=0;i<am;i++) { 173 anzi = ai[i+1] - ai[i]; 174 cnzi = 0; 175 lnk[bn] = bn; 176 for (j=0;j<anzi;j++) { 177 brow = *aj++; 178 bnzj = bi[brow+1] - bi[brow]; 179 bjj = bj + bi[brow]; 180 idx = bn; 181 for (k=0;k<bnzj;k++) { 182 bcol = bjj[k]; 183 if (lnk[bcol] == -1) { /* new col */ 184 if (k>0) idx = bjj[k-1]; 185 do { 186 idx0 = idx; 187 idx = lnk[idx0]; 188 } while (bcol > idx); 189 lnk[idx0] = bcol; 190 lnk[bcol] = idx; 191 cnzi++; 192 } 193 } 194 } 195 196 /* If free space is not available, make more free space */ 197 /* Double the amount of total space in the list */ 198 if (current_space->local_remaining<cnzi) { 199 printf("...%d -th row, double space ...\n",i); 200 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 201 } 202 203 /* Copy data into free space, and zero out denserow and lnk */ 204 idx = bn; 205 for (j=0; j<cnzi; j++){ 206 idx0 = idx; 207 idx = lnk[idx0]; 208 *current_space->array++ = idx; 209 lnk[idx0] = -1; 210 } 211 lnk[idx] = -1; 212 213 current_space->local_used += cnzi; 214 current_space->local_remaining -= cnzi; 215 216 ci[i+1] = ci[i] + cnzi; 217 } 218 219 /* Column indices are in the list of free space */ 220 /* Allocate space for cj, initialize cj, and */ 221 /* destroy list of free space and other temporary array(s) */ 222 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 223 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 224 ierr = PetscFree(lnk);CHKERRQ(ierr); 225 226 /* Allocate space for ca */ 227 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 228 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 229 230 /* put together the new matrix */ 231 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 232 233 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 234 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 235 c = (Mat_SeqAIJ *)((*C)->data); 236 c->freedata = PETSC_TRUE; 237 c->nonew = 0; 238 239 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 /* 244 MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 245 C=A*B; 246 Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 247 */ 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 250 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 251 { 252 int ierr,flops=0; 253 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 254 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 255 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 256 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 257 int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 258 int i,j,k,anzi,bnzi,cnzi,brow; 259 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 260 261 PetscFunctionBegin; 262 263 /* This error checking should be unnecessary if the symbolic was performed */ 264 if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 265 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 266 if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 267 268 /* Set up timers */ 269 if (!logkey_matmatmult_numeric) { 270 ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 271 } 272 ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 273 274 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 275 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 276 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 277 /* Traverse A row-wise. */ 278 /* Build the ith row in C by summing over nonzero columns in A, */ 279 /* the rows of B corresponding to nonzeros of A. */ 280 for (i=0;i<am;i++) { 281 anzi = ai[i+1] - ai[i]; 282 for (j=0;j<anzi;j++) { 283 brow = *aj++; 284 bnzi = bi[brow+1] - bi[brow]; 285 bjj = bj + bi[brow]; 286 baj = ba + bi[brow]; 287 for (k=0;k<bnzi;k++) { 288 temp[bjj[k]] += (*aa)*baj[k]; 289 } 290 flops += 2*bnzi; 291 aa++; 292 } 293 /* Store row back into C, and re-zero temp */ 294 cnzi = ci[i+1] - ci[i]; 295 for (j=0;j<cnzi;j++) { 296 ca[j] = temp[cj[j]]; 297 temp[cj[j]] = 0.0; 298 } 299 ca += cnzi; 300 cj += cnzi; 301 } 302 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 303 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304 305 /* Free temp */ 306 ierr = PetscFree(temp);CHKERRQ(ierr); 307 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 308 ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 314 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 315 int ierr; 316 317 PetscFunctionBegin; 318 if (!logkey_matmatmult) { 319 ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 320 } 321 ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 322 ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 323 ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 324 ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 329 /* 330 MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 331 C = P * A * P^T; 332 333 Note: C is assumed to be uncreated. 334 If this is not the case, Destroy C before calling this routine. 335 */ 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 338 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 339 /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 340 /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 341 int ierr; 342 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 343 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 344 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 345 int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 346 int an=A->N,am=A->M,pn=P->N,pm=P->M; 347 int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 348 MatScalar *ca; 349 350 PetscFunctionBegin; 351 352 /* some error checking which could be moved into interface layer */ 353 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 354 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 355 356 /* Set up timers */ 357 if (!logkey_matapplypapt_symbolic) { 358 ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 359 } 360 ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 361 362 /* Create ij structure of P^T */ 363 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 364 365 /* Allocate ci array, arrays for fill computation and */ 366 /* free space for accumulating nonzero column info */ 367 ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 368 ci[0] = 0; 369 370 ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 371 ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 372 pasparserow = padenserow + an; 373 denserow = pasparserow + an; 374 sparserow = denserow + pm; 375 376 /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 377 /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 378 ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 379 current_space = free_space; 380 381 /* Determine fill for each row of C: */ 382 for (i=0;i<pm;i++) { 383 pnzi = pi[i+1] - pi[i]; 384 panzi = 0; 385 /* Get symbolic sparse row of PA: */ 386 for (j=0;j<pnzi;j++) { 387 arow = *pj++; 388 anzj = ai[arow+1] - ai[arow]; 389 ajj = aj + ai[arow]; 390 for (k=0;k<anzj;k++) { 391 if (!padenserow[ajj[k]]) { 392 padenserow[ajj[k]] = -1; 393 pasparserow[panzi++] = ajj[k]; 394 } 395 } 396 } 397 /* Using symbolic row of PA, determine symbolic row of C: */ 398 paj = pasparserow; 399 cnzi = 0; 400 for (j=0;j<panzi;j++) { 401 ptrow = *paj++; 402 ptnzj = pti[ptrow+1] - pti[ptrow]; 403 ptjj = ptj + pti[ptrow]; 404 for (k=0;k<ptnzj;k++) { 405 if (!denserow[ptjj[k]]) { 406 denserow[ptjj[k]] = -1; 407 sparserow[cnzi++] = ptjj[k]; 408 } 409 } 410 } 411 412 /* sort sparse representation */ 413 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 414 415 /* If free space is not available, make more free space */ 416 /* Double the amount of total space in the list */ 417 if (current_space->local_remaining<cnzi) { 418 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 419 } 420 421 /* Copy data into free space, and zero out dense row */ 422 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 423 current_space->array += cnzi; 424 current_space->local_used += cnzi; 425 current_space->local_remaining -= cnzi; 426 427 for (j=0;j<panzi;j++) { 428 padenserow[pasparserow[j]] = 0; 429 } 430 for (j=0;j<cnzi;j++) { 431 denserow[sparserow[j]] = 0; 432 } 433 ci[i+1] = ci[i] + cnzi; 434 } 435 /* column indices are in the list of free space */ 436 /* Allocate space for cj, initialize cj, and */ 437 /* destroy list of free space and other temporary array(s) */ 438 ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 439 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 440 ierr = PetscFree(padenserow);CHKERRQ(ierr); 441 442 /* Allocate space for ca */ 443 ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 444 ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 445 446 /* put together the new matrix */ 447 ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 448 449 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 450 /* Since these are PETSc arrays, change flags to free them as necessary. */ 451 c = (Mat_SeqAIJ *)((*C)->data); 452 c->freedata = PETSC_TRUE; 453 c->nonew = 0; 454 455 /* Clean up. */ 456 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 457 458 ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 /* 463 MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 464 C = P * A * P^T; 465 Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 466 */ 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 469 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 470 int ierr,flops=0; 471 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 472 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 473 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 474 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 475 int *ci=c->i,*cj=c->j; 476 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 477 int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 478 MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 479 480 PetscFunctionBegin; 481 482 /* This error checking should be unnecessary if the symbolic was performed */ 483 if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 484 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 485 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 486 if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 487 488 /* Set up timers */ 489 if (!logkey_matapplypapt_numeric) { 490 ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 491 } 492 ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 493 494 ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 495 ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 496 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 497 498 paj = (int *)(paa + an); 499 pajdense = paj + an; 500 501 for (i=0;i<pm;i++) { 502 /* Form sparse row of P*A */ 503 pnzi = pi[i+1] - pi[i]; 504 panzj = 0; 505 for (j=0;j<pnzi;j++) { 506 arow = *pj++; 507 anzj = ai[arow+1] - ai[arow]; 508 ajj = aj + ai[arow]; 509 aaj = aa + ai[arow]; 510 for (k=0;k<anzj;k++) { 511 if (!pajdense[ajj[k]]) { 512 pajdense[ajj[k]] = -1; 513 paj[panzj++] = ajj[k]; 514 } 515 paa[ajj[k]] += (*pa)*aaj[k]; 516 } 517 flops += 2*anzj; 518 pa++; 519 } 520 521 /* Sort the j index array for quick sparse axpy. */ 522 ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 523 524 /* Compute P*A*P^T using sparse inner products. */ 525 /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 526 cnzi = ci[i+1] - ci[i]; 527 for (j=0;j<cnzi;j++) { 528 /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 529 ptcol = *cj++; 530 ptnzj = pi[ptcol+1] - pi[ptcol]; 531 ptj = pjj + pi[ptcol]; 532 ptaj = pta + pi[ptcol]; 533 sum = 0.; 534 k1 = 0; 535 k2 = 0; 536 while ((k1<panzj) && (k2<ptnzj)) { 537 if (paj[k1]==ptj[k2]) { 538 sum += paa[paj[k1++]]*ptaj[k2++]; 539 } else if (paj[k1] < ptj[k2]) { 540 k1++; 541 } else /* if (paj[k1] > ptj[k2]) */ { 542 k2++; 543 } 544 } 545 *ca++ = sum; 546 } 547 548 /* Zero the current row info for P*A */ 549 for (j=0;j<panzj;j++) { 550 paa[paj[j]] = 0.; 551 pajdense[paj[j]] = 0; 552 } 553 } 554 555 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 556 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 558 ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 564 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 565 int ierr; 566 567 PetscFunctionBegin; 568 if (!logkey_matapplypapt) { 569 ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 570 } 571 ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 572 ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 573 ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 574 ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577