1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 C = P^T * A * P 6 C = P * A * P^T 7 */ 8 9 #include "src/mat/impls/aij/seq/aij.h" 10 #include "src/mat/utils/freespace.h" 11 12 static int logkey_matmatmult = 0; 13 static int logkey_matmatmult_symbolic = 0; 14 static int logkey_matmatmult_numeric = 0; 15 16 static int logkey_matapplyptap = 0; 17 static int logkey_matapplyptap_symbolic = 0; 18 static int logkey_matapplyptap_numeric = 0; 19 20 static int logkey_matapplypapt = 0; 21 static int logkey_matapplypapt_symbolic = 0; 22 static int logkey_matapplypapt_numeric = 0; 23 24 /* 25 MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 26 C = A * B; 27 28 Note: C is assumed to be uncreated. 29 If this is not the case, Destroy C before calling this routine. 30 */ 31 #undef __FUNCT__ 32 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 33 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 34 { 35 int ierr; 36 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 37 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 38 int aishift=a->indexshift,bishift=b->indexshift; 39 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 40 int *ci,*cj,*denserow,*sparserow; 41 int an=A->N,am=A->M,bn=B->N,bm=B->M; 42 int i,j,k,anzi,brow,bnzj,cnzi; 43 MatScalar *ca; 44 45 PetscFunctionBegin; 46 /* some error checking which could be moved into interface layer */ 47 if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 48 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 49 50 /* Set up timers */ 51 if (!logkey_matmatmult_symbolic) { 52 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 53 } 54 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 55 56 /* Set up */ 57 /* Allocate ci array, arrays for fill computation and */ 58 /* free space for accumulating nonzero column info */ 59 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 60 ci[0] = 0; 61 62 ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 63 ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 64 sparserow = denserow + bn; 65 66 /* Initial FreeSpace size is nnz(B)=bi[bm] */ 67 ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 68 current_space = free_space; 69 70 /* Determine symbolic info for each row of the product: */ 71 for (i=0;i<am;i++) { 72 anzi = ai[i+1] - ai[i]; 73 cnzi = 0; 74 for (j=0;j<anzi;j++) { 75 brow = *aj++; 76 bnzj = bi[brow+1] - bi[brow]; 77 bjj = bj + bi[brow]; 78 for (k=0;k<bnzj;k++) { 79 /* If column is not marked, mark it in compressed and uncompressed locations. */ 80 /* For simplicity, leave uncompressed row unsorted until finished with row, */ 81 /* and increment nonzero count for this row. */ 82 if (!denserow[bjj[k]]) { 83 denserow[bjj[k]] = -1; 84 sparserow[cnzi++] = bjj[k]; 85 } 86 } 87 } 88 89 /* sort sparserow */ 90 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 91 92 /* If free space is not available, make more free space */ 93 /* Double the amount of total space in the list */ 94 if (current_space->local_remaining<cnzi) { 95 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 96 } 97 98 /* Copy data into free space, and zero out denserow */ 99 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 100 current_space->array += cnzi; 101 current_space->local_used += cnzi; 102 current_space->local_remaining -= cnzi; 103 for (j=0;j<cnzi;j++) { 104 denserow[sparserow[j]] = 0; 105 } 106 ci[i+1] = ci[i] + cnzi; 107 } 108 109 /* Column indices are in the list of free space */ 110 /* Allocate space for cj, initialize cj, and */ 111 /* destroy list of free space and other temporary array(s) */ 112 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 113 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 114 ierr = PetscFree(denserow);CHKERRQ(ierr); 115 116 /* Allocate space for ca */ 117 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 118 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 119 120 /* put together the new matrix */ 121 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 122 123 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 124 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 125 c = (Mat_SeqAIJ *)((*C)->data); 126 c->freedata = PETSC_TRUE; 127 c->nonew = 0; 128 129 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* 134 MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 135 C=A*B; 136 Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 137 */ 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 140 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 141 { 142 int ierr,flops=0; 143 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 144 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 145 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 146 int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 147 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 148 int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 149 int i,j,k,anzi,bnzi,cnzi,brow; 150 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 151 152 PetscFunctionBegin; 153 154 /* This error checking should be unnecessary if the symbolic was performed */ 155 if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 156 if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 157 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 158 if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 159 160 /* Set up timers */ 161 if (!logkey_matmatmult_numeric) { 162 ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 163 } 164 ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 165 166 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 167 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 168 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 169 /* Traverse A row-wise. */ 170 /* Build the ith row in C by summing over nonzero columns in A, */ 171 /* the rows of B corresponding to nonzeros of A. */ 172 for (i=0;i<am;i++) { 173 anzi = ai[i+1] - ai[i]; 174 for (j=0;j<anzi;j++) { 175 brow = *aj++; 176 bnzi = bi[brow+1] - bi[brow]; 177 bjj = bj + bi[brow]; 178 baj = ba + bi[brow]; 179 for (k=0;k<bnzi;k++) { 180 temp[bjj[k]] += (*aa)*baj[k]; 181 } 182 flops += 2*bnzi; 183 aa++; 184 } 185 /* Store row back into C, and re-zero temp */ 186 cnzi = ci[i+1] - ci[i]; 187 for (j=0;j<cnzi;j++) { 188 ca[j] = temp[cj[j]]; 189 temp[cj[j]] = 0.0; 190 } 191 ca += cnzi; 192 cj += cnzi; 193 } 194 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 195 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 197 /* Free temp */ 198 ierr = PetscFree(temp);CHKERRQ(ierr); 199 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 200 ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 201 PetscFunctionReturn(0); 202 } 203 204 #undef __FUNCT__ 205 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 206 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 207 int ierr; 208 209 PetscFunctionBegin; 210 if (!logkey_matmatmult) { 211 ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 212 } 213 ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 214 ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 215 ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 216 ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 /* 220 MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 221 C = P^T * A * P; 222 223 Note: C is assumed to be uncreated. 224 If this is not the case, Destroy C before calling this routine. 225 */ 226 #undef __FUNCT__ 227 #define __FUNCT__ "MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ" 228 int MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 229 int ierr; 230 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 231 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 232 int aishift=a->indexshift,pishift=p->indexshift; 233 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 234 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 235 int an=A->N,am=A->M,pn=P->N,pm=P->M; 236 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 237 MatScalar *ca; 238 239 PetscFunctionBegin; 240 241 /* some error checking which could be moved into interface layer */ 242 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 243 if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 244 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 245 246 /* Set up timers */ 247 if (!logkey_matapplyptap_symbolic) { 248 ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 249 } 250 ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 251 252 /* Get ij structure of P^T */ 253 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 254 ptJ=ptj; 255 256 /* Allocate ci array, arrays for fill computation and */ 257 /* free space for accumulating nonzero column info */ 258 ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 259 ci[0] = 0; 260 261 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 262 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 263 ptasparserow = ptadenserow + an; 264 denserow = ptasparserow + an; 265 sparserow = denserow + pn; 266 267 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 268 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 269 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 270 current_space = free_space; 271 272 /* Determine symbolic info for each row of C: */ 273 for (i=0;i<pn;i++) { 274 ptnzi = pti[i+1] - pti[i]; 275 ptanzi = 0; 276 /* Determine symbolic row of PtA: */ 277 for (j=0;j<ptnzi;j++) { 278 arow = *ptJ++; 279 anzj = ai[arow+1] - ai[arow]; 280 ajj = aj + ai[arow]; 281 for (k=0;k<anzj;k++) { 282 if (!ptadenserow[ajj[k]]) { 283 ptadenserow[ajj[k]] = -1; 284 ptasparserow[ptanzi++] = ajj[k]; 285 } 286 } 287 } 288 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 289 ptaj = ptasparserow; 290 cnzi = 0; 291 for (j=0;j<ptanzi;j++) { 292 prow = *ptaj++; 293 pnzj = pi[prow+1] - pi[prow]; 294 pjj = pj + pi[prow]; 295 for (k=0;k<pnzj;k++) { 296 if (!denserow[pjj[k]]) { 297 denserow[pjj[k]] = -1; 298 sparserow[cnzi++] = pjj[k]; 299 } 300 } 301 } 302 303 /* sort sparserow */ 304 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 305 306 /* If free space is not available, make more free space */ 307 /* Double the amount of total space in the list */ 308 if (current_space->local_remaining<cnzi) { 309 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 310 } 311 312 /* Copy data into free space, and zero out denserows */ 313 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 314 current_space->array += cnzi; 315 current_space->local_used += cnzi; 316 current_space->local_remaining -= cnzi; 317 318 for (j=0;j<ptanzi;j++) { 319 ptadenserow[ptasparserow[j]] = 0; 320 } 321 for (j=0;j<cnzi;j++) { 322 denserow[sparserow[j]] = 0; 323 } 324 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 325 /* For now, we will recompute what is needed. */ 326 ci[i+1] = ci[i] + cnzi; 327 } 328 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 329 /* Allocate space for cj, initialize cj, and */ 330 /* destroy list of free space and other temporary array(s) */ 331 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 332 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 333 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 334 335 /* Allocate space for ca */ 336 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 337 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 338 339 /* put together the new matrix */ 340 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 341 342 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 343 /* Since these are PETSc arrays, change flags to free them as necessary. */ 344 c = (Mat_SeqAIJ *)((*C)->data); 345 c->freedata = PETSC_TRUE; 346 c->nonew = 0; 347 348 /* Clean up. */ 349 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 350 351 ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 /* 356 MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 357 C = P^T * A * P; 358 Note: C must have been created by calling MatApplyPtAP_Symbolic_SeqAIJ. 359 */ 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ" 362 int MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 363 int ierr,flops=0; 364 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 365 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 366 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 367 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 368 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 369 int *ci=c->i,*cj=c->j,*cjj; 370 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 371 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 372 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 373 374 PetscFunctionBegin; 375 376 /* This error checking should be unnecessary if the symbolic was performed */ 377 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 378 if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm); 379 if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 380 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 381 if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn); 382 383 /* Set up timers */ 384 if (!logkey_matapplyptap_numeric) { 385 ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr); 386 } 387 ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 388 389 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 390 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 391 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 392 393 apj = (int *)(apa + cn); 394 apjdense = apj + cn; 395 396 for (i=0;i<am;i++) { 397 /* Form sparse row of A*P */ 398 anzi = ai[i+1] - ai[i]; 399 apnzj = 0; 400 for (j=0;j<anzi;j++) { 401 prow = *aj++; 402 pnzj = pi[prow+1] - pi[prow]; 403 pjj = pj + pi[prow]; 404 paj = pa + pi[prow]; 405 for (k=0;k<pnzj;k++) { 406 if (!apjdense[pjj[k]]) { 407 apjdense[pjj[k]] = -1; 408 apj[apnzj++] = pjj[k]; 409 } 410 apa[pjj[k]] += (*aa)*paj[k]; 411 } 412 flops += 2*pnzj; 413 aa++; 414 } 415 416 /* Sort the j index array for quick sparse axpy. */ 417 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 418 419 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 420 pnzi = pi[i+1] - pi[i]; 421 for (j=0;j<pnzi;j++) { 422 nextap = 0; 423 crow = *pJ++; 424 cjj = cj + ci[crow]; 425 caj = ca + ci[crow]; 426 /* Perform sparse axpy operation. Note cjj includes apj. */ 427 for (k=0;nextap<apnzj;k++) { 428 if (cjj[k]==apj[nextap]) { 429 caj[k] += (*pA)*apa[apj[nextap++]]; 430 } 431 } 432 flops += 2*apnzj; 433 pA++; 434 } 435 436 /* Zero the current row info for A*P */ 437 for (j=0;j<apnzj;j++) { 438 apa[apj[j]] = 0.; 439 apjdense[apj[j]] = 0; 440 } 441 } 442 443 /* Assemble the final matrix and clean up */ 444 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 445 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 446 ierr = PetscFree(apa);CHKERRQ(ierr); 447 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 448 ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 449 450 PetscFunctionReturn(0); 451 } 452 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatApplyPtAP_SeqAIJ_SeqAIJ" 456 int MatApplyPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 457 int ierr; 458 459 PetscFunctionBegin; 460 if (!logkey_matapplyptap) { 461 ierr = PetscLogEventRegister(&logkey_matapplyptap,"MatApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 462 } 463 ierr = PetscLogEventBegin(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 464 465 ierr = MatApplyPtAP_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 466 ierr = MatApplyPtAP_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 467 468 ierr = PetscLogEventEnd(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 469 PetscFunctionReturn(0); 470 } 471 472 /* 473 MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 474 C = P * A * P^T; 475 476 Note: C is assumed to be uncreated. 477 If this is not the case, Destroy C before calling this routine. 478 */ 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ" 481 int MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 482 /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 483 /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 484 int ierr; 485 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 486 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 487 int aishift=a->indexshift,pishift=p->indexshift; 488 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 489 int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 490 int an=A->N,am=A->M,pn=P->N,pm=P->M; 491 int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 492 MatScalar *ca; 493 494 PetscFunctionBegin; 495 496 /* some error checking which could be moved into interface layer */ 497 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 498 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 499 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 500 501 /* Set up timers */ 502 if (!logkey_matapplypapt_symbolic) { 503 ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 504 } 505 ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 506 507 /* Create ij structure of P^T */ 508 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 509 510 /* Allocate ci array, arrays for fill computation and */ 511 /* free space for accumulating nonzero column info */ 512 ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 513 ci[0] = 0; 514 515 ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 516 ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 517 pasparserow = padenserow + an; 518 denserow = pasparserow + an; 519 sparserow = denserow + pm; 520 521 /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 522 /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 523 ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 524 current_space = free_space; 525 526 /* Determine fill for each row of C: */ 527 for (i=0;i<pm;i++) { 528 pnzi = pi[i+1] - pi[i]; 529 panzi = 0; 530 /* Get symbolic sparse row of PA: */ 531 for (j=0;j<pnzi;j++) { 532 arow = *pj++; 533 anzj = ai[arow+1] - ai[arow]; 534 ajj = aj + ai[arow]; 535 for (k=0;k<anzj;k++) { 536 if (!padenserow[ajj[k]]) { 537 padenserow[ajj[k]] = -1; 538 pasparserow[panzi++] = ajj[k]; 539 } 540 } 541 } 542 /* Using symbolic row of PA, determine symbolic row of C: */ 543 paj = pasparserow; 544 cnzi = 0; 545 for (j=0;j<panzi;j++) { 546 ptrow = *paj++; 547 ptnzj = pti[ptrow+1] - pti[ptrow]; 548 ptjj = ptj + pti[ptrow]; 549 for (k=0;k<ptnzj;k++) { 550 if (!denserow[ptjj[k]]) { 551 denserow[ptjj[k]] = -1; 552 sparserow[cnzi++] = ptjj[k]; 553 } 554 } 555 } 556 557 /* sort sparse representation */ 558 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 559 560 /* If free space is not available, make more free space */ 561 /* Double the amount of total space in the list */ 562 if (current_space->local_remaining<cnzi) { 563 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 564 } 565 566 /* Copy data into free space, and zero out dense row */ 567 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 568 current_space->array += cnzi; 569 current_space->local_used += cnzi; 570 current_space->local_remaining -= cnzi; 571 572 for (j=0;j<panzi;j++) { 573 padenserow[pasparserow[j]] = 0; 574 } 575 for (j=0;j<cnzi;j++) { 576 denserow[sparserow[j]] = 0; 577 } 578 ci[i+1] = ci[i] + cnzi; 579 } 580 /* column indices are in the list of free space */ 581 /* Allocate space for cj, initialize cj, and */ 582 /* destroy list of free space and other temporary array(s) */ 583 ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 584 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 585 ierr = PetscFree(padenserow);CHKERRQ(ierr); 586 587 /* Allocate space for ca */ 588 ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 589 ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 590 591 /* put together the new matrix */ 592 ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 593 594 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 595 /* Since these are PETSc arrays, change flags to free them as necessary. */ 596 c = (Mat_SeqAIJ *)((*C)->data); 597 c->freedata = PETSC_TRUE; 598 c->nonew = 0; 599 600 /* Clean up. */ 601 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 602 603 ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 607 /* 608 MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 609 C = P * A * P^T; 610 Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 611 */ 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ" 614 int MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 615 int ierr,flops=0; 616 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 617 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 618 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 619 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 620 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 621 int *ci=c->i,*cj=c->j; 622 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 623 int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 624 MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 625 626 PetscFunctionBegin; 627 628 /* This error checking should be unnecessary if the symbolic was performed */ 629 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 630 if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 631 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 632 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 633 if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 634 635 /* Set up timers */ 636 if (!logkey_matapplypapt_numeric) { 637 ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 638 } 639 ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 640 641 ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 642 ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 643 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 644 645 paj = (int *)(paa + an); 646 pajdense = paj + an; 647 648 for (i=0;i<pm;i++) { 649 /* Form sparse row of P*A */ 650 pnzi = pi[i+1] - pi[i]; 651 panzj = 0; 652 for (j=0;j<pnzi;j++) { 653 arow = *pj++; 654 anzj = ai[arow+1] - ai[arow]; 655 ajj = aj + ai[arow]; 656 aaj = aa + ai[arow]; 657 for (k=0;k<anzj;k++) { 658 if (!pajdense[ajj[k]]) { 659 pajdense[ajj[k]] = -1; 660 paj[panzj++] = ajj[k]; 661 } 662 paa[ajj[k]] += (*pa)*aaj[k]; 663 } 664 flops += 2*anzj; 665 pa++; 666 } 667 668 /* Sort the j index array for quick sparse axpy. */ 669 ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 670 671 /* Compute P*A*P^T using sparse inner products. */ 672 /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 673 cnzi = ci[i+1] - ci[i]; 674 for (j=0;j<cnzi;j++) { 675 /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 676 ptcol = *cj++; 677 ptnzj = pi[ptcol+1] - pi[ptcol]; 678 ptj = pjj + pi[ptcol]; 679 ptaj = pta + pi[ptcol]; 680 sum = 0.; 681 k1 = 0; 682 k2 = 0; 683 while ((k1<panzj) && (k2<ptnzj)) { 684 if (paj[k1]==ptj[k2]) { 685 sum += paa[paj[k1++]]*ptaj[k2++]; 686 } else if (paj[k1] < ptj[k2]) { 687 k1++; 688 } else /* if (paj[k1] > ptj[k2]) */ { 689 k2++; 690 } 691 } 692 *ca++ = sum; 693 } 694 695 /* Zero the current row info for P*A */ 696 for (j=0;j<panzj;j++) { 697 paa[paj[j]] = 0.; 698 pajdense[paj[j]] = 0; 699 } 700 } 701 702 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 703 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 704 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 705 ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatApplyPAPt_SeqAIJ_SeqAIJ" 711 int MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 712 int ierr; 713 714 PetscFunctionBegin; 715 if (!logkey_matapplypapt) { 716 ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 717 } 718 ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 719 ierr = MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr); 720 ierr = MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 721 ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724