1 /* 2 Defines projective product routines where A is a SeqAIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 9 EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*); 10 EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*); 11 EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat); 12 EXTERN int RegisterMatMatMultRoutines_Private(Mat); 13 14 static int MATSeqAIJ_PtAP = 0; 15 static int MATSeqAIJ_PtAPSymbolic = 0; 16 static int MATSeqAIJ_PtAPNumeric = 0; 17 18 #undef __FUNCT__ 19 #define __FUNCT__ "MatSeqAIJPtAP" 20 /*@ 21 MatSeqAIJPtAP - Creates the matrix projection C = P^T * A * P 22 23 Collective on Mat 24 25 Input Parameters: 26 + A - the matrix 27 - P - the projection matrix 28 29 Output Parameters: 30 . C - the product matrix 31 32 Notes: 33 C will be created and must be destroyed by the user with MatDestroy(). 34 35 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 36 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 37 38 Level: intermediate 39 40 .seealso: MatSeqAIJPtAPSymbolic(),MatSeqAIJPtAPNumeric(),MatMatMult() 41 @*/ 42 int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) { 43 int ierr; 44 char funct[80]; 45 int (*f)(Mat,Mat,Mat); 46 47 PetscFunctionBegin; 48 ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 49 50 ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr); 51 52 /* Avoid additional error checking included in */ 53 /* ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */ 54 55 /* Currently only _seqaij_seqaij is implemented, so just query for it in A and P. */ 56 /* When other implementations exist, attack the multiple dispatch problem. */ 57 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr); 58 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 59 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name); 60 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 61 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name); 62 63 ierr = (*f)(A,P,*C);CHKERRQ(ierr); 64 65 ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatSeqAIJPtAPSymbolic" 71 /*@ 72 MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 73 74 Collective on Mat 75 76 Input Parameters: 77 + A - the matrix 78 - P - the projection matrix 79 80 Output Parameters: 81 . C - the (i,j) structure of the product matrix 82 83 Notes: 84 C will be created and must be destroyed by the user with MatDestroy(). 85 86 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 87 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 88 this (i,j) structure by calling MatSeqAIJPtAPNumeric(). 89 90 Level: intermediate 91 92 .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPNumeric(),MatMatMultSymbolic() 93 @*/ 94 int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) { 95 int ierr; 96 char funct[80]; 97 int (*f)(Mat,Mat,Mat*); 98 99 PetscFunctionBegin; 100 101 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 102 PetscValidType(A,1); 103 MatPreallocated(A); 104 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 105 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 106 107 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 108 PetscValidType(P,2); 109 MatPreallocated(P); 110 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 111 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 112 113 PetscValidPointer(C,3); 114 115 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 116 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 117 118 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 119 /* When other implementations exist, attack the multiple dispatch problem. */ 120 ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr); 121 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 122 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPSymbolic is not supported for P of type %s",P->type_name); 123 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 124 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPSymbolic is not supported for A of type %s",A->type_name); 125 126 ierr = (*f)(A,P,C);CHKERRQ(ierr); 127 128 PetscFunctionReturn(0); 129 } 130 131 EXTERN_C_BEGIN 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ" 134 int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 135 int ierr; 136 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 137 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 138 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 139 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 140 int an=A->N,am=A->M,pn=P->N,pm=P->M; 141 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 142 MatScalar *ca; 143 144 PetscFunctionBegin; 145 146 /* Start timer */ 147 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 148 149 /* Get ij structure of P^T */ 150 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 151 ptJ=ptj; 152 153 /* Allocate ci array, arrays for fill computation and */ 154 /* free space for accumulating nonzero column info */ 155 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 156 ci[0] = 0; 157 158 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 159 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 160 ptasparserow = ptadenserow + an; 161 denserow = ptasparserow + an; 162 sparserow = denserow + pn; 163 164 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 165 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 166 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 167 current_space = free_space; 168 169 /* Determine symbolic info for each row of C: */ 170 for (i=0;i<pn;i++) { 171 ptnzi = pti[i+1] - pti[i]; 172 ptanzi = 0; 173 /* Determine symbolic row of PtA: */ 174 for (j=0;j<ptnzi;j++) { 175 arow = *ptJ++; 176 anzj = ai[arow+1] - ai[arow]; 177 ajj = aj + ai[arow]; 178 for (k=0;k<anzj;k++) { 179 if (!ptadenserow[ajj[k]]) { 180 ptadenserow[ajj[k]] = -1; 181 ptasparserow[ptanzi++] = ajj[k]; 182 } 183 } 184 } 185 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 186 ptaj = ptasparserow; 187 cnzi = 0; 188 for (j=0;j<ptanzi;j++) { 189 prow = *ptaj++; 190 pnzj = pi[prow+1] - pi[prow]; 191 pjj = pj + pi[prow]; 192 for (k=0;k<pnzj;k++) { 193 if (!denserow[pjj[k]]) { 194 denserow[pjj[k]] = -1; 195 sparserow[cnzi++] = pjj[k]; 196 } 197 } 198 } 199 200 /* sort sparserow */ 201 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 202 203 /* If free space is not available, make more free space */ 204 /* Double the amount of total space in the list */ 205 if (current_space->local_remaining<cnzi) { 206 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 207 } 208 209 /* Copy data into free space, and zero out denserows */ 210 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 211 current_space->array += cnzi; 212 current_space->local_used += cnzi; 213 current_space->local_remaining -= cnzi; 214 215 for (j=0;j<ptanzi;j++) { 216 ptadenserow[ptasparserow[j]] = 0; 217 } 218 for (j=0;j<cnzi;j++) { 219 denserow[sparserow[j]] = 0; 220 } 221 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 222 /* For now, we will recompute what is needed. */ 223 ci[i+1] = ci[i] + cnzi; 224 } 225 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 226 /* Allocate space for cj, initialize cj, and */ 227 /* destroy list of free space and other temporary array(s) */ 228 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 229 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 230 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 231 232 /* Allocate space for ca */ 233 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 234 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 235 236 /* put together the new matrix */ 237 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 238 239 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 240 /* Since these are PETSc arrays, change flags to free them as necessary. */ 241 c = (Mat_SeqAIJ *)((*C)->data); 242 c->freedata = PETSC_TRUE; 243 c->nonew = 0; 244 245 /* Clean up. */ 246 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 247 248 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 249 PetscFunctionReturn(0); 250 } 251 EXTERN_C_END 252 253 #include "src/mat/impls/maij/maij.h" 254 EXTERN_C_BEGIN 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ" 257 int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 258 /* This routine requires testing -- I don't think it works. */ 259 int ierr; 260 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 261 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 262 Mat P=pp->AIJ; 263 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 264 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 265 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 266 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 267 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 268 MatScalar *ca; 269 270 PetscFunctionBegin; 271 /* Start timer */ 272 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 273 274 /* Get ij structure of P^T */ 275 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 276 277 /* Allocate ci array, arrays for fill computation and */ 278 /* free space for accumulating nonzero column info */ 279 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 280 ci[0] = 0; 281 282 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 283 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 284 ptasparserow = ptadenserow + an; 285 denserow = ptasparserow + an; 286 sparserow = denserow + pn; 287 288 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 289 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 290 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 291 current_space = free_space; 292 293 /* Determine symbolic info for each row of C: */ 294 for (i=0;i<pn/ppdof;i++) { 295 ptnzi = pti[i+1] - pti[i]; 296 ptanzi = 0; 297 ptJ = ptj + pti[i]; 298 for (dof=0;dof<ppdof;dof++) { 299 /* Determine symbolic row of PtA: */ 300 for (j=0;j<ptnzi;j++) { 301 arow = ptJ[j] + dof; 302 anzj = ai[arow+1] - ai[arow]; 303 ajj = aj + ai[arow]; 304 for (k=0;k<anzj;k++) { 305 if (!ptadenserow[ajj[k]]) { 306 ptadenserow[ajj[k]] = -1; 307 ptasparserow[ptanzi++] = ajj[k]; 308 } 309 } 310 } 311 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 312 ptaj = ptasparserow; 313 cnzi = 0; 314 for (j=0;j<ptanzi;j++) { 315 pdof = *ptaj%dof; 316 prow = (*ptaj++)/dof; 317 pnzj = pi[prow+1] - pi[prow]; 318 pjj = pj + pi[prow]; 319 for (k=0;k<pnzj;k++) { 320 if (!denserow[pjj[k]+pdof]) { 321 denserow[pjj[k]+pdof] = -1; 322 sparserow[cnzi++] = pjj[k]+pdof; 323 } 324 } 325 } 326 327 /* sort sparserow */ 328 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 329 330 /* If free space is not available, make more free space */ 331 /* Double the amount of total space in the list */ 332 if (current_space->local_remaining<cnzi) { 333 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 334 } 335 336 /* Copy data into free space, and zero out denserows */ 337 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 338 current_space->array += cnzi; 339 current_space->local_used += cnzi; 340 current_space->local_remaining -= cnzi; 341 342 for (j=0;j<ptanzi;j++) { 343 ptadenserow[ptasparserow[j]] = 0; 344 } 345 for (j=0;j<cnzi;j++) { 346 denserow[sparserow[j]] = 0; 347 } 348 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 349 /* For now, we will recompute what is needed. */ 350 ci[i+1+dof] = ci[i+dof] + cnzi; 351 } 352 } 353 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 354 /* Allocate space for cj, initialize cj, and */ 355 /* destroy list of free space and other temporary array(s) */ 356 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 357 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 358 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 359 360 /* Allocate space for ca */ 361 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 362 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 366 367 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 368 /* Since these are PETSc arrays, change flags to free them as necessary. */ 369 c = (Mat_SeqAIJ *)((*C)->data); 370 c->freedata = PETSC_TRUE; 371 c->nonew = 0; 372 373 /* Clean up. */ 374 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 375 376 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 EXTERN_C_END 380 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatSeqAIJPtAPNumeric" 383 /*@ 384 MatSeqAIJPtAPNumeric - Computes the matrix projection C = P^T * A * P 385 386 Collective on Mat 387 388 Input Parameters: 389 + A - the matrix 390 - P - the projection matrix 391 392 Output Parameters: 393 . C - the product matrix 394 395 Notes: 396 C must have been created by calling MatSeqAIJPtAPSymbolic and must be destroyed by 397 the user using MatDeatroy(). 398 399 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 400 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 401 402 Level: intermediate 403 404 .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPSymbolic(),MatMatMultNumeric() 405 @*/ 406 int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) { 407 int ierr; 408 char funct[80]; 409 int (*f)(Mat,Mat,Mat); 410 411 PetscFunctionBegin; 412 413 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 414 PetscValidType(A,1); 415 MatPreallocated(A); 416 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 417 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 418 419 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 420 PetscValidType(P,2); 421 MatPreallocated(P); 422 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 423 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 424 425 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 426 PetscValidType(C,3); 427 MatPreallocated(C); 428 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 429 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 430 431 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 432 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 433 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 434 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 435 436 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 437 /* When other implementations exist, attack the multiple dispatch problem. */ 438 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr); 439 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 440 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name); 441 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 442 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name); 443 444 ierr = (*f)(A,P,C);CHKERRQ(ierr); 445 446 PetscFunctionReturn(0); 447 } 448 449 EXTERN_C_BEGIN 450 #undef __FUNCT__ 451 #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ" 452 int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 453 int ierr,flops=0; 454 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 455 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 456 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 457 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 458 int *ci=c->i,*cj=c->j,*cjj; 459 int am=A->M,cn=C->N,cm=C->M; 460 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 461 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 462 463 PetscFunctionBegin; 464 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 465 466 /* Allocate temporary array for storage of one row of A*P */ 467 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 468 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 469 470 apj = (int*)(apa + cn); 471 apjdense = apj + cn; 472 473 /* Clear old values in C */ 474 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 475 476 for (i=0;i<am;i++) { 477 /* Form sparse row of A*P */ 478 anzi = ai[i+1] - ai[i]; 479 apnzj = 0; 480 for (j=0;j<anzi;j++) { 481 prow = *aj++; 482 pnzj = pi[prow+1] - pi[prow]; 483 pjj = pj + pi[prow]; 484 paj = pa + pi[prow]; 485 for (k=0;k<pnzj;k++) { 486 if (!apjdense[pjj[k]]) { 487 apjdense[pjj[k]] = -1; 488 apj[apnzj++] = pjj[k]; 489 } 490 apa[pjj[k]] += (*aa)*paj[k]; 491 } 492 flops += 2*pnzj; 493 aa++; 494 } 495 496 /* Sort the j index array for quick sparse axpy. */ 497 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 498 499 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 500 pnzi = pi[i+1] - pi[i]; 501 for (j=0;j<pnzi;j++) { 502 nextap = 0; 503 crow = *pJ++; 504 cjj = cj + ci[crow]; 505 caj = ca + ci[crow]; 506 /* Perform sparse axpy operation. Note cjj includes apj. */ 507 for (k=0;nextap<apnzj;k++) { 508 if (cjj[k]==apj[nextap]) { 509 caj[k] += (*pA)*apa[apj[nextap++]]; 510 } 511 } 512 flops += 2*apnzj; 513 pA++; 514 } 515 516 /* Zero the current row info for A*P */ 517 for (j=0;j<apnzj;j++) { 518 apa[apj[j]] = 0.; 519 apjdense[apj[j]] = 0; 520 } 521 } 522 523 /* Assemble the final matrix and clean up */ 524 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 525 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = PetscFree(apa);CHKERRQ(ierr); 527 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 528 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 529 530 PetscFunctionReturn(0); 531 } 532 EXTERN_C_END 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "RegisterApplyPtAPRoutines_Private" 536 int RegisterApplyPtAPRoutines_Private(Mat A) { 537 int ierr; 538 539 PetscFunctionBegin; 540 541 if (!MATSeqAIJ_PtAP) { 542 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 543 } 544 545 if (!MATSeqAIJ_PtAPSymbolic) { 546 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 547 } 548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij", 549 "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ", 550 MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 551 552 if (!MATSeqAIJ_PtAPNumeric) { 553 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 554 } 555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij", 556 "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ", 557 MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 558 ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr); 559 PetscFunctionReturn(0); 560 } 561