1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 EXTERN PetscErrorCode RegisterMatMatMultRoutines_Private(Mat); 11 12 static int MAT_PtAPSymbolic = 0; 13 static int MAT_PtAPNumeric = 0; 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatPtAP" 17 /*@ 18 MatPtAP - Creates the matrix projection C = P^T * A * P 19 20 Collective on Mat 21 22 Input Parameters: 23 + A - the matrix 24 - P - the projection matrix 25 26 Output Parameters: 27 . C - the product matrix 28 29 Notes: 30 C will be created and must be destroyed by the user with MatDestroy(). 31 32 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 33 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 34 35 Level: intermediate 36 37 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 38 @*/ 39 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 40 PetscErrorCode ierr; 41 42 PetscFunctionBegin; 43 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 44 PetscValidType(A,1); 45 MatPreallocated(A); 46 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 47 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 48 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 49 PetscValidType(P,2); 50 MatPreallocated(P); 51 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 52 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 53 PetscValidPointer(C,3); 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 59 ierr = (*A->ops->matptap)(A,P,scall,fill,C);CHKERRQ(ierr); 60 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 61 PetscFunctionReturn(0); 62 } 63 64 #undef __FUNCT__ 65 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 66 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 67 { 68 PetscErrorCode ierr; 69 PetscFunctionBegin; 70 if (scall == MAT_INITIAL_MATRIX){ 71 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 72 } 73 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatPtAPSymbolic" 79 /*@ 80 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 81 82 Collective on Mat 83 84 Input Parameters: 85 + A - the matrix 86 - P - the projection matrix 87 88 Output Parameters: 89 . C - the (i,j) structure of the product matrix 90 91 Notes: 92 C will be created and must be destroyed by the user with MatDestroy(). 93 94 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 95 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 96 this (i,j) structure by calling MatPtAPNumeric(). 97 98 Level: intermediate 99 100 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 101 @*/ 102 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 103 PetscErrorCode ierr; 104 char funct[80]; 105 int (*f)(Mat,Mat,Mat*); 106 107 PetscFunctionBegin; 108 109 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 110 PetscValidType(A,1); 111 MatPreallocated(A); 112 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 113 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 114 115 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 116 PetscValidType(P,2); 117 MatPreallocated(P); 118 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 119 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 120 121 PetscValidPointer(C,3); 122 123 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 124 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 125 126 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 127 /* When other implementations exist, attack the multiple dispatch problem. */ 128 ierr = PetscStrcpy(funct,"MatPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr); 129 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 130 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for P of type %s",P->type_name); 131 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 132 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for A of type %s",A->type_name); 133 134 ierr = (*f)(A,P,C);CHKERRQ(ierr); 135 136 PetscFunctionReturn(0); 137 } 138 139 EXTERN_C_BEGIN 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 142 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 143 PetscErrorCode ierr; 144 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 145 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 146 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 147 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 148 int an=A->N,am=A->M,pn=P->N,pm=P->M; 149 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 150 MatScalar *ca; 151 152 PetscFunctionBegin; 153 154 /* Start timer */ 155 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 156 157 /* Get ij structure of P^T */ 158 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 159 ptJ=ptj; 160 161 /* Allocate ci array, arrays for fill computation and */ 162 /* free space for accumulating nonzero column info */ 163 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 164 ci[0] = 0; 165 166 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 167 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 168 ptasparserow = ptadenserow + an; 169 denserow = ptasparserow + an; 170 sparserow = denserow + pn; 171 172 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 173 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 174 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 175 current_space = free_space; 176 177 /* Determine symbolic info for each row of C: */ 178 for (i=0;i<pn;i++) { 179 ptnzi = pti[i+1] - pti[i]; 180 ptanzi = 0; 181 /* Determine symbolic row of PtA: */ 182 for (j=0;j<ptnzi;j++) { 183 arow = *ptJ++; 184 anzj = ai[arow+1] - ai[arow]; 185 ajj = aj + ai[arow]; 186 for (k=0;k<anzj;k++) { 187 if (!ptadenserow[ajj[k]]) { 188 ptadenserow[ajj[k]] = -1; 189 ptasparserow[ptanzi++] = ajj[k]; 190 } 191 } 192 } 193 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 194 ptaj = ptasparserow; 195 cnzi = 0; 196 for (j=0;j<ptanzi;j++) { 197 prow = *ptaj++; 198 pnzj = pi[prow+1] - pi[prow]; 199 pjj = pj + pi[prow]; 200 for (k=0;k<pnzj;k++) { 201 if (!denserow[pjj[k]]) { 202 denserow[pjj[k]] = -1; 203 sparserow[cnzi++] = pjj[k]; 204 } 205 } 206 } 207 208 /* sort sparserow */ 209 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 210 211 /* If free space is not available, make more free space */ 212 /* Double the amount of total space in the list */ 213 if (current_space->local_remaining<cnzi) { 214 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 215 } 216 217 /* Copy data into free space, and zero out denserows */ 218 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 219 current_space->array += cnzi; 220 current_space->local_used += cnzi; 221 current_space->local_remaining -= cnzi; 222 223 for (j=0;j<ptanzi;j++) { 224 ptadenserow[ptasparserow[j]] = 0; 225 } 226 for (j=0;j<cnzi;j++) { 227 denserow[sparserow[j]] = 0; 228 } 229 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 230 /* For now, we will recompute what is needed. */ 231 ci[i+1] = ci[i] + cnzi; 232 } 233 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 234 /* Allocate space for cj, initialize cj, and */ 235 /* destroy list of free space and other temporary array(s) */ 236 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 237 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 238 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 239 240 /* Allocate space for ca */ 241 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 242 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 243 244 /* put together the new matrix */ 245 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 246 247 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 248 /* Since these are PETSc arrays, change flags to free them as necessary. */ 249 c = (Mat_SeqAIJ *)((*C)->data); 250 c->freedata = PETSC_TRUE; 251 c->nonew = 0; 252 253 /* Clean up. */ 254 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 255 256 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 257 PetscFunctionReturn(0); 258 } 259 EXTERN_C_END 260 261 #include "src/mat/impls/maij/maij.h" 262 EXTERN_C_BEGIN 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 265 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 266 /* This routine requires testing -- I don't think it works. */ 267 PetscErrorCode ierr; 268 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 269 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 270 Mat P=pp->AIJ; 271 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 272 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 273 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 274 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 275 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 276 MatScalar *ca; 277 278 PetscFunctionBegin; 279 /* Start timer */ 280 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 281 282 /* Get ij structure of P^T */ 283 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 284 285 /* Allocate ci array, arrays for fill computation and */ 286 /* free space for accumulating nonzero column info */ 287 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 288 ci[0] = 0; 289 290 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 291 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 292 ptasparserow = ptadenserow + an; 293 denserow = ptasparserow + an; 294 sparserow = denserow + pn; 295 296 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 297 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 298 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 299 current_space = free_space; 300 301 /* Determine symbolic info for each row of C: */ 302 for (i=0;i<pn/ppdof;i++) { 303 ptnzi = pti[i+1] - pti[i]; 304 ptanzi = 0; 305 ptJ = ptj + pti[i]; 306 for (dof=0;dof<ppdof;dof++) { 307 /* Determine symbolic row of PtA: */ 308 for (j=0;j<ptnzi;j++) { 309 arow = ptJ[j] + dof; 310 anzj = ai[arow+1] - ai[arow]; 311 ajj = aj + ai[arow]; 312 for (k=0;k<anzj;k++) { 313 if (!ptadenserow[ajj[k]]) { 314 ptadenserow[ajj[k]] = -1; 315 ptasparserow[ptanzi++] = ajj[k]; 316 } 317 } 318 } 319 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 320 ptaj = ptasparserow; 321 cnzi = 0; 322 for (j=0;j<ptanzi;j++) { 323 pdof = *ptaj%dof; 324 prow = (*ptaj++)/dof; 325 pnzj = pi[prow+1] - pi[prow]; 326 pjj = pj + pi[prow]; 327 for (k=0;k<pnzj;k++) { 328 if (!denserow[pjj[k]+pdof]) { 329 denserow[pjj[k]+pdof] = -1; 330 sparserow[cnzi++] = pjj[k]+pdof; 331 } 332 } 333 } 334 335 /* sort sparserow */ 336 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 337 338 /* If free space is not available, make more free space */ 339 /* Double the amount of total space in the list */ 340 if (current_space->local_remaining<cnzi) { 341 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 342 } 343 344 /* Copy data into free space, and zero out denserows */ 345 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 346 current_space->array += cnzi; 347 current_space->local_used += cnzi; 348 current_space->local_remaining -= cnzi; 349 350 for (j=0;j<ptanzi;j++) { 351 ptadenserow[ptasparserow[j]] = 0; 352 } 353 for (j=0;j<cnzi;j++) { 354 denserow[sparserow[j]] = 0; 355 } 356 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 357 /* For now, we will recompute what is needed. */ 358 ci[i+1+dof] = ci[i+dof] + cnzi; 359 } 360 } 361 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 362 /* Allocate space for cj, initialize cj, and */ 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 365 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 366 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 367 368 /* Allocate space for ca */ 369 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 370 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 371 372 /* put together the new matrix */ 373 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 374 375 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 376 /* Since these are PETSc arrays, change flags to free them as necessary. */ 377 c = (Mat_SeqAIJ *)((*C)->data); 378 c->freedata = PETSC_TRUE; 379 c->nonew = 0; 380 381 /* Clean up. */ 382 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 383 384 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 EXTERN_C_END 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatPtAPNumeric" 391 /*@ 392 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 393 394 Collective on Mat 395 396 Input Parameters: 397 + A - the matrix 398 - P - the projection matrix 399 400 Output Parameters: 401 . C - the product matrix 402 403 Notes: 404 C must have been created by calling MatPtAPSymbolic and must be destroyed by 405 the user using MatDeatroy(). 406 407 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 408 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 409 410 Level: intermediate 411 412 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 413 @*/ 414 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 415 PetscErrorCode ierr; 416 char funct[80]; 417 int (*f)(Mat,Mat,Mat); 418 419 PetscFunctionBegin; 420 421 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 422 PetscValidType(A,1); 423 MatPreallocated(A); 424 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 425 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 426 427 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 428 PetscValidType(P,2); 429 MatPreallocated(P); 430 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 431 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 432 433 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 434 PetscValidType(C,3); 435 MatPreallocated(C); 436 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 437 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 438 439 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 440 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 441 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 442 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 443 444 /* Currently only _seqaij_seqaij is implemented, so just query for it. */ 445 /* When other implementations exist, attack the multiple dispatch problem. */ 446 ierr = PetscStrcpy(funct,"MatPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr); 447 ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 448 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for P of type %s",P->type_name); 449 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr); 450 if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for A of type %s",A->type_name); 451 452 ierr = (*f)(A,P,C);CHKERRQ(ierr); 453 454 PetscFunctionReturn(0); 455 } 456 457 EXTERN_C_BEGIN 458 #undef __FUNCT__ 459 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 460 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 461 { 462 PetscErrorCode ierr; 463 int flops=0; 464 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 465 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 466 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 467 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 468 int *ci=c->i,*cj=c->j,*cjj; 469 int am=A->M,cn=C->N,cm=C->M; 470 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 471 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 472 473 PetscFunctionBegin; 474 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 475 476 /* Allocate temporary array for storage of one row of A*P */ 477 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 478 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 479 480 apj = (int*)(apa + cn); 481 apjdense = apj + cn; 482 483 /* Clear old values in C */ 484 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 485 486 for (i=0;i<am;i++) { 487 /* Form sparse row of A*P */ 488 anzi = ai[i+1] - ai[i]; 489 apnzj = 0; 490 for (j=0;j<anzi;j++) { 491 prow = *aj++; 492 pnzj = pi[prow+1] - pi[prow]; 493 pjj = pj + pi[prow]; 494 paj = pa + pi[prow]; 495 for (k=0;k<pnzj;k++) { 496 if (!apjdense[pjj[k]]) { 497 apjdense[pjj[k]] = -1; 498 apj[apnzj++] = pjj[k]; 499 } 500 apa[pjj[k]] += (*aa)*paj[k]; 501 } 502 flops += 2*pnzj; 503 aa++; 504 } 505 506 /* Sort the j index array for quick sparse axpy. */ 507 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 508 509 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 510 pnzi = pi[i+1] - pi[i]; 511 for (j=0;j<pnzi;j++) { 512 nextap = 0; 513 crow = *pJ++; 514 cjj = cj + ci[crow]; 515 caj = ca + ci[crow]; 516 /* Perform sparse axpy operation. Note cjj includes apj. */ 517 for (k=0;nextap<apnzj;k++) { 518 if (cjj[k]==apj[nextap]) { 519 caj[k] += (*pA)*apa[apj[nextap++]]; 520 } 521 } 522 flops += 2*apnzj; 523 pA++; 524 } 525 526 /* Zero the current row info for A*P */ 527 for (j=0;j<apnzj;j++) { 528 apa[apj[j]] = 0.; 529 apjdense[apj[j]] = 0; 530 } 531 } 532 533 /* Assemble the final matrix and clean up */ 534 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 535 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 536 ierr = PetscFree(apa);CHKERRQ(ierr); 537 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 538 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 539 540 PetscFunctionReturn(0); 541 } 542 EXTERN_C_END 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "RegisterPtAPRoutines_Private" 546 PetscErrorCode RegisterPtAPRoutines_Private(Mat A) 547 { 548 PetscErrorCode ierr; 549 550 PetscFunctionBegin; 551 if (!MAT_PtAPSymbolic) { 552 ierr = PetscLogEventRegister(&MAT_PtAPSymbolic,"MatPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 553 } 554 if (!MAT_PtAPNumeric) { 555 ierr = PetscLogEventRegister(&MAT_PtAPNumeric,"MatPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 556 } 557 ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr); 558 559 PetscFunctionReturn(0); 560 } 561