1 /* 2 Defines projective product routines where A is a SeqAIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" 7 #include "src/mat/utils/freespace.h" 8 9 int MatSeqAIJPtAP(Mat,Mat,Mat*); 10 int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*); 11 int MatSeqAIJPtAPNumeric(Mat,Mat,Mat); 12 13 static int MATSeqAIJ_PtAP = 0; 14 static int MATSeqAIJ_PtAPSymbolic = 0; 15 static int MATSeqAIJ_PtAPNumeric = 0; 16 17 /* 18 MatSeqAIJPtAP - Creates the SeqAIJ matrix product, C, 19 of SeqAIJ matrix A and matrix P: 20 C = P^T * A * P; 21 22 Note: C is assumed to be uncreated. 23 If this is not the case, Destroy C before calling this routine. 24 */ 25 #undef __FUNCT__ 26 #define __FUNCT__ "MatSeqAIJPtAP" 27 int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) { 28 int ierr; 29 char funct[80]; 30 31 PetscFunctionBegin; 32 33 ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 34 35 ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr); 36 37 /* Avoid additional error checking included in */ 38 /* ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */ 39 40 /* Query A for ApplyPtAPNumeric implementation based on types of P */ 41 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 42 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 43 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,*C));CHKERRQ(ierr); 44 45 ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr); 46 47 PetscFunctionReturn(0); 48 } 49 50 /* 51 MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the SeqAIJ matrix product, C, 52 of SeqAIJ matrix A and matrix P, according to: 53 C = P^T * A * P; 54 55 Note: C is assumed to be uncreated. 56 If this is not the case, Destroy C before calling this routine. 57 */ 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatSeqAIJPtAPSymbolic" 60 int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) { 61 int ierr; 62 char funct[80]; 63 64 PetscFunctionBegin; 65 66 PetscValidPointer(C); 67 68 PetscValidHeaderSpecific(A,MAT_COOKIE); 69 PetscValidType(A); 70 MatPreallocated(A); 71 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 72 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 73 74 PetscValidHeaderSpecific(P,MAT_COOKIE); 75 PetscValidType(P); 76 MatPreallocated(P); 77 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 78 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 79 80 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 81 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 82 83 /* Query A for ApplyPtAP implementation based on types of P */ 84 ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_");CHKERRQ(ierr); 85 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 86 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat*),(A,P,C));CHKERRQ(ierr); 87 88 PetscFunctionReturn(0); 89 } 90 91 EXTERN_C_BEGIN 92 #undef __FUNCT__ 93 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ" 94 int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) { 95 int ierr; 96 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 97 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 98 int aishift=a->indexshift,pishift=p->indexshift; 99 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 100 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 101 int an=A->N,am=A->M,pn=P->N,pm=P->M; 102 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 103 MatScalar *ca; 104 105 PetscFunctionBegin; 106 107 /* some error checking which could be moved into interface layer */ 108 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 109 110 /* Start timer */ 111 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 112 113 /* Get ij structure of P^T */ 114 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 115 ptJ=ptj; 116 117 /* Allocate ci array, arrays for fill computation and */ 118 /* free space for accumulating nonzero column info */ 119 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 120 ci[0] = 0; 121 122 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 123 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 124 ptasparserow = ptadenserow + an; 125 denserow = ptasparserow + an; 126 sparserow = denserow + pn; 127 128 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 129 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 130 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 131 current_space = free_space; 132 133 /* Determine symbolic info for each row of C: */ 134 for (i=0;i<pn;i++) { 135 ptnzi = pti[i+1] - pti[i]; 136 ptanzi = 0; 137 /* Determine symbolic row of PtA: */ 138 for (j=0;j<ptnzi;j++) { 139 arow = *ptJ++; 140 anzj = ai[arow+1] - ai[arow]; 141 ajj = aj + ai[arow]; 142 for (k=0;k<anzj;k++) { 143 if (!ptadenserow[ajj[k]]) { 144 ptadenserow[ajj[k]] = -1; 145 ptasparserow[ptanzi++] = ajj[k]; 146 } 147 } 148 } 149 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 150 ptaj = ptasparserow; 151 cnzi = 0; 152 for (j=0;j<ptanzi;j++) { 153 prow = *ptaj++; 154 pnzj = pi[prow+1] - pi[prow]; 155 pjj = pj + pi[prow]; 156 for (k=0;k<pnzj;k++) { 157 if (!denserow[pjj[k]]) { 158 denserow[pjj[k]] = -1; 159 sparserow[cnzi++] = pjj[k]; 160 } 161 } 162 } 163 164 /* sort sparserow */ 165 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 166 167 /* If free space is not available, make more free space */ 168 /* Double the amount of total space in the list */ 169 if (current_space->local_remaining<cnzi) { 170 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 171 } 172 173 /* Copy data into free space, and zero out denserows */ 174 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 175 current_space->array += cnzi; 176 current_space->local_used += cnzi; 177 current_space->local_remaining -= cnzi; 178 179 for (j=0;j<ptanzi;j++) { 180 ptadenserow[ptasparserow[j]] = 0; 181 } 182 for (j=0;j<cnzi;j++) { 183 denserow[sparserow[j]] = 0; 184 } 185 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 186 /* For now, we will recompute what is needed. */ 187 ci[i+1] = ci[i] + cnzi; 188 } 189 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 190 /* Allocate space for cj, initialize cj, and */ 191 /* destroy list of free space and other temporary array(s) */ 192 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 193 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 194 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 195 196 /* Allocate space for ca */ 197 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 198 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 199 200 /* put together the new matrix */ 201 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 202 203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 204 /* Since these are PETSc arrays, change flags to free them as necessary. */ 205 c = (Mat_SeqAIJ *)((*C)->data); 206 c->freedata = PETSC_TRUE; 207 c->nonew = 0; 208 209 /* Clean up. */ 210 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 211 212 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 EXTERN_C_END 216 217 #include "src/mat/impls/maij/maij.h" 218 EXTERN_C_BEGIN 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ" 221 int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 222 int ierr; 223 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 224 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 225 Mat P=pp->AIJ; 226 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 227 int aishift=a->indexshift,pishift=p->indexshift; 228 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 229 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 230 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 231 int i,j,k,dof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 232 MatScalar *ca; 233 234 PetscFunctionBegin; 235 236 /* some error checking which could be moved into interface layer */ 237 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 238 239 /* Start timer */ 240 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 241 242 /* Get ij structure of P^T */ 243 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 244 245 /* Allocate ci array, arrays for fill computation and */ 246 /* free space for accumulating nonzero column info */ 247 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 248 ci[0] = 0; 249 250 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 251 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 252 ptasparserow = ptadenserow + an; 253 denserow = ptasparserow + an; 254 sparserow = denserow + pn; 255 256 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 257 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 258 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 259 current_space = free_space; 260 261 /* Determine symbolic info for each row of C: */ 262 for (i=0;i<pn/ppdof;i++) { 263 ptnzi = pti[i+1] - pti[i]; 264 ptanzi = 0; 265 ptJ = ptj + pti[i]; 266 for (dof=0;dof<ppdof;dof++) { 267 /* Determine symbolic row of PtA: */ 268 for (j=0;j<ptnzi;j++) { 269 arow = ptJ[j] + dof; 270 anzj = ai[arow+1] - ai[arow]; 271 ajj = aj + ai[arow]; 272 for (k=0;k<anzj;k++) { 273 if (!ptadenserow[ajj[k]]) { 274 ptadenserow[ajj[k]] = -1; 275 ptasparserow[ptanzi++] = ajj[k]; 276 } 277 } 278 } 279 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 280 ptaj = ptasparserow; 281 cnzi = 0; 282 for (j=0;j<ptanzi;j++) { 283 prow = (*ptaj++)/dof; 284 pnzj = pi[prow+1] - pi[prow]; 285 pjj = pj + pi[prow]; 286 for (k=0;k<pnzj;k++) { 287 if (!denserow[pjj[k]]) { 288 denserow[pjj[k]] = -1; 289 sparserow[cnzi++] = pjj[k]; 290 } 291 } 292 } 293 294 /* sort sparserow */ 295 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 296 297 /* If free space is not available, make more free space */ 298 /* Double the amount of total space in the list */ 299 if (current_space->local_remaining<cnzi) { 300 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 301 } 302 303 /* Copy data into free space, and zero out denserows */ 304 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 305 current_space->array += cnzi; 306 current_space->local_used += cnzi; 307 current_space->local_remaining -= cnzi; 308 309 for (j=0;j<ptanzi;j++) { 310 ptadenserow[ptasparserow[j]] = 0; 311 } 312 for (j=0;j<cnzi;j++) { 313 denserow[sparserow[j]] = 0; 314 } 315 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 316 /* For now, we will recompute what is needed. */ 317 ci[i+1+dof] = ci[i+dof] + cnzi; 318 } 319 } 320 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 321 /* Allocate space for cj, initialize cj, and */ 322 /* destroy list of free space and other temporary array(s) */ 323 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 324 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 325 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 326 327 /* Allocate space for ca */ 328 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 329 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 330 331 /* put together the new matrix */ 332 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 333 334 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 335 /* Since these are PETSc arrays, change flags to free them as necessary. */ 336 c = (Mat_SeqAIJ *)((*C)->data); 337 c->freedata = PETSC_TRUE; 338 c->nonew = 0; 339 340 /* Clean up. */ 341 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 342 343 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 EXTERN_C_END 347 348 #include "src/mat/impls/maij/maij.h" 349 EXTERN_C_BEGIN 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ" 352 int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat PP,Mat *C) { 353 int ierr; 354 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 355 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 356 Mat P=pp->AIJ; 357 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 358 int aishift=a->indexshift,pishift=p->indexshift; 359 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 360 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 361 int an=A->N,am=A->M,pn=P->N,pm=P->M; 362 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 363 MatScalar *ca; 364 365 PetscFunctionBegin; 366 367 /* some error checking which could be moved into interface layer */ 368 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 369 370 /* Start timer */ 371 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 372 373 /* Get ij structure of P^T */ 374 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 375 ptJ=ptj; 376 377 /* Allocate ci array, arrays for fill computation and */ 378 /* free space for accumulating nonzero column info */ 379 ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 380 ci[0] = 0; 381 382 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 383 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 384 ptasparserow = ptadenserow + an; 385 denserow = ptasparserow + an; 386 sparserow = denserow + pn; 387 388 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 389 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 390 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 391 current_space = free_space; 392 393 /* Determine symbolic info for each row of C: */ 394 for (i=0;i<pn;i++) { 395 ptnzi = pti[i+1] - pti[i]; 396 ptanzi = 0; 397 /* Determine symbolic row of PtA: */ 398 for (j=0;j<ptnzi;j++) { 399 arow = *ptJ++; 400 anzj = ai[arow+1] - ai[arow]; 401 ajj = aj + ai[arow]; 402 for (k=0;k<anzj;k++) { 403 if (!ptadenserow[ajj[k]]) { 404 ptadenserow[ajj[k]] = -1; 405 ptasparserow[ptanzi++] = ajj[k]; 406 } 407 } 408 } 409 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 410 ptaj = ptasparserow; 411 cnzi = 0; 412 for (j=0;j<ptanzi;j++) { 413 prow = *ptaj++; 414 pnzj = pi[prow+1] - pi[prow]; 415 pjj = pj + pi[prow]; 416 for (k=0;k<pnzj;k++) { 417 if (!denserow[pjj[k]]) { 418 denserow[pjj[k]] = -1; 419 sparserow[cnzi++] = pjj[k]; 420 } 421 } 422 } 423 424 /* sort sparserow */ 425 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 426 427 /* If free space is not available, make more free space */ 428 /* Double the amount of total space in the list */ 429 if (current_space->local_remaining<cnzi) { 430 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 431 } 432 433 /* Copy data into free space, and zero out denserows */ 434 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 435 current_space->array += cnzi; 436 current_space->local_used += cnzi; 437 current_space->local_remaining -= cnzi; 438 439 for (j=0;j<ptanzi;j++) { 440 ptadenserow[ptasparserow[j]] = 0; 441 } 442 for (j=0;j<cnzi;j++) { 443 denserow[sparserow[j]] = 0; 444 } 445 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 446 /* For now, we will recompute what is needed. */ 447 ci[i+1] = ci[i] + cnzi; 448 } 449 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 450 /* Allocate space for cj, initialize cj, and */ 451 /* destroy list of free space and other temporary array(s) */ 452 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 453 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 454 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 455 456 /* Allocate space for ca */ 457 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 458 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 459 460 /* put together the new matrix */ 461 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 462 463 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 464 /* Since these are PETSc arrays, change flags to free them as necessary. */ 465 c = (Mat_SeqAIJ *)((*C)->data); 466 c->freedata = PETSC_TRUE; 467 c->nonew = 0; 468 469 /* Clean up. */ 470 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 471 472 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 EXTERN_C_END 476 477 /* 478 MatSeqAIJPtAPNumeric - Computes the SeqAIJ matrix product, C, 479 of SeqAIJ matrix A and matrix P, according to: 480 C = P^T * A * P 481 Note: C must have been created by calling MatSeqAIJApplyPtAPSymbolic. 482 */ 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatSeqAIJPtAPNumeric" 485 int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) { 486 int ierr; 487 char funct[80]; 488 489 PetscFunctionBegin; 490 491 PetscValidHeaderSpecific(A,MAT_COOKIE); 492 PetscValidType(A); 493 MatPreallocated(A); 494 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 495 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 496 497 PetscValidHeaderSpecific(P,MAT_COOKIE); 498 PetscValidType(P); 499 MatPreallocated(P); 500 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 501 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 502 503 PetscValidHeaderSpecific(C,MAT_COOKIE); 504 PetscValidType(C); 505 MatPreallocated(C); 506 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 507 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 508 509 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 510 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 511 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 512 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 513 514 /* Query A for ApplyPtAP implementation based on types of P */ 515 ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_");CHKERRQ(ierr); 516 ierr = PetscStrcat(funct,P->type_name);CHKERRQ(ierr); 517 ierr = PetscTryMethod(A,funct,(Mat,Mat,Mat),(A,P,C));CHKERRQ(ierr); 518 519 PetscFunctionReturn(0); 520 } 521 522 EXTERN_C_BEGIN 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ" 525 int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) { 526 int ierr,flops=0; 527 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 528 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 529 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 530 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 531 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 532 int *ci=c->i,*cj=c->j,*cjj; 533 int am=A->M,cn=C->N,cm=C->M; 534 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 535 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 536 537 PetscFunctionBegin; 538 539 /* Currently not for shifted matrices! */ 540 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 541 542 ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 543 544 /* Allocate temporary array for storage of one row of A*P */ 545 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 546 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 547 548 apj = (int *)(apa + cn); 549 apjdense = apj + cn; 550 551 /* Clear old values in C */ 552 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 553 554 for (i=0;i<am;i++) { 555 /* Form sparse row of A*P */ 556 anzi = ai[i+1] - ai[i]; 557 apnzj = 0; 558 for (j=0;j<anzi;j++) { 559 prow = *aj++; 560 pnzj = pi[prow+1] - pi[prow]; 561 pjj = pj + pi[prow]; 562 paj = pa + pi[prow]; 563 for (k=0;k<pnzj;k++) { 564 if (!apjdense[pjj[k]]) { 565 apjdense[pjj[k]] = -1; 566 apj[apnzj++] = pjj[k]; 567 } 568 apa[pjj[k]] += (*aa)*paj[k]; 569 } 570 flops += 2*pnzj; 571 aa++; 572 } 573 574 /* Sort the j index array for quick sparse axpy. */ 575 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 576 577 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 578 pnzi = pi[i+1] - pi[i]; 579 for (j=0;j<pnzi;j++) { 580 nextap = 0; 581 crow = *pJ++; 582 cjj = cj + ci[crow]; 583 caj = ca + ci[crow]; 584 /* Perform sparse axpy operation. Note cjj includes apj. */ 585 for (k=0;nextap<apnzj;k++) { 586 if (cjj[k]==apj[nextap]) { 587 caj[k] += (*pA)*apa[apj[nextap++]]; 588 } 589 } 590 flops += 2*apnzj; 591 pA++; 592 } 593 594 /* Zero the current row info for A*P */ 595 for (j=0;j<apnzj;j++) { 596 apa[apj[j]] = 0.; 597 apjdense[apj[j]] = 0; 598 } 599 } 600 601 /* Assemble the final matrix and clean up */ 602 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 603 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 604 ierr = PetscFree(apa);CHKERRQ(ierr); 605 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 606 ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr); 607 608 PetscFunctionReturn(0); 609 } 610 EXTERN_C_END 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "RegisterApplyPtAPRoutines_Private" 614 int RegisterApplyPtAPRoutines_Private(Mat A) { 615 int ierr; 616 617 PetscFunctionBegin; 618 619 if (!MATSeqAIJ_PtAP) { 620 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 621 } 622 623 if (!MATSeqAIJ_PtAPSymbolic) { 624 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr); 625 } 626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij", 627 "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ", 628 MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 629 630 if (!MATSeqAIJ_PtAPNumeric) { 631 ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr); 632 } 633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij", 634 "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ", 635 MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 636 PetscFunctionReturn(0); 637 } 638