1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatGetLocalMat(Mat,MatReuse,IS*,IS*,Mat*); 73 EXTERN PetscErrorCode MatGetBrowsOfAcols(Mat,Mat,MatReuse,IS*,IS*,PetscInt*,Mat*); 74 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 75 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 76 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 79 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 80 { 81 PetscErrorCode ierr; 82 Mat P_seq,A_loc,C_seq; 83 int prstart,prend,m=P->m; 84 IS isrowp,iscolp; 85 86 PetscFunctionBegin; 87 /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 88 ierr = MatGetBrowsOfAcols(A,P,scall,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 89 ierr = ISDestroy(iscolp);CHKERRQ(ierr); 90 91 /* get A_loc = submatrix of A by taking all local rows of A */ 92 ierr = MatGetLocalMat(A,scall,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 93 ierr = ISDestroy(isrowp);CHKERRQ(ierr); 94 95 /* compute C_seq = P_loc^T * A_loc * P_seq */ 96 prend = prstart + m; 97 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 98 99 /* add C_seq into mpi C */ 100 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr); 101 102 /* clean up */ 103 ierr = MatDestroy(C_seq);CHKERRQ(ierr); 104 ierr = MatDestroy(P_seq);CHKERRQ(ierr); 105 ierr = MatDestroy(A_loc);CHKERRQ(ierr); 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 111 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 112 { 113 114 PetscFunctionBegin; 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 120 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 121 { 122 123 PetscFunctionBegin; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 129 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 130 { 131 PetscErrorCode ierr; 132 PetscFunctionBegin; 133 if (scall == MAT_INITIAL_MATRIX){ 134 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 135 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 136 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 137 } 138 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 139 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 140 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatPtAPSymbolic" 146 /* 147 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 148 149 Collective on Mat 150 151 Input Parameters: 152 + A - the matrix 153 - P - the projection matrix 154 155 Output Parameters: 156 . C - the (i,j) structure of the product matrix 157 158 Notes: 159 C will be created and must be destroyed by the user with MatDestroy(). 160 161 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 162 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 163 this (i,j) structure by calling MatPtAPNumeric(). 164 165 Level: intermediate 166 167 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 168 */ 169 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 170 PetscErrorCode ierr; 171 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 172 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 173 174 PetscFunctionBegin; 175 176 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 177 PetscValidType(A,1); 178 MatPreallocated(A); 179 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 180 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 181 182 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 183 PetscValidType(P,2); 184 MatPreallocated(P); 185 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 186 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 187 188 PetscValidPointer(C,3); 189 190 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 191 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 192 193 /* For now, we do not dispatch based on the type of A and P */ 194 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 195 fA = A->ops->ptapsymbolic; 196 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 197 fP = P->ops->ptapsymbolic; 198 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 199 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 200 201 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 202 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 203 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 204 205 PetscFunctionReturn(0); 206 } 207 208 typedef struct { 209 Mat symAP; 210 } Mat_PtAPstruct; 211 212 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 216 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 217 { 218 PetscErrorCode ierr; 219 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 220 221 PetscFunctionBegin; 222 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 223 ierr = PetscFree(ptap);CHKERRQ(ierr); 224 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 #undef __FUNCT__ 229 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 230 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 231 PetscErrorCode ierr; 232 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 233 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 234 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 235 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 236 int an=A->N,am=A->M,pn=P->N,pm=P->M; 237 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 238 MatScalar *ca; 239 240 PetscFunctionBegin; 241 /* Get ij structure of P^T */ 242 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 243 ptJ=ptj; 244 245 /* Allocate ci array, arrays for fill computation and */ 246 /* free space for accumulating nonzero column info */ 247 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 248 ci[0] = 0; 249 250 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 251 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 252 ptasparserow = ptadenserow + an; 253 denserow = ptasparserow + an; 254 sparserow = denserow + pn; 255 256 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 257 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 258 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 259 current_space = free_space; 260 261 /* Determine symbolic info for each row of C: */ 262 for (i=0;i<pn;i++) { 263 ptnzi = pti[i+1] - pti[i]; 264 ptanzi = 0; 265 /* Determine symbolic row of PtA: */ 266 for (j=0;j<ptnzi;j++) { 267 arow = *ptJ++; 268 anzj = ai[arow+1] - ai[arow]; 269 ajj = aj + ai[arow]; 270 for (k=0;k<anzj;k++) { 271 if (!ptadenserow[ajj[k]]) { 272 ptadenserow[ajj[k]] = -1; 273 ptasparserow[ptanzi++] = ajj[k]; 274 } 275 } 276 } 277 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 278 ptaj = ptasparserow; 279 cnzi = 0; 280 for (j=0;j<ptanzi;j++) { 281 prow = *ptaj++; 282 pnzj = pi[prow+1] - pi[prow]; 283 pjj = pj + pi[prow]; 284 for (k=0;k<pnzj;k++) { 285 if (!denserow[pjj[k]]) { 286 denserow[pjj[k]] = -1; 287 sparserow[cnzi++] = pjj[k]; 288 } 289 } 290 } 291 292 /* sort sparserow */ 293 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 294 295 /* If free space is not available, make more free space */ 296 /* Double the amount of total space in the list */ 297 if (current_space->local_remaining<cnzi) { 298 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 299 } 300 301 /* Copy data into free space, and zero out denserows */ 302 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 303 current_space->array += cnzi; 304 current_space->local_used += cnzi; 305 current_space->local_remaining -= cnzi; 306 307 for (j=0;j<ptanzi;j++) { 308 ptadenserow[ptasparserow[j]] = 0; 309 } 310 for (j=0;j<cnzi;j++) { 311 denserow[sparserow[j]] = 0; 312 } 313 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 314 /* For now, we will recompute what is needed. */ 315 ci[i+1] = ci[i] + cnzi; 316 } 317 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 318 /* Allocate space for cj, initialize cj, and */ 319 /* destroy list of free space and other temporary array(s) */ 320 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 321 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 322 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 323 324 /* Allocate space for ca */ 325 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 326 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 327 328 /* put together the new matrix */ 329 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 330 331 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 332 /* Since these are PETSc arrays, change flags to free them as necessary. */ 333 c = (Mat_SeqAIJ *)((*C)->data); 334 c->freedata = PETSC_TRUE; 335 c->nonew = 0; 336 337 /* Clean up. */ 338 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 339 340 PetscFunctionReturn(0); 341 } 342 343 #include "src/mat/impls/maij/maij.h" 344 EXTERN_C_BEGIN 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 347 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 348 /* This routine requires testing -- I don't think it works. */ 349 PetscErrorCode ierr; 350 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 351 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 352 Mat P=pp->AIJ; 353 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 354 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 355 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 356 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 357 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 358 MatScalar *ca; 359 360 PetscFunctionBegin; 361 /* Start timer */ 362 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 363 364 /* Get ij structure of P^T */ 365 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 366 367 /* Allocate ci array, arrays for fill computation and */ 368 /* free space for accumulating nonzero column info */ 369 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 370 ci[0] = 0; 371 372 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 373 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 374 ptasparserow = ptadenserow + an; 375 denserow = ptasparserow + an; 376 sparserow = denserow + pn; 377 378 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 379 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 380 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 381 current_space = free_space; 382 383 /* Determine symbolic info for each row of C: */ 384 for (i=0;i<pn/ppdof;i++) { 385 ptnzi = pti[i+1] - pti[i]; 386 ptanzi = 0; 387 ptJ = ptj + pti[i]; 388 for (dof=0;dof<ppdof;dof++) { 389 /* Determine symbolic row of PtA: */ 390 for (j=0;j<ptnzi;j++) { 391 arow = ptJ[j] + dof; 392 anzj = ai[arow+1] - ai[arow]; 393 ajj = aj + ai[arow]; 394 for (k=0;k<anzj;k++) { 395 if (!ptadenserow[ajj[k]]) { 396 ptadenserow[ajj[k]] = -1; 397 ptasparserow[ptanzi++] = ajj[k]; 398 } 399 } 400 } 401 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 402 ptaj = ptasparserow; 403 cnzi = 0; 404 for (j=0;j<ptanzi;j++) { 405 pdof = *ptaj%dof; 406 prow = (*ptaj++)/dof; 407 pnzj = pi[prow+1] - pi[prow]; 408 pjj = pj + pi[prow]; 409 for (k=0;k<pnzj;k++) { 410 if (!denserow[pjj[k]+pdof]) { 411 denserow[pjj[k]+pdof] = -1; 412 sparserow[cnzi++] = pjj[k]+pdof; 413 } 414 } 415 } 416 417 /* sort sparserow */ 418 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 419 420 /* If free space is not available, make more free space */ 421 /* Double the amount of total space in the list */ 422 if (current_space->local_remaining<cnzi) { 423 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 424 } 425 426 /* Copy data into free space, and zero out denserows */ 427 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 428 current_space->array += cnzi; 429 current_space->local_used += cnzi; 430 current_space->local_remaining -= cnzi; 431 432 for (j=0;j<ptanzi;j++) { 433 ptadenserow[ptasparserow[j]] = 0; 434 } 435 for (j=0;j<cnzi;j++) { 436 denserow[sparserow[j]] = 0; 437 } 438 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 439 /* For now, we will recompute what is needed. */ 440 ci[i+1+dof] = ci[i+dof] + cnzi; 441 } 442 } 443 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 444 /* Allocate space for cj, initialize cj, and */ 445 /* destroy list of free space and other temporary array(s) */ 446 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 447 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 448 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 449 450 /* Allocate space for ca */ 451 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 452 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 453 454 /* put together the new matrix */ 455 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 456 457 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 458 /* Since these are PETSc arrays, change flags to free them as necessary. */ 459 c = (Mat_SeqAIJ *)((*C)->data); 460 c->freedata = PETSC_TRUE; 461 c->nonew = 0; 462 463 /* Clean up. */ 464 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 465 466 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 EXTERN_C_END 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "MatPtAPNumeric" 473 /* 474 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 475 476 Collective on Mat 477 478 Input Parameters: 479 + A - the matrix 480 - P - the projection matrix 481 482 Output Parameters: 483 . C - the product matrix 484 485 Notes: 486 C must have been created by calling MatPtAPSymbolic and must be destroyed by 487 the user using MatDeatroy(). 488 489 This routine is currently only implemented for pairs of AIJ matrices and classes 490 which inherit from AIJ. C will be of type MATAIJ. 491 492 Level: intermediate 493 494 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 495 */ 496 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 497 PetscErrorCode ierr; 498 PetscErrorCode (*fA)(Mat,Mat,Mat); 499 PetscErrorCode (*fP)(Mat,Mat,Mat); 500 501 PetscFunctionBegin; 502 503 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 504 PetscValidType(A,1); 505 MatPreallocated(A); 506 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 507 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 508 509 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 510 PetscValidType(P,2); 511 MatPreallocated(P); 512 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 513 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 514 515 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 516 PetscValidType(C,3); 517 MatPreallocated(C); 518 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 519 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 520 521 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 522 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 523 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 524 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 525 526 /* For now, we do not dispatch based on the type of A and P */ 527 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 528 fA = A->ops->ptapnumeric; 529 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 530 fP = P->ops->ptapnumeric; 531 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 532 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 533 534 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 535 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 536 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 543 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 544 { 545 PetscErrorCode ierr; 546 int flops=0; 547 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 548 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 549 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 550 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 551 int *ci=c->i,*cj=c->j,*cjj; 552 int am=A->M,cn=C->N,cm=C->M; 553 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 554 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 555 556 PetscFunctionBegin; 557 /* Allocate temporary array for storage of one row of A*P */ 558 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 559 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 560 561 apj = (int *)(apa + cn); 562 apjdense = apj + cn; 563 564 /* Clear old values in C */ 565 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 566 567 for (i=0;i<am;i++) { 568 /* Form sparse row of A*P */ 569 anzi = ai[i+1] - ai[i]; 570 apnzj = 0; 571 for (j=0;j<anzi;j++) { 572 prow = *aj++; 573 pnzj = pi[prow+1] - pi[prow]; 574 pjj = pj + pi[prow]; 575 paj = pa + pi[prow]; 576 for (k=0;k<pnzj;k++) { 577 if (!apjdense[pjj[k]]) { 578 apjdense[pjj[k]] = -1; 579 apj[apnzj++] = pjj[k]; 580 } 581 apa[pjj[k]] += (*aa)*paj[k]; 582 } 583 flops += 2*pnzj; 584 aa++; 585 } 586 587 /* Sort the j index array for quick sparse axpy. */ 588 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 589 590 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 591 pnzi = pi[i+1] - pi[i]; 592 for (j=0;j<pnzi;j++) { 593 nextap = 0; 594 crow = *pJ++; 595 cjj = cj + ci[crow]; 596 caj = ca + ci[crow]; 597 /* Perform sparse axpy operation. Note cjj includes apj. */ 598 for (k=0;nextap<apnzj;k++) { 599 if (cjj[k]==apj[nextap]) { 600 caj[k] += (*pA)*apa[apj[nextap++]]; 601 } 602 } 603 flops += 2*apnzj; 604 pA++; 605 } 606 607 /* Zero the current row info for A*P */ 608 for (j=0;j<apnzj;j++) { 609 apa[apj[j]] = 0.; 610 apjdense[apj[j]] = 0; 611 } 612 } 613 614 /* Assemble the final matrix and clean up */ 615 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = PetscFree(apa);CHKERRQ(ierr); 618 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 619 620 PetscFunctionReturn(0); 621 } 622 623 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 627 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 628 { 629 PetscErrorCode ierr; 630 int flops=0; 631 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 632 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 633 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 634 int *ai=a->i,*aj=a->j,*apj,*apjdense; 635 int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 636 int *ci=c->i,*cj=c->j,*cjj; 637 int am=A->M,cn=C->N,cm=C->M; 638 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 639 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 640 641 PetscFunctionBegin; 642 pA=p->a+pi[prstart]; 643 /* Allocate temporary array for storage of one row of A*P */ 644 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 645 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 646 647 apj = (int *)(apa + cn); 648 apjdense = apj + cn; 649 650 /* Clear old values in C */ 651 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 652 653 for (i=0;i<am;i++) { 654 /* Form sparse row of A*P */ 655 anzi = ai[i+1] - ai[i]; 656 apnzj = 0; 657 for (j=0;j<anzi;j++) { 658 prow = *aj++; 659 pnzj = pi[prow+1] - pi[prow]; 660 pjj = pj + pi[prow]; 661 paj = pa + pi[prow]; 662 for (k=0;k<pnzj;k++) { 663 if (!apjdense[pjj[k]]) { 664 apjdense[pjj[k]] = -1; 665 apj[apnzj++] = pjj[k]; 666 } 667 apa[pjj[k]] += (*aa)*paj[k]; 668 } 669 flops += 2*pnzj; 670 aa++; 671 } 672 673 /* Sort the j index array for quick sparse axpy. */ 674 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 675 676 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 677 pnzi = pi[i+1+prstart] - pi[i+prstart]; 678 for (j=0;j<pnzi;j++) { 679 nextap = 0; 680 crow = *pJ++; 681 cjj = cj + ci[crow]; 682 caj = ca + ci[crow]; 683 /* Perform sparse axpy operation. Note cjj includes apj. */ 684 for (k=0;nextap<apnzj;k++) { 685 if (cjj[k]==apj[nextap]) { 686 caj[k] += (*pA)*apa[apj[nextap++]]; 687 } 688 } 689 flops += 2*apnzj; 690 pA++; 691 } 692 693 /* Zero the current row info for A*P */ 694 for (j=0;j<apnzj;j++) { 695 apa[apj[j]] = 0.; 696 apjdense[apj[j]] = 0; 697 } 698 } 699 700 /* Assemble the final matrix and clean up */ 701 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 702 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 703 ierr = PetscFree(apa);CHKERRQ(ierr); 704 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 705 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNCT__ 710 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 711 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 712 PetscErrorCode ierr; 713 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 714 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 715 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 716 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 717 int an=A->N,am=A->M,pn=P->N,pm=P->M; 718 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 719 MatScalar *ca; 720 Mat *psub,P_sub; 721 IS isrow,iscol; 722 int m = prend - prstart; 723 724 PetscFunctionBegin; 725 /* Get ij structure of P[rstart:rend,:]^T */ 726 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 727 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 728 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 729 ierr = ISDestroy(isrow);CHKERRQ(ierr); 730 ierr = ISDestroy(iscol);CHKERRQ(ierr); 731 P_sub = psub[0]; 732 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 733 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 734 ptJ=ptj; 735 736 /* Allocate ci array, arrays for fill computation and */ 737 /* free space for accumulating nonzero column info */ 738 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 739 ci[0] = 0; 740 741 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 742 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 743 ptasparserow = ptadenserow + an; 744 denserow = ptasparserow + an; 745 sparserow = denserow + pn; 746 747 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 748 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 749 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 750 current_space = free_space; 751 752 /* Determine symbolic info for each row of C: */ 753 for (i=0;i<pn;i++) { 754 ptnzi = pti[i+1] - pti[i]; 755 ptanzi = 0; 756 /* Determine symbolic row of PtA_reduced: */ 757 for (j=0;j<ptnzi;j++) { 758 arow = *ptJ++; 759 anzj = ai[arow+1] - ai[arow]; 760 ajj = aj + ai[arow]; 761 for (k=0;k<anzj;k++) { 762 if (!ptadenserow[ajj[k]]) { 763 ptadenserow[ajj[k]] = -1; 764 ptasparserow[ptanzi++] = ajj[k]; 765 } 766 } 767 } 768 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 769 ptaj = ptasparserow; 770 cnzi = 0; 771 for (j=0;j<ptanzi;j++) { 772 prow = *ptaj++; 773 pnzj = pi[prow+1] - pi[prow]; 774 pjj = pj + pi[prow]; 775 for (k=0;k<pnzj;k++) { 776 if (!denserow[pjj[k]]) { 777 denserow[pjj[k]] = -1; 778 sparserow[cnzi++] = pjj[k]; 779 } 780 } 781 } 782 783 /* sort sparserow */ 784 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 785 786 /* If free space is not available, make more free space */ 787 /* Double the amount of total space in the list */ 788 if (current_space->local_remaining<cnzi) { 789 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 790 } 791 792 /* Copy data into free space, and zero out denserows */ 793 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 794 current_space->array += cnzi; 795 current_space->local_used += cnzi; 796 current_space->local_remaining -= cnzi; 797 798 for (j=0;j<ptanzi;j++) { 799 ptadenserow[ptasparserow[j]] = 0; 800 } 801 for (j=0;j<cnzi;j++) { 802 denserow[sparserow[j]] = 0; 803 } 804 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 805 /* For now, we will recompute what is needed. */ 806 ci[i+1] = ci[i] + cnzi; 807 } 808 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 809 /* Allocate space for cj, initialize cj, and */ 810 /* destroy list of free space and other temporary array(s) */ 811 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 812 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 813 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 814 815 /* Allocate space for ca */ 816 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 817 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 818 819 /* put together the new matrix */ 820 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 821 822 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 823 /* Since these are PETSc arrays, change flags to free them as necessary. */ 824 c = (Mat_SeqAIJ *)((*C)->data); 825 c->freedata = PETSC_TRUE; 826 c->nonew = 0; 827 828 /* Clean up. */ 829 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 830 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 836 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 837 { 838 PetscErrorCode ierr; 839 PetscFunctionBegin; 840 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 841 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 842 if (scall == MAT_INITIAL_MATRIX){ 843 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 844 } 845 846 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 847 848 PetscFunctionReturn(0); 849 } 850 851