1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatPtAP" 12 /*@ 13 MatPtAP - Creates the matrix projection C = P^T * A * P 14 15 Collective on Mat 16 17 Input Parameters: 18 + A - the matrix 19 . P - the projection matrix 20 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 22 23 Output Parameters: 24 . C - the product matrix 25 26 Notes: 27 C will be created and must be destroyed by the user with MatDestroy(). 28 29 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 30 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 31 32 Level: intermediate 33 34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 35 @*/ 36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37 PetscErrorCode ierr; 38 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40 41 PetscFunctionBegin; 42 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 43 PetscValidType(A,1); 44 MatPreallocated(A); 45 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 46 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 47 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 48 PetscValidType(P,2); 49 MatPreallocated(P); 50 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 51 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 52 PetscValidPointer(C,3); 53 54 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55 56 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57 58 /* For now, we do not dispatch based on the type of A and P */ 59 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60 fA = A->ops->ptap; 61 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62 fP = P->ops->ptap; 63 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65 66 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 68 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 73 EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 74 EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 77 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 78 { 79 PetscErrorCode ierr; 80 Mat AP,P_seq,A_loc,C_seq; 81 Mat_MatMatMultMPI *mult; 82 int prstart,prend,m=P->m; 83 int rank,prid=10; 84 85 PetscFunctionBegin; 86 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 87 88 /* compute symbolic and numeric AP = A*P */ 89 ierr = MatMatMult_MPIAIJ_MPIAIJ(A,P,scall,fill,&AP);CHKERRQ(ierr); 90 mult = (Mat_MatMatMultMPI*)AP->spptr; 91 P_seq = mult->bseq[0]; /* = submatrix of P by taking rows of P that equal to nonzero col of A */ 92 93 if (rank == prid){ 94 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_loc: %d, %d\n",rank,A_loc->m,A_loc->n);CHKERRQ(ierr); 95 ierr = MatView(A_loc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 96 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] P_seq: %d, %d\n",rank,P_seq->m,P_seq->n);CHKERRQ(ierr); 97 ierr = MatView(P_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 98 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] AP_seq: %d, %d\n",rank,AP->m,AP->n); 99 ierr = MatView(AP,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 100 } 101 102 prstart = mult->brstart; 103 prend = prstart + m; 104 105 A_loc = mult->aseq[0]; /* = submatrix of A by taking all local rows of A */ 106 ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 107 108 /* add C_seq into C */ 109 ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr); 110 111 /* clean up */ 112 ierr = MatDestroy(AP);CHKERRQ(ierr); 113 ierr = MatDestroy(C_seq);CHKERRQ(ierr); 114 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 120 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 121 { 122 123 PetscFunctionBegin; 124 PetscFunctionReturn(0); 125 } 126 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 129 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 130 { 131 132 PetscFunctionBegin; 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 138 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 139 { 140 PetscErrorCode ierr; 141 PetscFunctionBegin; 142 if (scall == MAT_INITIAL_MATRIX){ 143 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 144 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 145 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 146 } 147 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 148 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 149 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatPtAPSymbolic" 155 /* 156 MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 157 158 Collective on Mat 159 160 Input Parameters: 161 + A - the matrix 162 - P - the projection matrix 163 164 Output Parameters: 165 . C - the (i,j) structure of the product matrix 166 167 Notes: 168 C will be created and must be destroyed by the user with MatDestroy(). 169 170 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 171 which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 172 this (i,j) structure by calling MatPtAPNumeric(). 173 174 Level: intermediate 175 176 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 177 */ 178 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 179 PetscErrorCode ierr; 180 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 181 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 182 183 PetscFunctionBegin; 184 185 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 186 PetscValidType(A,1); 187 MatPreallocated(A); 188 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 189 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 190 191 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 192 PetscValidType(P,2); 193 MatPreallocated(P); 194 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 195 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 196 197 PetscValidPointer(C,3); 198 199 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 200 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 201 202 /* For now, we do not dispatch based on the type of A and P */ 203 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 204 fA = A->ops->ptapsymbolic; 205 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 206 fP = P->ops->ptapsymbolic; 207 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 208 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 209 210 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 211 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 212 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 213 214 PetscFunctionReturn(0); 215 } 216 217 typedef struct { 218 Mat symAP; 219 } Mat_PtAPstruct; 220 221 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 222 223 #undef __FUNCT__ 224 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 225 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 226 { 227 PetscErrorCode ierr; 228 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 229 230 PetscFunctionBegin; 231 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 232 ierr = PetscFree(ptap);CHKERRQ(ierr); 233 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 239 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 240 PetscErrorCode ierr; 241 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 242 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 243 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 244 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 245 int an=A->N,am=A->M,pn=P->N,pm=P->M; 246 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 247 MatScalar *ca; 248 249 PetscFunctionBegin; 250 /* Get ij structure of P^T */ 251 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 252 ptJ=ptj; 253 254 /* Allocate ci array, arrays for fill computation and */ 255 /* free space for accumulating nonzero column info */ 256 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 257 ci[0] = 0; 258 259 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 260 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 261 ptasparserow = ptadenserow + an; 262 denserow = ptasparserow + an; 263 sparserow = denserow + pn; 264 265 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 266 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 267 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 268 current_space = free_space; 269 270 /* Determine symbolic info for each row of C: */ 271 for (i=0;i<pn;i++) { 272 ptnzi = pti[i+1] - pti[i]; 273 ptanzi = 0; 274 /* Determine symbolic row of PtA: */ 275 for (j=0;j<ptnzi;j++) { 276 arow = *ptJ++; 277 anzj = ai[arow+1] - ai[arow]; 278 ajj = aj + ai[arow]; 279 for (k=0;k<anzj;k++) { 280 if (!ptadenserow[ajj[k]]) { 281 ptadenserow[ajj[k]] = -1; 282 ptasparserow[ptanzi++] = ajj[k]; 283 } 284 } 285 } 286 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 287 ptaj = ptasparserow; 288 cnzi = 0; 289 for (j=0;j<ptanzi;j++) { 290 prow = *ptaj++; 291 pnzj = pi[prow+1] - pi[prow]; 292 pjj = pj + pi[prow]; 293 for (k=0;k<pnzj;k++) { 294 if (!denserow[pjj[k]]) { 295 denserow[pjj[k]] = -1; 296 sparserow[cnzi++] = pjj[k]; 297 } 298 } 299 } 300 301 /* sort sparserow */ 302 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 303 304 /* If free space is not available, make more free space */ 305 /* Double the amount of total space in the list */ 306 if (current_space->local_remaining<cnzi) { 307 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 308 } 309 310 /* Copy data into free space, and zero out denserows */ 311 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 312 current_space->array += cnzi; 313 current_space->local_used += cnzi; 314 current_space->local_remaining -= cnzi; 315 316 for (j=0;j<ptanzi;j++) { 317 ptadenserow[ptasparserow[j]] = 0; 318 } 319 for (j=0;j<cnzi;j++) { 320 denserow[sparserow[j]] = 0; 321 } 322 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 323 /* For now, we will recompute what is needed. */ 324 ci[i+1] = ci[i] + cnzi; 325 } 326 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 327 /* Allocate space for cj, initialize cj, and */ 328 /* destroy list of free space and other temporary array(s) */ 329 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 330 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 331 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 332 333 /* Allocate space for ca */ 334 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 335 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 336 337 /* put together the new matrix */ 338 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 339 340 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 341 /* Since these are PETSc arrays, change flags to free them as necessary. */ 342 c = (Mat_SeqAIJ *)((*C)->data); 343 c->freedata = PETSC_TRUE; 344 c->nonew = 0; 345 346 /* Clean up. */ 347 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 348 349 PetscFunctionReturn(0); 350 } 351 352 #include "src/mat/impls/maij/maij.h" 353 EXTERN_C_BEGIN 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 356 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 357 /* This routine requires testing -- I don't think it works. */ 358 PetscErrorCode ierr; 359 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 360 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 361 Mat P=pp->AIJ; 362 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 363 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 364 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 365 int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 366 int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 367 MatScalar *ca; 368 369 PetscFunctionBegin; 370 /* Start timer */ 371 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 372 373 /* Get ij structure of P^T */ 374 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 375 376 /* Allocate ci array, arrays for fill computation and */ 377 /* free space for accumulating nonzero column info */ 378 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 379 ci[0] = 0; 380 381 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 382 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 383 ptasparserow = ptadenserow + an; 384 denserow = ptasparserow + an; 385 sparserow = denserow + pn; 386 387 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 388 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 389 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 390 current_space = free_space; 391 392 /* Determine symbolic info for each row of C: */ 393 for (i=0;i<pn/ppdof;i++) { 394 ptnzi = pti[i+1] - pti[i]; 395 ptanzi = 0; 396 ptJ = ptj + pti[i]; 397 for (dof=0;dof<ppdof;dof++) { 398 /* Determine symbolic row of PtA: */ 399 for (j=0;j<ptnzi;j++) { 400 arow = ptJ[j] + dof; 401 anzj = ai[arow+1] - ai[arow]; 402 ajj = aj + ai[arow]; 403 for (k=0;k<anzj;k++) { 404 if (!ptadenserow[ajj[k]]) { 405 ptadenserow[ajj[k]] = -1; 406 ptasparserow[ptanzi++] = ajj[k]; 407 } 408 } 409 } 410 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 411 ptaj = ptasparserow; 412 cnzi = 0; 413 for (j=0;j<ptanzi;j++) { 414 pdof = *ptaj%dof; 415 prow = (*ptaj++)/dof; 416 pnzj = pi[prow+1] - pi[prow]; 417 pjj = pj + pi[prow]; 418 for (k=0;k<pnzj;k++) { 419 if (!denserow[pjj[k]+pdof]) { 420 denserow[pjj[k]+pdof] = -1; 421 sparserow[cnzi++] = pjj[k]+pdof; 422 } 423 } 424 } 425 426 /* sort sparserow */ 427 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 428 429 /* If free space is not available, make more free space */ 430 /* Double the amount of total space in the list */ 431 if (current_space->local_remaining<cnzi) { 432 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 433 } 434 435 /* Copy data into free space, and zero out denserows */ 436 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 437 current_space->array += cnzi; 438 current_space->local_used += cnzi; 439 current_space->local_remaining -= cnzi; 440 441 for (j=0;j<ptanzi;j++) { 442 ptadenserow[ptasparserow[j]] = 0; 443 } 444 for (j=0;j<cnzi;j++) { 445 denserow[sparserow[j]] = 0; 446 } 447 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 448 /* For now, we will recompute what is needed. */ 449 ci[i+1+dof] = ci[i+dof] + cnzi; 450 } 451 } 452 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 453 /* Allocate space for cj, initialize cj, and */ 454 /* destroy list of free space and other temporary array(s) */ 455 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 456 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 457 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 458 459 /* Allocate space for ca */ 460 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 461 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 462 463 /* put together the new matrix */ 464 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 465 466 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 467 /* Since these are PETSc arrays, change flags to free them as necessary. */ 468 c = (Mat_SeqAIJ *)((*C)->data); 469 c->freedata = PETSC_TRUE; 470 c->nonew = 0; 471 472 /* Clean up. */ 473 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 474 475 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 EXTERN_C_END 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatPtAPNumeric" 482 /* 483 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 484 485 Collective on Mat 486 487 Input Parameters: 488 + A - the matrix 489 - P - the projection matrix 490 491 Output Parameters: 492 . C - the product matrix 493 494 Notes: 495 C must have been created by calling MatPtAPSymbolic and must be destroyed by 496 the user using MatDeatroy(). 497 498 This routine is currently only implemented for pairs of AIJ matrices and classes 499 which inherit from AIJ. C will be of type MATAIJ. 500 501 Level: intermediate 502 503 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 504 */ 505 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 506 PetscErrorCode ierr; 507 PetscErrorCode (*fA)(Mat,Mat,Mat); 508 PetscErrorCode (*fP)(Mat,Mat,Mat); 509 510 PetscFunctionBegin; 511 512 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 513 PetscValidType(A,1); 514 MatPreallocated(A); 515 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 516 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 517 518 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 519 PetscValidType(P,2); 520 MatPreallocated(P); 521 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 522 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 523 524 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 525 PetscValidType(C,3); 526 MatPreallocated(C); 527 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 528 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 529 530 if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 531 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 532 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 533 if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 534 535 /* For now, we do not dispatch based on the type of A and P */ 536 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 537 fA = A->ops->ptapnumeric; 538 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 539 fP = P->ops->ptapnumeric; 540 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 541 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 542 543 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 544 ierr = (*fA)(A,P,C);CHKERRQ(ierr); 545 ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 546 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 552 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 553 { 554 PetscErrorCode ierr; 555 int flops=0; 556 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 557 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 558 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 559 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 560 int *ci=c->i,*cj=c->j,*cjj; 561 int am=A->M,cn=C->N,cm=C->M; 562 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 563 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 564 565 PetscFunctionBegin; 566 /* Allocate temporary array for storage of one row of A*P */ 567 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 568 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 569 570 apj = (int *)(apa + cn); 571 apjdense = apj + cn; 572 573 /* Clear old values in C */ 574 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 575 576 for (i=0;i<am;i++) { 577 /* Form sparse row of A*P */ 578 anzi = ai[i+1] - ai[i]; 579 apnzj = 0; 580 for (j=0;j<anzi;j++) { 581 prow = *aj++; 582 pnzj = pi[prow+1] - pi[prow]; 583 pjj = pj + pi[prow]; 584 paj = pa + pi[prow]; 585 for (k=0;k<pnzj;k++) { 586 if (!apjdense[pjj[k]]) { 587 apjdense[pjj[k]] = -1; 588 apj[apnzj++] = pjj[k]; 589 } 590 apa[pjj[k]] += (*aa)*paj[k]; 591 } 592 flops += 2*pnzj; 593 aa++; 594 } 595 596 /* Sort the j index array for quick sparse axpy. */ 597 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 598 599 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 600 pnzi = pi[i+1] - pi[i]; 601 for (j=0;j<pnzi;j++) { 602 nextap = 0; 603 crow = *pJ++; 604 cjj = cj + ci[crow]; 605 caj = ca + ci[crow]; 606 /* Perform sparse axpy operation. Note cjj includes apj. */ 607 for (k=0;nextap<apnzj;k++) { 608 if (cjj[k]==apj[nextap]) { 609 caj[k] += (*pA)*apa[apj[nextap++]]; 610 } 611 } 612 flops += 2*apnzj; 613 pA++; 614 } 615 616 /* Zero the current row info for A*P */ 617 for (j=0;j<apnzj;j++) { 618 apa[apj[j]] = 0.; 619 apjdense[apj[j]] = 0; 620 } 621 } 622 623 /* Assemble the final matrix and clean up */ 624 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 625 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 626 ierr = PetscFree(apa);CHKERRQ(ierr); 627 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 628 629 PetscFunctionReturn(0); 630 } 631 632 /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 636 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 637 { 638 PetscErrorCode ierr; 639 int flops=0; 640 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 641 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 642 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 643 int *ai=a->i,*aj=a->j,*apj,*apjdense; 644 int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 645 int *ci=c->i,*cj=c->j,*cjj; 646 int am=A->M,cn=C->N,cm=C->M; 647 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 648 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 649 650 PetscFunctionBegin; 651 pA=p->a+pi[prstart]; 652 /* Allocate temporary array for storage of one row of A*P */ 653 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 654 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 655 656 apj = (int *)(apa + cn); 657 apjdense = apj + cn; 658 659 /* Clear old values in C */ 660 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 661 662 for (i=0;i<am;i++) { 663 /* Form sparse row of A*P */ 664 anzi = ai[i+1] - ai[i]; 665 apnzj = 0; 666 for (j=0;j<anzi;j++) { 667 prow = *aj++; 668 pnzj = pi[prow+1] - pi[prow]; 669 pjj = pj + pi[prow]; 670 paj = pa + pi[prow]; 671 for (k=0;k<pnzj;k++) { 672 if (!apjdense[pjj[k]]) { 673 apjdense[pjj[k]] = -1; 674 apj[apnzj++] = pjj[k]; 675 } 676 apa[pjj[k]] += (*aa)*paj[k]; 677 } 678 flops += 2*pnzj; 679 aa++; 680 } 681 682 /* Sort the j index array for quick sparse axpy. */ 683 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 684 685 /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 686 pnzi = pi[i+1+prstart] - pi[i+prstart]; 687 for (j=0;j<pnzi;j++) { 688 nextap = 0; 689 crow = *pJ++; 690 cjj = cj + ci[crow]; 691 caj = ca + ci[crow]; 692 /* Perform sparse axpy operation. Note cjj includes apj. */ 693 for (k=0;nextap<apnzj;k++) { 694 if (cjj[k]==apj[nextap]) { 695 caj[k] += (*pA)*apa[apj[nextap++]]; 696 } 697 } 698 flops += 2*apnzj; 699 pA++; 700 } 701 702 /* Zero the current row info for A*P */ 703 for (j=0;j<apnzj;j++) { 704 apa[apj[j]] = 0.; 705 apjdense[apj[j]] = 0; 706 } 707 } 708 709 /* Assemble the final matrix and clean up */ 710 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 711 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 712 ierr = PetscFree(apa);CHKERRQ(ierr); 713 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 714 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 720 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 721 PetscErrorCode ierr; 722 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 723 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 724 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 725 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 726 int an=A->N,am=A->M,pn=P->N,pm=P->M; 727 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 728 MatScalar *ca; 729 Mat *psub,P_sub; 730 IS isrow,iscol; 731 int m = prend - prstart; 732 733 PetscFunctionBegin; 734 /* Get ij structure of P[rstart:rend,:]^T */ 735 ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 736 ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 737 ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 738 ierr = ISDestroy(isrow);CHKERRQ(ierr); 739 ierr = ISDestroy(iscol);CHKERRQ(ierr); 740 P_sub = psub[0]; 741 ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 742 ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 743 ptJ=ptj; 744 745 /* Allocate ci array, arrays for fill computation and */ 746 /* free space for accumulating nonzero column info */ 747 ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 748 ci[0] = 0; 749 750 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 751 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 752 ptasparserow = ptadenserow + an; 753 denserow = ptasparserow + an; 754 sparserow = denserow + pn; 755 756 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 757 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 758 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 759 current_space = free_space; 760 761 /* Determine symbolic info for each row of C: */ 762 for (i=0;i<pn;i++) { 763 ptnzi = pti[i+1] - pti[i]; 764 ptanzi = 0; 765 /* Determine symbolic row of PtA_reduced: */ 766 for (j=0;j<ptnzi;j++) { 767 arow = *ptJ++; 768 anzj = ai[arow+1] - ai[arow]; 769 ajj = aj + ai[arow]; 770 for (k=0;k<anzj;k++) { 771 if (!ptadenserow[ajj[k]]) { 772 ptadenserow[ajj[k]] = -1; 773 ptasparserow[ptanzi++] = ajj[k]; 774 } 775 } 776 } 777 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 778 ptaj = ptasparserow; 779 cnzi = 0; 780 for (j=0;j<ptanzi;j++) { 781 prow = *ptaj++; 782 pnzj = pi[prow+1] - pi[prow]; 783 pjj = pj + pi[prow]; 784 for (k=0;k<pnzj;k++) { 785 if (!denserow[pjj[k]]) { 786 denserow[pjj[k]] = -1; 787 sparserow[cnzi++] = pjj[k]; 788 } 789 } 790 } 791 792 /* sort sparserow */ 793 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 794 795 /* If free space is not available, make more free space */ 796 /* Double the amount of total space in the list */ 797 if (current_space->local_remaining<cnzi) { 798 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 799 } 800 801 /* Copy data into free space, and zero out denserows */ 802 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 803 current_space->array += cnzi; 804 current_space->local_used += cnzi; 805 current_space->local_remaining -= cnzi; 806 807 for (j=0;j<ptanzi;j++) { 808 ptadenserow[ptasparserow[j]] = 0; 809 } 810 for (j=0;j<cnzi;j++) { 811 denserow[sparserow[j]] = 0; 812 } 813 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 814 /* For now, we will recompute what is needed. */ 815 ci[i+1] = ci[i] + cnzi; 816 } 817 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 818 /* Allocate space for cj, initialize cj, and */ 819 /* destroy list of free space and other temporary array(s) */ 820 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 821 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 822 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 823 824 /* Allocate space for ca */ 825 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 826 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 827 828 /* put together the new matrix */ 829 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 830 831 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 832 /* Since these are PETSc arrays, change flags to free them as necessary. */ 833 c = (Mat_SeqAIJ *)((*C)->data); 834 c->freedata = PETSC_TRUE; 835 c->nonew = 0; 836 837 /* Clean up. */ 838 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 839 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNCT__ 844 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 845 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 846 { 847 PetscErrorCode ierr; 848 PetscFunctionBegin; 849 if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 850 if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 851 if (scall == MAT_INITIAL_MATRIX){ 852 ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 853 } 854 855 ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 856 857 PetscFunctionReturn(0); 858 } 859 860