1 /*$Id: matmatmult.c,v 1.15 2001/09/07 20:04:44 buschelm Exp $*/ 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 C = P^T * A * P 6 C = P * A * P^T 7 */ 8 9 #include "src/mat/impls/aij/seq/aij.h" 10 11 static int logkey_matmatmult = 0; 12 static int logkey_matmatmult_symbolic = 0; 13 static int logkey_matmatmult_numeric = 0; 14 15 static int logkey_matgetsymtranspose = 0; 16 static int logkey_mattranspose = 0; 17 18 static int logkey_matapplyptap = 0; 19 static int logkey_matapplyptap_symbolic = 0; 20 static int logkey_matapplyptap_numeric = 0; 21 22 static int logkey_matapplypapt = 0; 23 static int logkey_matapplypapt_symbolic = 0; 24 static int logkey_matapplypapt_numeric = 0; 25 26 typedef struct _Space *FreeSpaceList; 27 typedef struct _Space { 28 FreeSpaceList more_space; 29 int *array; 30 int *array_head; 31 int total_array_size; 32 int local_used; 33 int local_remaining; 34 } FreeSpace; 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "GetMoreSpace" 38 int GetMoreSpace(int size,FreeSpaceList *list) { 39 FreeSpaceList a; 40 int ierr; 41 42 PetscFunctionBegin; 43 ierr = PetscMalloc(sizeof(FreeSpace),&a);CHKERRQ(ierr); 44 ierr = PetscMalloc(size*sizeof(int),&(a->array_head));CHKERRQ(ierr); 45 a->array = a->array_head; 46 a->local_remaining = size; 47 a->local_used = 0; 48 a->total_array_size = 0; 49 a->more_space = NULL; 50 51 if (*list) { 52 (*list)->more_space = a; 53 a->total_array_size = (*list)->total_array_size; 54 } 55 56 a->total_array_size += size; 57 *list = a; 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MakeSpaceContiguous" 63 int MakeSpaceContiguous(int *space,FreeSpaceList *head) { 64 FreeSpaceList a; 65 int ierr; 66 67 PetscFunctionBegin; 68 while ((*head)!=NULL) { 69 a = (*head)->more_space; 70 ierr = PetscMemcpy(space,(*head)->array_head,((*head)->local_used)*sizeof(int));CHKERRQ(ierr); 71 space += (*head)->local_used; 72 ierr = PetscFree((*head)->array_head);CHKERRQ(ierr); 73 ierr = PetscFree(*head);CHKERRQ(ierr); 74 *head = a; 75 } 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 MatMatMult_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 81 C = A * B; 82 83 Note: C is assumed to be uncreated. 84 If this is not the case, Destroy C before calling this routine. 85 */ 86 #undef __FUNCT__ 87 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 88 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 89 { 90 int ierr; 91 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 92 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 93 int aishift=a->indexshift,bishift=b->indexshift; 94 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 95 int *ci,*cj,*denserow,*sparserow; 96 int an=A->N,am=A->M,bn=B->N,bm=B->M; 97 int i,j,k,anzi,brow,bnzj,cnzi; 98 MatScalar *ca; 99 100 PetscFunctionBegin; 101 /* some error checking which could be moved into interface layer */ 102 if (aishift || bishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 103 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 104 105 /* Set up timers */ 106 if (!logkey_matmatmult_symbolic) { 107 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 108 } 109 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 110 111 /* Set up */ 112 /* Allocate ci array, arrays for fill computation and */ 113 /* free space for accumulating nonzero column info */ 114 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 115 ci[0] = 0; 116 117 ierr = PetscMalloc((2*bn+1)*sizeof(int),&denserow);CHKERRQ(ierr); 118 ierr = PetscMemzero(denserow,(2*bn+1)*sizeof(int));CHKERRQ(ierr); 119 sparserow = denserow + bn; 120 121 /* Initial FreeSpace size is nnz(B)=bi[bm] */ 122 ierr = GetMoreSpace(bi[bm],&free_space);CHKERRQ(ierr); 123 current_space = free_space; 124 125 /* Determine symbolic info for each row of the product: */ 126 for (i=0;i<am;i++) { 127 anzi = ai[i+1] - ai[i]; 128 cnzi = 0; 129 for (j=0;j<anzi;j++) { 130 brow = *aj++; 131 bnzj = bi[brow+1] - bi[brow]; 132 bjj = bj + bi[brow]; 133 for (k=0;k<bnzj;k++) { 134 /* If column is not marked, mark it in compressed and uncompressed locations. */ 135 /* For simplicity, leave uncompressed row unsorted until finished with row, */ 136 /* and increment nonzero count for this row. */ 137 if (!denserow[bjj[k]]) { 138 denserow[bjj[k]] = -1; 139 sparserow[cnzi++] = bjj[k]; 140 } 141 } 142 } 143 144 /* sort sparserow */ 145 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 146 147 /* If free space is not available, make more free space */ 148 /* Double the amount of total space in the list */ 149 if (current_space->local_remaining<cnzi) { 150 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 151 } 152 153 /* Copy data into free space, and zero out denserow */ 154 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 155 current_space->array += cnzi; 156 current_space->local_used += cnzi; 157 current_space->local_remaining -= cnzi; 158 for (j=0;j<cnzi;j++) { 159 denserow[sparserow[j]] = 0; 160 } 161 ci[i+1] = ci[i] + cnzi; 162 } 163 164 /* Column indices are in the list of free space */ 165 /* Allocate space for cj, initialize cj, and */ 166 /* destroy list of free space and other temporary array(s) */ 167 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 168 ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 169 ierr = PetscFree(denserow);CHKERRQ(ierr); 170 171 /* Allocate space for ca */ 172 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 173 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 174 175 /* put together the new matrix */ 176 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 177 178 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 179 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 180 c = (Mat_SeqAIJ *)((*C)->data); 181 c->freedata = PETSC_TRUE; 182 c->nonew = 0; 183 184 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 185 PetscFunctionReturn(0); 186 } 187 188 /* 189 MatMatMult_Numeric_SeqAIJ_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 190 C=A*B; 191 Note: C must have been created by calling MatMatMult_Symbolic_SeqAIJ_SeqAIJ. 192 */ 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 195 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 196 { 197 int ierr,flops=0; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 199 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 200 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 201 int aishift=a->indexshift,bishift=b->indexshift,cishift=c->indexshift; 202 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 203 int an=A->N,am=A->M,bn=B->N,bm=B->M,cn=C->N,cm=C->M; 204 int i,j,k,anzi,bnzi,cnzi,brow; 205 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 206 207 PetscFunctionBegin; 208 209 /* This error checking should be unnecessary if the symbolic was performed */ 210 if (aishift || bishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 211 if (am!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",am,cm); 212 if (an!=bm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",an,bm); 213 if (bn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",bn,cn); 214 215 /* Set up timers */ 216 if (!logkey_matmatmult_numeric) { 217 ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 218 } 219 ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 220 221 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 222 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 223 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 224 /* Traverse A row-wise. */ 225 /* Build the ith row in C by summing over nonzero columns in A, */ 226 /* the rows of B corresponding to nonzeros of A. */ 227 for (i=0;i<am;i++) { 228 anzi = ai[i+1] - ai[i]; 229 for (j=0;j<anzi;j++) { 230 brow = *aj++; 231 bnzi = bi[brow+1] - bi[brow]; 232 bjj = bj + bi[brow]; 233 baj = ba + bi[brow]; 234 for (k=0;k<bnzi;k++) { 235 temp[bjj[k]] += (*aa)*baj[k]; 236 } 237 flops += 2*bnzi; 238 aa++; 239 } 240 /* Store row back into C, and re-zero temp */ 241 cnzi = ci[i+1] - ci[i]; 242 for (j=0;j<cnzi;j++) { 243 ca[j] = temp[cj[j]]; 244 temp[cj[j]] = 0.0; 245 } 246 ca += cnzi; 247 cj += cnzi; 248 } 249 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 252 /* Free temp */ 253 ierr = PetscFree(temp);CHKERRQ(ierr); 254 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 255 ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 261 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) { 262 int ierr; 263 264 PetscFunctionBegin; 265 if (!logkey_matmatmult) { 266 ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 267 } 268 ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 269 ierr = MatMatMult_Symbolic_SeqAIJ_SeqAIJ(A,B,C);CHKERRQ(ierr); 270 ierr = MatMatMult_Numeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 271 ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetSymbolicTranspose_SeqIJ" 277 int MatGetSymbolicTranspose_SeqAIJ(Mat A,int *Ati[],int *Atj[]) { 278 int ierr,i,j,anzj; 279 Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 280 int aishift = a->indexshift,an=A->N,am=A->M; 281 int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 282 283 PetscFunctionBegin; 284 285 ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 286 if (aishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 287 288 /* Set up timers */ 289 if (!logkey_matgetsymtranspose) { 290 ierr = PetscLogEventRegister(&logkey_matgetsymtranspose,"MatGetSymbolicTranspose",MAT_COOKIE);CHKERRQ(ierr); 291 } 292 ierr = PetscLogEventBegin(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 293 294 /* Allocate space for symbolic transpose info and work array */ 295 ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 296 ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 297 ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 298 ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 299 300 /* Walk through aj and count ## of non-zeros in each row of A^T. */ 301 /* Note: offset by 1 for fast conversion into csr format. */ 302 for (i=0;i<ai[am];i++) { 303 ati[aj[i]+1] += 1; 304 } 305 /* Form ati for csr format of A^T. */ 306 for (i=0;i<an;i++) { 307 ati[i+1] += ati[i]; 308 } 309 310 /* Copy ati into atfill so we have locations of the next free space in atj */ 311 ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 312 313 /* Walk through A row-wise and mark nonzero entries of A^T. */ 314 for (i=0;i<am;i++) { 315 anzj = ai[i+1] - ai[i]; 316 for (j=0;j<anzj;j++) { 317 atj[atfill[*aj]] = i; 318 atfill[*aj++] += 1; 319 } 320 } 321 322 /* Clean up temporary space and complete requests. */ 323 ierr = PetscFree(atfill);CHKERRQ(ierr); 324 *Ati = ati; 325 *Atj = atj; 326 327 ierr = PetscLogEventEnd(logkey_matgetsymtranspose,A,0,0,0);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 extern int MatTranspose_SeqAIJ(Mat A,Mat *B); 332 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatTranspose_SeqIJ_FAST" 335 int MatTranspose_SeqAIJ_FAST(Mat A,Mat *B) { 336 int ierr,i,j,anzj; 337 Mat At; 338 Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data,*at; 339 int aishift = a->indexshift,an=A->N,am=A->M; 340 int *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 341 MatScalar *ata,*aa=a->a; 342 PetscFunctionBegin; 343 344 if (aishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 345 346 /* Set up timers */ 347 if (!logkey_mattranspose) { 348 ierr = PetscLogEventRegister(&logkey_mattranspose,"MatTranspose_SeqAIJ_FAST",MAT_COOKIE);CHKERRQ(ierr); 349 } 350 ierr = PetscLogEventBegin(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 351 352 /* Allocate space for symbolic transpose info and work array */ 353 ierr = PetscMalloc((an+1)*sizeof(int),&ati);CHKERRQ(ierr); 354 ierr = PetscMalloc(ai[am]*sizeof(int),&atj);CHKERRQ(ierr); 355 ierr = PetscMalloc(ai[am]*sizeof(MatScalar),&ata);CHKERRQ(ierr); 356 ierr = PetscMalloc(an*sizeof(int),&atfill);CHKERRQ(ierr); 357 ierr = PetscMemzero(ati,(an+1)*sizeof(int));CHKERRQ(ierr); 358 /* Walk through aj and count ## of non-zeros in each row of A^T. */ 359 /* Note: offset by 1 for fast conversion into csr format. */ 360 for (i=0;i<ai[am];i++) { 361 ati[aj[i]+1] += 1; 362 } 363 /* Form ati for csr format of A^T. */ 364 for (i=0;i<an;i++) { 365 ati[i+1] += ati[i]; 366 } 367 368 /* Copy ati into atfill so we have locations of the next free space in atj */ 369 ierr = PetscMemcpy(atfill,ati,an*sizeof(int));CHKERRQ(ierr); 370 371 /* Walk through A row-wise and mark nonzero entries of A^T. */ 372 for (i=0;i<am;i++) { 373 anzj = ai[i+1] - ai[i]; 374 for (j=0;j<anzj;j++) { 375 atj[atfill[*aj]] = i; 376 ata[atfill[*aj]] = *aa++; 377 atfill[*aj++] += 1; 378 } 379 } 380 381 /* Clean up temporary space and complete requests. */ 382 ierr = PetscFree(atfill);CHKERRQ(ierr); 383 ierr = MatCreateSeqAIJWithArrays(A->comm,an,am,ati,atj,ata,&At);CHKERRQ(ierr); 384 at = (Mat_SeqAIJ *)(At->data); 385 at->freedata = PETSC_TRUE; 386 at->nonew = 0; 387 if (B) { 388 *B = At; 389 } else { 390 ierr = MatHeaderCopy(A,At); 391 } 392 ierr = PetscLogEventEnd(logkey_mattranspose,A,0,0,0);CHKERRQ(ierr); 393 PetscFunctionReturn(0); 394 } 395 396 #undef __FUNCT__ 397 #define __FUNCT__ "MatRestoreSymbolicTranspose" 398 int MatRestoreSymbolicTranspose(Mat A,int *ati[],int *atj[]) { 399 int ierr; 400 401 PetscFunctionBegin; 402 ierr = PetscLogInfo(A,"Restoring Symbolic Transpose.\n");CHKERRQ(ierr); 403 ierr = PetscFree(*ati);CHKERRQ(ierr); 404 ati = PETSC_NULL; 405 ierr = PetscFree(*atj);CHKERRQ(ierr); 406 atj = PETSC_NULL; 407 PetscFunctionReturn(0); 408 } 409 410 /* 411 MatApplyPtAP_Symbolic_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 412 C = P^T * A * P; 413 414 Note: C is assumed to be uncreated. 415 If this is not the case, Destroy C before calling this routine. 416 */ 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatApplyPtAP_Symbolic_SeqAIJ" 419 int MatApplyPtAP_Symbolic_SeqAIJ(Mat A,Mat P,Mat *C) { 420 int ierr; 421 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 422 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 423 int aishift=a->indexshift,pishift=p->indexshift; 424 int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 425 int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 426 int an=A->N,am=A->M,pn=P->N,pm=P->M; 427 int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 428 MatScalar *ca; 429 430 PetscFunctionBegin; 431 432 /* some error checking which could be moved into interface layer */ 433 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 434 if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 435 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 436 437 /* Set up timers */ 438 if (!logkey_matapplyptap_symbolic) { 439 ierr = PetscLogEventRegister(&logkey_matapplyptap_symbolic,"MatApplyPtAP_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 440 } 441 ierr = PetscLogEventBegin(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 442 443 /* Get ij structure of P^T */ 444 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 445 ptJ=ptj; 446 447 /* Allocate ci array, arrays for fill computation and */ 448 /* free space for accumulating nonzero column info */ 449 ierr = PetscMalloc(((pn+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 450 ci[0] = 0; 451 452 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 453 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 454 ptasparserow = ptadenserow + an; 455 denserow = ptasparserow + an; 456 sparserow = denserow + pn; 457 458 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 459 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 460 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 461 current_space = free_space; 462 463 /* Determine symbolic info for each row of C: */ 464 for (i=0;i<pn;i++) { 465 ptnzi = pti[i+1] - pti[i]; 466 ptanzi = 0; 467 /* Determine symbolic row of PtA: */ 468 for (j=0;j<ptnzi;j++) { 469 arow = *ptJ++; 470 anzj = ai[arow+1] - ai[arow]; 471 ajj = aj + ai[arow]; 472 for (k=0;k<anzj;k++) { 473 if (!ptadenserow[ajj[k]]) { 474 ptadenserow[ajj[k]] = -1; 475 ptasparserow[ptanzi++] = ajj[k]; 476 } 477 } 478 } 479 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 480 ptaj = ptasparserow; 481 cnzi = 0; 482 for (j=0;j<ptanzi;j++) { 483 prow = *ptaj++; 484 pnzj = pi[prow+1] - pi[prow]; 485 pjj = pj + pi[prow]; 486 for (k=0;k<pnzj;k++) { 487 if (!denserow[pjj[k]]) { 488 denserow[pjj[k]] = -1; 489 sparserow[cnzi++] = pjj[k]; 490 } 491 } 492 } 493 494 /* sort sparserow */ 495 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 496 497 /* If free space is not available, make more free space */ 498 /* Double the amount of total space in the list */ 499 if (current_space->local_remaining<cnzi) { 500 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 501 } 502 503 /* Copy data into free space, and zero out denserows */ 504 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 505 current_space->array += cnzi; 506 current_space->local_used += cnzi; 507 current_space->local_remaining -= cnzi; 508 509 for (j=0;j<ptanzi;j++) { 510 ptadenserow[ptasparserow[j]] = 0; 511 } 512 for (j=0;j<cnzi;j++) { 513 denserow[sparserow[j]] = 0; 514 } 515 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 516 /* For now, we will recompute what is needed. */ 517 ci[i+1] = ci[i] + cnzi; 518 } 519 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 520 /* Allocate space for cj, initialize cj, and */ 521 /* destroy list of free space and other temporary array(s) */ 522 ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 523 ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 524 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 525 526 /* Allocate space for ca */ 527 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 528 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 529 530 /* put together the new matrix */ 531 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 532 533 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 534 /* Since these are PETSc arrays, change flags to free them as necessary. */ 535 c = (Mat_SeqAIJ *)((*C)->data); 536 c->freedata = PETSC_TRUE; 537 c->nonew = 0; 538 539 /* Clean up. */ 540 ierr = MatRestoreSymbolicTranspose(P,&pti,&ptj);CHKERRQ(ierr); 541 542 ierr = PetscLogEventEnd(logkey_matapplyptap_symbolic,A,P,0,0);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 /* 547 MatApplyPtAP_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 548 C = P^T * A * P; 549 Note: C must have been created by calling MatApplyPtAP_Symbolic_SeqAIJ. 550 */ 551 #undef __FUNCT__ 552 #define __FUNCT__ "MatApplyPtAP_Numeric_SeqAIJ" 553 int MatApplyPtAP_Numeric_SeqAIJ(Mat A,Mat P,Mat C) { 554 int ierr,flops=0; 555 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 556 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 557 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 558 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 559 int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 560 int *ci=c->i,*cj=c->j,*cjj; 561 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 562 int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,cnzj,prow,crow; 563 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 564 565 PetscFunctionBegin; 566 567 /* This error checking should be unnecessary if the symbolic was performed */ 568 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 569 if (pn!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,cm); 570 if (pm!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,an); 571 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 572 if (pn!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn, cn); 573 574 /* Set up timers */ 575 if (!logkey_matapplyptap_numeric) { 576 ierr = PetscLogEventRegister(&logkey_matapplyptap_numeric,"MatApplyPtAP_Numeric",MAT_COOKIE);CHKERRQ(ierr); 577 } 578 ierr = PetscLogEventBegin(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 579 580 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 581 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 582 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 583 584 apj = (int *)(apa + cn); 585 apjdense = apj + cn; 586 587 for (i=0;i<am;i++) { 588 /* Form sparse row of A*P */ 589 anzi = ai[i+1] - ai[i]; 590 apnzj = 0; 591 for (j=0;j<anzi;j++) { 592 prow = *aj++; 593 pnzj = pi[prow+1] - pi[prow]; 594 pjj = pj + pi[prow]; 595 paj = pa + pi[prow]; 596 for (k=0;k<pnzj;k++) { 597 if (!apjdense[pjj[k]]) { 598 apjdense[pjj[k]] = -1; 599 apj[apnzj++] = pjj[k]; 600 } 601 apa[pjj[k]] += (*aa)*paj[k]; 602 } 603 flops += 2*pnzj; 604 aa++; 605 } 606 607 /* Sort the j index array for quick sparse axpy. */ 608 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 609 610 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 611 pnzi = pi[i+1] - pi[i]; 612 for (j=0;j<pnzi;j++) { 613 nextap = 0; 614 crow = *pJ++; 615 cnzj = ci[crow+1] - ci[crow]; 616 cjj = cj + ci[crow]; 617 caj = ca + ci[crow]; 618 /* Perform sparse axpy operation. Note cjj includes apj. */ 619 for (k=0;nextap<apnzj;k++) { 620 if (cjj[k]==apj[nextap]) { 621 caj[k] += (*pA)*apa[apj[nextap++]]; 622 } 623 } 624 flops += 2*apnzj; 625 pA++; 626 } 627 628 /* Zero the current row info for A*P */ 629 for (j=0;j<apnzj;j++) { 630 apa[apj[j]] = 0.; 631 apjdense[apj[j]] = 0; 632 } 633 } 634 635 /* Assemble the final matrix and clean up */ 636 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 637 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 638 ierr = PetscFree(apa);CHKERRQ(ierr); 639 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 640 ierr = PetscLogEventEnd(logkey_matapplyptap_numeric,A,P,C,0);CHKERRQ(ierr); 641 642 PetscFunctionReturn(0); 643 } 644 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "MatApplyPtAP_SeqAIJ" 648 int MatApplyPtAP_SeqAIJ(Mat A,Mat P,Mat *C) { 649 int ierr; 650 651 PetscFunctionBegin; 652 if (!logkey_matapplyptap) { 653 ierr = PetscLogEventRegister(&logkey_matapplyptap,"MatApplyPtAP",MAT_COOKIE);CHKERRQ(ierr); 654 } 655 ierr = PetscLogEventBegin(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 656 657 ierr = MatApplyPtAP_Symbolic_SeqAIJ(A,P,C);CHKERRQ(ierr); 658 ierr = MatApplyPtAP_Numeric_SeqAIJ(A,P,*C);CHKERRQ(ierr); 659 660 ierr = PetscLogEventEnd(logkey_matapplyptap,A,P,0,0);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 /* 665 MatApplyPAPt_Symbolic_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices 666 C = P * A * P^T; 667 668 Note: C is assumed to be uncreated. 669 If this is not the case, Destroy C before calling this routine. 670 */ 671 #undef __FUNCT__ 672 #define __FUNCT__ "MatApplyPAPt_Symbolic_SeqAIJ" 673 int MatApplyPAPt_Symbolic_SeqAIJ(Mat A,Mat P,Mat *C) { 674 /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */ 675 /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */ 676 int ierr; 677 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 678 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 679 int aishift=a->indexshift,pishift=p->indexshift; 680 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj; 681 int *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow; 682 int an=A->N,am=A->M,pn=P->N,pm=P->M; 683 int i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi; 684 MatScalar *ca; 685 686 PetscFunctionBegin; 687 688 /* some error checking which could be moved into interface layer */ 689 if (aishift || pishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 690 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 691 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 692 693 /* Set up timers */ 694 if (!logkey_matapplypapt_symbolic) { 695 ierr = PetscLogEventRegister(&logkey_matapplypapt_symbolic,"MatApplyPAPt_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 696 } 697 ierr = PetscLogEventBegin(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 698 699 /* Create ij structure of P^T */ 700 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 701 702 /* Allocate ci array, arrays for fill computation and */ 703 /* free space for accumulating nonzero column info */ 704 ierr = PetscMalloc(((pm+1)*1)*sizeof(int),&ci);CHKERRQ(ierr); 705 ci[0] = 0; 706 707 ierr = PetscMalloc((2*an+2*pm+1)*sizeof(int),&padenserow);CHKERRQ(ierr); 708 ierr = PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(int));CHKERRQ(ierr); 709 pasparserow = padenserow + an; 710 denserow = pasparserow + an; 711 sparserow = denserow + pm; 712 713 /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */ 714 /* This should be reasonable if sparsity of PAPt is similar to that of A. */ 715 ierr = GetMoreSpace((ai[am]/pn)*pm,&free_space); 716 current_space = free_space; 717 718 /* Determine fill for each row of C: */ 719 for (i=0;i<pm;i++) { 720 pnzi = pi[i+1] - pi[i]; 721 panzi = 0; 722 /* Get symbolic sparse row of PA: */ 723 for (j=0;j<pnzi;j++) { 724 arow = *pj++; 725 anzj = ai[arow+1] - ai[arow]; 726 ajj = aj + ai[arow]; 727 for (k=0;k<anzj;k++) { 728 if (!padenserow[ajj[k]]) { 729 padenserow[ajj[k]] = -1; 730 pasparserow[panzi++] = ajj[k]; 731 } 732 } 733 } 734 /* Using symbolic row of PA, determine symbolic row of C: */ 735 paj = pasparserow; 736 cnzi = 0; 737 for (j=0;j<panzi;j++) { 738 ptrow = *paj++; 739 ptnzj = pti[ptrow+1] - pti[ptrow]; 740 ptjj = ptj + pti[ptrow]; 741 for (k=0;k<ptnzj;k++) { 742 if (!denserow[ptjj[k]]) { 743 denserow[ptjj[k]] = -1; 744 sparserow[cnzi++] = ptjj[k]; 745 } 746 } 747 } 748 749 /* sort sparse representation */ 750 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 751 752 /* If free space is not available, make more free space */ 753 /* Double the amount of total space in the list */ 754 if (current_space->local_remaining<cnzi) { 755 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 756 } 757 758 /* Copy data into free space, and zero out dense row */ 759 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 760 current_space->array += cnzi; 761 current_space->local_used += cnzi; 762 current_space->local_remaining -= cnzi; 763 764 for (j=0;j<panzi;j++) { 765 padenserow[pasparserow[j]] = 0; 766 } 767 for (j=0;j<cnzi;j++) { 768 denserow[sparserow[j]] = 0; 769 } 770 ci[i+1] = ci[i] + cnzi; 771 } 772 /* column indices are in the list of free space */ 773 /* Allocate space for cj, initialize cj, and */ 774 /* destroy list of free space and other temporary array(s) */ 775 ierr = PetscMalloc((ci[pm]+1)*sizeof(int),&cj);CHKERRQ(ierr); 776 ierr = MakeSpaceContiguous(cj,&free_space);CHKERRQ(ierr); 777 ierr = PetscFree(padenserow);CHKERRQ(ierr); 778 779 /* Allocate space for ca */ 780 ierr = PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 781 ierr = PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));CHKERRQ(ierr); 782 783 /* put together the new matrix */ 784 ierr = MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);CHKERRQ(ierr); 785 786 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 787 /* Since these are PETSc arrays, change flags to free them as necessary. */ 788 c = (Mat_SeqAIJ *)((*C)->data); 789 c->freedata = PETSC_TRUE; 790 c->nonew = 0; 791 792 /* Clean up. */ 793 ierr = MatRestoreSymbolicTranspose(P,&pti,&ptj);CHKERRQ(ierr); 794 795 ierr = PetscLogEventEnd(logkey_matapplypapt_symbolic,A,P,0,0);CHKERRQ(ierr); 796 PetscFunctionReturn(0); 797 } 798 799 /* 800 MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices 801 C = P * A * P^T; 802 Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ. 803 */ 804 #undef __FUNCT__ 805 #define __FUNCT__ "MatApplyPAPt_Numeric_SeqAIJ" 806 int MatApplyPAPt_Numeric_SeqAIJ(Mat A,Mat P,Mat C) { 807 int ierr,flops=0; 808 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 809 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 810 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 811 int aishift=a->indexshift,pishift=p->indexshift,cishift=c->indexshift; 812 int *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj; 813 int *ci=c->i,*cj=c->j; 814 int an=A->N,am=A->M,pn=P->N,pm=P->M,cn=C->N,cm=C->M; 815 int i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi; 816 MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum; 817 818 PetscFunctionBegin; 819 820 /* This error checking should be unnecessary if the symbolic was performed */ 821 if (aishift || pishift || cishift) SETERRQ(PETSC_ERR_SUP,"Shifted matrix indices are not supported."); 822 if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm,cm); 823 if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pn,am); 824 if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",am, an); 825 if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",pm, cn); 826 827 /* Set up timers */ 828 if (!logkey_matapplypapt_numeric) { 829 ierr = PetscLogEventRegister(&logkey_matapplypapt_numeric,"MatApplyPAPt_Numeric",MAT_COOKIE);CHKERRQ(ierr); 830 } 831 ierr = PetscLogEventBegin(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 832 833 ierr = PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(int)),&paa);CHKERRQ(ierr); 834 ierr = PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 835 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 836 837 paj = (int *)(paa + an); 838 pajdense = paj + an; 839 840 for (i=0;i<pm;i++) { 841 /* Form sparse row of P*A */ 842 pnzi = pi[i+1] - pi[i]; 843 panzj = 0; 844 for (j=0;j<pnzi;j++) { 845 arow = *pj++; 846 anzj = ai[arow+1] - ai[arow]; 847 ajj = aj + ai[arow]; 848 aaj = aa + ai[arow]; 849 for (k=0;k<anzj;k++) { 850 if (!pajdense[ajj[k]]) { 851 pajdense[ajj[k]] = -1; 852 paj[panzj++] = ajj[k]; 853 } 854 paa[ajj[k]] += (*pa)*aaj[k]; 855 } 856 flops += 2*anzj; 857 pa++; 858 } 859 860 /* Sort the j index array for quick sparse axpy. */ 861 ierr = PetscSortInt(panzj,paj);CHKERRQ(ierr); 862 863 /* Compute P*A*P^T using sparse inner products. */ 864 /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */ 865 cnzi = ci[i+1] - ci[i]; 866 for (j=0;j<cnzi;j++) { 867 /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */ 868 ptcol = *cj++; 869 ptnzj = pi[ptcol+1] - pi[ptcol]; 870 ptj = pjj + pi[ptcol]; 871 ptaj = pta + pi[ptcol]; 872 sum = 0.; 873 k1 = 0; 874 k2 = 0; 875 while ((k1<panzj) && (k2<ptnzj)) { 876 if (paj[k1]==ptj[k2]) { 877 sum += paa[paj[k1++]]*pta[k2++]; 878 } else if (paj[k1] < ptj[k2]) { 879 k1++; 880 } else /* if (paj[k1] > ptj[k2]) */ { 881 k2++; 882 } 883 } 884 *ca++ = sum; 885 } 886 887 /* Zero the current row info for P*A */ 888 for (j=0;j<panzj;j++) { 889 paa[paj[j]] = 0.; 890 pajdense[paj[j]] = 0; 891 } 892 } 893 894 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 895 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 897 ierr = PetscLogEventEnd(logkey_matapplypapt_numeric,A,P,C,0);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatApplyPAPt_SeqAIJ" 903 int MatApplyPAPt_SeqAIJ(Mat A,Mat P,Mat *C) { 904 int ierr; 905 906 PetscFunctionBegin; 907 if (!logkey_matapplypapt) { 908 ierr = PetscLogEventRegister(&logkey_matapplypapt,"MatApplyPAPt",MAT_COOKIE);CHKERRQ(ierr); 909 } 910 ierr = PetscLogEventBegin(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 911 ierr = MatApplyPAPt_Symbolic_SeqAIJ(A,P,C);CHKERRQ(ierr); 912 ierr = MatApplyPAPt_Numeric_SeqAIJ(A,P,*C);CHKERRQ(ierr); 913 ierr = PetscLogEventEnd(logkey_matapplypapt,A,P,0,0);CHKERRQ(ierr); 914 PetscFunctionReturn(0); 915 } 916