1 /* 2 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3 C = A * B 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 9 /* 10 Add a index set into a sorted linked list 11 input: 12 nidx - number of input indices 13 indices - interger array 14 idx_head - the header of the list 15 idx_unset - the value indicating the entry in the list is not set yet 16 lnk - linked list(an integer array) that is created 17 output: 18 nlnk - number of newly added indices 19 lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "LnklistAdd" 23 int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk) 24 { 25 int i,idx,lidx,entry,n; 26 27 PetscFunctionBegin; 28 n = 0; 29 lidx = idx_head; 30 i = nidx; 31 while (i){ 32 /* assume indices is almost in increasing order, starting from its end saves computation */ 33 entry = indices[--i]; 34 if (lnk[entry] == idx_unset) { /* new entry */ 35 do { 36 idx = lidx; 37 lidx = lnk[idx]; 38 } while (entry > lidx); 39 lnk[idx] = entry; 40 lnk[entry] = lidx; 41 n++; 42 } 43 } 44 *nlnk = n; 45 PetscFunctionReturn(0); 46 } 47 48 49 static int logkey_matmatmult = 0; 50 static int logkey_matmatmult_symbolic = 0; 51 static int logkey_matmatmult_numeric = 0; 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatMatMult" 55 /*@ 56 MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 57 58 Collective on Mat 59 60 Input Parameters: 61 + A - the left matrix 62 - B - the right matrix 63 64 Output Parameters: 65 . C - the product matrix 66 67 Notes: 68 C will be created and must be destroyed by the user with MatDestroy(). 69 70 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 71 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 72 73 Level: intermediate 74 75 .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 76 @*/ 77 int MatMatMult(Mat A,Mat B, Mat *C) { 78 /* Perhaps this "interface" routine should be moved into the interface directory.*/ 79 /* To facilitate implementations with varying types, QueryFunction is used.*/ 80 /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */ 81 int ierr; 82 char funct[80]; 83 int (*mult)(Mat,Mat,Mat*); 84 85 PetscFunctionBegin; 86 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 87 PetscValidType(A,1); 88 MatPreallocated(A); 89 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 90 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 91 92 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 93 PetscValidType(B,2); 94 MatPreallocated(B); 95 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 96 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 97 98 PetscValidPointer(C,3); 99 100 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 101 102 /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 103 /* When other implementations exist, attack the multiple dispatch problem. */ 104 ierr = PetscStrcpy(funct,"MatMatMult_seqaijseqaij");CHKERRQ(ierr); 105 ierr = PetscObjectQueryFunction((PetscObject)B,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr); 106 if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 107 ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr); 108 if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 109 110 ierr = (*mult)(A,B,C);CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 116 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) { 117 int ierr; 118 char symfunct[80],numfunct[80]; 119 int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat); 120 121 PetscFunctionBegin; 122 123 /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 124 /* When other implementations exist, attack the multiple dispatch problem. */ 125 ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 126 ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 127 if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 128 ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 129 if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 130 131 ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 132 ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 133 if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 134 ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 135 if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 136 137 ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 138 ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 139 ierr = (*numeric)(A,B,*C);CHKERRQ(ierr); 140 ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "MatMatMultSymbolic" 146 /*@ 147 MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 148 of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 149 150 Collective on Mat 151 152 Input Parameters: 153 + A - the left matrix 154 - B - the right matrix 155 156 Output Parameters: 157 . C - the matrix containing the ij structure of product matrix 158 159 Notes: 160 C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 161 162 This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 163 164 Level: intermediate 165 166 .seealso: MatMatMult(),MatMatMultNumeric() 167 @*/ 168 int MatMatMultSymbolic(Mat A,Mat B,Mat *C) { 169 /* Perhaps this "interface" routine should be moved into the interface directory.*/ 170 /* To facilitate implementations with varying types, QueryFunction is used.*/ 171 /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 172 int ierr; 173 char symfunct[80]; 174 int (*symbolic)(Mat,Mat,Mat *); 175 176 PetscFunctionBegin; 177 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 178 PetscValidType(A,1); 179 MatPreallocated(A); 180 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 181 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 182 183 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 184 PetscValidType(B,2); 185 MatPreallocated(B); 186 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 187 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 188 PetscValidPointer(C,3); 189 190 191 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 192 193 /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 194 /* When other implementations exist, attack the multiple dispatch problem. */ 195 ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 196 ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 197 if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 198 ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 199 if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 200 ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 201 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 207 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 208 { 209 int ierr; 210 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 211 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 212 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 213 int *ci,*cj,*lnk,idx0,idx; 214 int am=A->M,bn=B->N,bm=B->M; 215 int i,j,k,anzi,brow,bnzj,cnzi,nlnk; 216 MatScalar *ca; 217 218 PetscFunctionBegin; 219 /* Start timers */ 220 ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 221 222 /* Set up */ 223 /* Allocate ci array, arrays for fill computation and */ 224 /* free space for accumulating nonzero column info */ 225 ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 226 ci[0] = 0; 227 228 ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 229 for (i=0; i<bn; i++) lnk[i] = -1; 230 231 /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 232 ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 233 current_space = free_space; 234 235 /* Determine symbolic info for each row of the product: */ 236 for (i=0;i<am;i++) { 237 anzi = ai[i+1] - ai[i]; 238 cnzi = 0; 239 lnk[bn] = bn; 240 for (j=0;j<anzi;j++) { 241 brow = *aj++; 242 bnzj = bi[brow+1] - bi[brow]; 243 bjj = bj + bi[brow]; 244 /* add non-zero cols of B into the sorted linked list lnk */ 245 ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr); 246 cnzi += nlnk; 247 } 248 249 /* If free space is not available, make more free space */ 250 /* Double the amount of total space in the list */ 251 if (current_space->local_remaining<cnzi) { 252 printf("...%d -th row, double space ...\n",i); 253 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 254 } 255 256 /* Copy data into free space, and zero out denserow and lnk */ 257 idx = bn; 258 for (j=0; j<cnzi; j++){ 259 idx0 = idx; 260 idx = lnk[idx0]; 261 *current_space->array++ = idx; 262 lnk[idx0] = -1; 263 } 264 lnk[idx] = -1; 265 266 current_space->local_used += cnzi; 267 current_space->local_remaining -= cnzi; 268 269 ci[i+1] = ci[i] + cnzi; 270 } 271 272 /* Column indices are in the list of free space */ 273 /* Allocate space for cj, initialize cj, and */ 274 /* destroy list of free space and other temporary array(s) */ 275 ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 276 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 277 ierr = PetscFree(lnk);CHKERRQ(ierr); 278 279 /* Allocate space for ca */ 280 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 281 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 282 283 /* put together the new matrix */ 284 ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 285 286 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 287 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 288 c = (Mat_SeqAIJ *)((*C)->data); 289 c->freedata = PETSC_TRUE; 290 c->nonew = 0; 291 292 ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatMatMultNumeric" 298 /*@ 299 MatMatMultNumeric - Performs the numeric matrix-matrix product. 300 Call this routine after first calling MatMatMultSymbolic(). 301 302 Collective on Mat 303 304 Input Parameters: 305 + A - the left matrix 306 - B - the right matrix 307 308 Output Parameters: 309 . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 310 311 Notes: 312 C must have been created with MatMatMultSymbolic. 313 314 This routine is currently only implemented for SeqAIJ type matrices. 315 316 Level: intermediate 317 318 .seealso: MatMatMult(),MatMatMultSymbolic() 319 @*/ 320 int MatMatMultNumeric(Mat A,Mat B,Mat C){ 321 /* Perhaps this "interface" routine should be moved into the interface directory.*/ 322 /* To facilitate implementations with varying types, QueryFunction is used.*/ 323 /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 324 int ierr; 325 char numfunct[80]; 326 int (*numeric)(Mat,Mat,Mat); 327 328 PetscFunctionBegin; 329 330 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 331 PetscValidType(A,1); 332 MatPreallocated(A); 333 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 334 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 335 336 PetscValidHeaderSpecific(B,MAT_COOKIE,2); 337 PetscValidType(B,2); 338 MatPreallocated(B); 339 if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 340 if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 341 342 PetscValidHeaderSpecific(C,MAT_COOKIE,3); 343 PetscValidType(C,3); 344 MatPreallocated(C); 345 if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 346 if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 347 348 if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 349 if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 350 if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 351 352 /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 353 /* When other implementations exist, attack the multiple dispatch problem. */ 354 ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 355 ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 356 if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 357 ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 358 if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 359 360 ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 361 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 367 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 368 { 369 int ierr,flops=0; 370 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 371 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 372 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 373 int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 374 int am=A->M,cn=C->N; 375 int i,j,k,anzi,bnzi,cnzi,brow; 376 MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 377 378 PetscFunctionBegin; 379 380 /* Start timers */ 381 ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 382 383 /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 384 ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 385 ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 386 /* Traverse A row-wise. */ 387 /* Build the ith row in C by summing over nonzero columns in A, */ 388 /* the rows of B corresponding to nonzeros of A. */ 389 for (i=0;i<am;i++) { 390 anzi = ai[i+1] - ai[i]; 391 for (j=0;j<anzi;j++) { 392 brow = *aj++; 393 bnzi = bi[brow+1] - bi[brow]; 394 bjj = bj + bi[brow]; 395 baj = ba + bi[brow]; 396 for (k=0;k<bnzi;k++) { 397 temp[bjj[k]] += (*aa)*baj[k]; 398 } 399 flops += 2*bnzi; 400 aa++; 401 } 402 /* Store row back into C, and re-zero temp */ 403 cnzi = ci[i+1] - ci[i]; 404 for (j=0;j<cnzi;j++) { 405 ca[j] = temp[cj[j]]; 406 temp[cj[j]] = 0.0; 407 } 408 ca += cnzi; 409 cj += cnzi; 410 } 411 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 413 414 /* Free temp */ 415 ierr = PetscFree(temp);CHKERRQ(ierr); 416 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 417 ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 423 int RegisterMatMatMultRoutines_Private(Mat A) { 424 int ierr; 425 426 PetscFunctionBegin; 427 if (!logkey_matmatmult) { 428 ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 429 } 430 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij", 431 "MatMatMult_SeqAIJ_SeqAIJ", 432 MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 433 if (!logkey_matmatmult_symbolic) { 434 ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 435 } 436 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 437 "MatMatMult_Symbolic_SeqAIJ_SeqAIJ", 438 MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 439 if (!logkey_matmatmult_numeric) { 440 ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 441 } 442 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 443 "MatMatMult_Numeric_SeqAIJ_SeqAIJ", 444 MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447