1 2 /* 3 Defines matrix-matrix product routines for pairs of SeqAIJ matrices 4 C = A * B 5 */ 6 7 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/utils/freespace.h> 9 #include <petscbt.h> 10 #include <../src/mat/impls/dense/seq/dense.h> /*I "petscmat.h" I*/ 11 /* 12 #define DEBUG_MATMATMULT 13 */ 14 EXTERN_C_BEGIN 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 17 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 18 { 19 PetscErrorCode ierr; 20 PetscBool scalable=PETSC_FALSE,scalable_new; 21 22 PetscFunctionBegin; 23 if (scall == MAT_INITIAL_MATRIX){ 24 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 25 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_scalable",&scalable,PETSC_NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmatmult_scalable_new",&scalable_new,PETSC_NULL);CHKERRQ(ierr); 27 if (scalable_new){ 28 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,C);CHKERRQ(ierr); 29 } else if (scalable) { 30 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);CHKERRQ(ierr); 31 }else { 32 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 33 } 34 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 35 } 36 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 37 ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr); 38 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 39 PetscFunctionReturn(0); 40 } 41 EXTERN_C_END 42 43 /* 44 MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ - Get symbolic structure of C=A*B 45 Input Parameter: 46 . am, Ai, Aj - number of rows and structure of A 47 . bm, bn, Bi, Bj - number of rows, columns, and structure of B 48 . fill - filll ratio See MatMatMult() 49 50 Output Parameter: 51 . Ci, Cj - structure of C = A*B 52 . nspacedouble - number of extra mallocs 53 */ 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ" 56 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 57 { 58 PetscErrorCode ierr; 59 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 60 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 61 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 62 const PetscInt *aj=Aj,*bi=Bi,*bj=Bj,*bjj; 63 PetscInt *ci,*cj; 64 PetscInt i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,ndouble=0; 65 PetscBT lnkbt; 66 67 PetscFunctionBegin; 68 /* Allocate ci array, arrays for fill computation and */ 69 /* free space for accumulating nonzero column info */ 70 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 71 ci[0] = 0; 72 73 /* create and initialize a linked list */ 74 nlnk = bn+1; 75 ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 76 77 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 78 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 79 current_space = free_space; 80 81 /* Determine ci and cj */ 82 for (i=0; i<am; i++) { 83 anzi = Ai[i+1] - Ai[i]; 84 cnzi = 0; 85 aj = Aj + Ai[i]; 86 for (j=0; j<anzi; j++){ 87 brow = aj[j]; 88 bnzj = bi[brow+1] - bi[brow]; 89 bjj = bj + bi[brow]; 90 /* add non-zero cols of B into the sorted linked list lnk */ 91 ierr = PetscLLAddSorted(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 92 cnzi += nlnk; 93 } 94 95 /* If free space is not available, make more free space */ 96 /* Double the amount of total space in the list */ 97 if (current_space->local_remaining<cnzi) { 98 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 99 ndouble++; 100 } 101 102 /* Copy data into free space, then initialize lnk */ 103 ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 104 current_space->array += cnzi; 105 current_space->local_used += cnzi; 106 current_space->local_remaining -= cnzi; 107 ci[i+1] = ci[i] + cnzi; 108 } 109 110 /* Column indices are in the list of free space */ 111 /* Allocate space for cj, initialize cj, and */ 112 /* destroy list of free space and other temporary array(s) */ 113 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 114 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 115 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 116 117 *Ci = ci; 118 *Cj = cj; 119 *nspacedouble = ndouble; 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable" 125 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 126 { 127 PetscErrorCode ierr; 128 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 129 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 130 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 131 const PetscInt *aj=Aj,*bi=Bi,*bj; 132 PetscInt *ci,*cj; 133 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 134 PetscBT bt; 135 PetscInt nlnk_max,lnk_max=bn,*lnk,*nlnk; 136 137 PetscFunctionBegin; 138 /* Allocate ci array, arrays for fill computation and */ 139 /* free space for accumulating nonzero column info */ 140 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 141 ci[0] = 0; 142 143 /* create and initialize a condensed linked list */ 144 crmax = a->rmax*b->rmax; 145 nlnk_max = PetscMin(crmax,bn); 146 if (!nlnk_max && bn) { 147 nlnk_max = bn; /* in case rmax is not defined for A or B */ 148 #if defined(PETSC_USE_DEBUGGING) 149 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 150 #endif 151 } 152 #if defined(DEBUG_MATMATMULT) 153 ierr = (PETSC_COMM_SELF,"LLCondensedCreate nlnk_max=%d, bn %d, crmax %d\n",nlnk_max,bn,crmax); 154 #endif 155 ierr = PetscLLCondensedCreate(nlnk_max,lnk_max,lnk,nlnk,bt);CHKERRQ(ierr); 156 157 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 158 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 159 current_space = free_space; 160 161 /* Determine ci and cj */ 162 for (i=0; i<am; i++) { 163 anzi = Ai[i+1] - Ai[i]; 164 cnzi = 0; 165 aj = Aj + Ai[i]; 166 for (j=0; j<anzi; j++){ 167 brow = aj[j]; 168 bnzj = bi[brow+1] - bi[brow]; 169 bj = Bj + bi[brow]; 170 /* add non-zero cols of B into the condensed sorted linked list lnk */ 171 ierr = PetscLLCondensedAddSorted(nlnk_max,lnk_max,bnzj,bj,nlnk,lnk,bt);CHKERRQ(ierr); 172 } 173 cnzi = *nlnk; 174 ci[i+1] = ci[i] + cnzi; 175 176 /* If free space is not available, make more free space */ 177 /* Double the amount of total space in the list */ 178 if (current_space->local_remaining<cnzi) { 179 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 180 ndouble++; 181 } 182 183 /* Copy data into free space, then initialize lnk */ 184 ierr = PetscLLCondensedClean(nlnk_max,lnk_max,cnzi,current_space->array,nlnk,lnk,bt);CHKERRQ(ierr); 185 current_space->array += cnzi; 186 current_space->local_used += cnzi; 187 current_space->local_remaining -= cnzi; 188 } 189 190 /* Column indices are in the list of free space */ 191 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 192 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 193 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 194 ierr = PetscLLCondensedDestroy(lnk,bt);CHKERRQ(ierr); 195 196 *Ci = ci; 197 *Cj = cj; 198 *nspacedouble = ndouble; 199 PetscFunctionReturn(0); 200 } 201 202 #define foo 203 #if !defined(foo) 204 /* 205 Does not use bitarray 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 209 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 210 { 211 PetscErrorCode ierr; 212 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 213 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 214 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 215 const PetscInt *aj=Aj,*bi=Bi,*bj; 216 PetscInt *ci,*cj; 217 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 218 PetscInt lnk_max=bn,*lnk; 219 220 PetscFunctionBegin; 221 /* Allocate ci array, arrays for fill computation and */ 222 /* free space for accumulating nonzero column info */ 223 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 224 ci[0] = 0; 225 226 /* create and initialize a condensed linked list */ 227 crmax = a->rmax*b->rmax; 228 lnk_max = PetscMin(crmax,bn); 229 if (!lnk_max && bn) { 230 lnk_max = bn; /* in case rmax is not defined for A or B */ 231 #if defined(PETSC_USE_DEBUGGING) 232 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 233 #endif 234 } 235 #if defined(DEBUG_MATMATMULT) 236 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 237 #endif 238 ierr = PetscLLCondensedCreate_new(lnk_max,lnk);CHKERRQ(ierr); 239 240 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 241 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 242 current_space = free_space; 243 244 /* Determine ci and cj */ 245 for (i=0; i<am; i++) { 246 anzi = Ai[i+1] - Ai[i]; 247 cnzi = 0; 248 aj = Aj + Ai[i]; 249 for (j=0; j<anzi; j++){ 250 brow = aj[j]; 251 bnzj = bi[brow+1] - bi[brow]; 252 bj = Bj + bi[brow]; 253 /* add non-zero cols of B into the condensed sorted linked list lnk */ 254 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 255 ierr = PetscLLCondensedAddSorted_new(bnzj,bj,lnk);CHKERRQ(ierr); 256 } 257 cnzi = *lnk; 258 ci[i+1] = ci[i] + cnzi; 259 260 /* If free space is not available, make more free space */ 261 /* Double the amount of total space in the list */ 262 if (current_space->local_remaining<cnzi) { 263 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 264 ndouble++; 265 } 266 267 /* Copy data into free space, then initialize lnk */ 268 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 269 ierr = PetscLLCondensedClean_new(lnk_max,cnzi,current_space->array,lnk);CHKERRQ(ierr); 270 current_space->array += cnzi; 271 current_space->local_used += cnzi; 272 current_space->local_remaining -= cnzi; 273 } 274 275 /* Column indices are in the list of free space */ 276 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 277 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 278 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 279 ierr = PetscLLCondensedDestroy_new(lnk);CHKERRQ(ierr); 280 281 *Ci = ci; 282 *Cj = cj; 283 *nspacedouble = ndouble; 284 PetscFunctionReturn(0); 285 } 286 287 #else 288 #include "private/sysimpl.h" 289 /* 290 Does not use bitarray 291 */ 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new" 294 PetscErrorCode MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,PetscInt *Ci[],PetscInt *Cj[],PetscInt *nspacedouble) 295 { 296 PetscErrorCode ierr; 297 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 299 const PetscInt *Ai=a->i,*Aj=a->j,*Bi=b->i,*Bj=b->j,am=A->rmap->N,bm=B->rmap->N,bn=B->cmap->N; 300 const PetscInt *aj=Aj,*bi=Bi,*bj; 301 PetscInt *ci,*cj; 302 PetscInt i,j,anzi,brow,bnzj,cnzi,crmax,ndouble=0; 303 PetscInt lnk_max=bn,*lnk; 304 305 PetscFunctionBegin; 306 /* Allocate ci array, arrays for fill computation and */ 307 /* free space for accumulating nonzero column info */ 308 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 309 ci[0] = 0; 310 311 /* create and initialize a condensed linked list */ 312 crmax = a->rmax*b->rmax; 313 lnk_max = PetscMin(crmax,bn); 314 if (!lnk_max && bn) { 315 lnk_max = bn; /* in case rmax is not defined for A or B */ 316 #if defined(PETSC_USE_DEBUGGING) 317 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: Armax or Brmax is not defined in MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable!\n"); 318 #endif 319 } 320 #if defined(DEBUG_MATMATMULT) 321 ierr = (PETSC_COMM_SELF,"LLCondensedCreate lnk_max=%d, bn %d, crmax %d\n",lnk_max,bn,crmax); 322 #endif 323 ierr = PetscHeapMergeCreate(lnk_max,lnk);CHKERRQ(ierr); 324 325 /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */ 326 ierr = PetscFreeSpaceGet((PetscInt)(fill*(Ai[am]+Bi[bm])),&free_space);CHKERRQ(ierr); 327 current_space = free_space; 328 329 /* Determine ci and cj */ 330 for (i=0; i<am; i++) { 331 anzi = Ai[i+1] - Ai[i]; 332 cnzi = 0; 333 aj = Aj + Ai[i]; 334 for (j=0; j<anzi; j++){ 335 brow = aj[j]; 336 bnzj = bi[brow+1] - bi[brow]; 337 bj = Bj + bi[brow]; 338 /* add non-zero cols of B into the condensed sorted linked list lnk */ 339 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 340 ierr = PetscHeapMergeAddSorted(bnzj,bj,lnk);CHKERRQ(ierr); 341 } 342 cnzi = *lnk; 343 ci[i+1] = ci[i] + cnzi; 344 345 /* If free space is not available, make more free space */ 346 /* Double the amount of total space in the list */ 347 if (current_space->local_remaining<cnzi) { 348 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 349 ndouble++; 350 } 351 352 /* Copy data into free space, then initialize lnk */ 353 /* ierr = PetscLLCondensedView(lnk_max,lnk_max,lnk,lnk);CHKERRQ(ierr); */ 354 ierr = PetscHeapMergeClean(lnk_max,cnzi,current_space->array,lnk);CHKERRQ(ierr); 355 current_space->array += cnzi; 356 current_space->local_used += cnzi; 357 current_space->local_remaining -= cnzi; 358 } 359 360 /* Column indices are in the list of free space */ 361 /* Allocate and copy column indices to cj, then destroy list of free space, lnk and bt */ 362 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 363 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 364 ierr = PetscHeapMergeDestroy(lnk);CHKERRQ(ierr); 365 366 *Ci = ci; 367 *Cj = cj; 368 *nspacedouble = ndouble; 369 PetscFunctionReturn(0); 370 } 371 #endif 372 373 #undef __FUNCT__ 374 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ" 375 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 376 { 377 PetscErrorCode ierr; 378 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 379 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 380 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 381 MatScalar *ca; 382 PetscReal afill; 383 384 PetscFunctionBegin; 385 #if defined(DEBUG_MATMATMULT) 386 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ...\n");CHKERRQ(ierr); 387 #endif 388 /* Get ci and cj */ 389 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 390 391 /* Allocate space for ca */ 392 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 393 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 394 395 /* put together the new symbolic matrix */ 396 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 397 398 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 399 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 400 c = (Mat_SeqAIJ *)((*C)->data); 401 c->free_a = PETSC_TRUE; 402 c->free_ij = PETSC_TRUE; 403 c->nonew = 0; 404 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; /* fast, needs non-scalable O(bn) array 'abdense' */ 405 ierr = PetscMalloc(bn*sizeof(PetscScalar),&c->matmult_abdense);CHKERRQ(ierr); 406 ierr = PetscMemzero(c->matmult_abdense,bn*sizeof(PetscScalar));CHKERRQ(ierr); 407 408 /* set MatInfo */ 409 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 410 if (afill < 1.0) afill = 1.0; 411 c->maxnz = ci[am]; 412 c->nz = ci[am]; 413 (*C)->info.mallocs = nspacedouble; 414 (*C)->info.fill_ratio_given = fill; 415 (*C)->info.fill_ratio_needed = afill; 416 417 #if defined(PETSC_USE_INFO) 418 if (ci[am]) { 419 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 420 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 421 } else { 422 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 423 } 424 #endif 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ" 430 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 431 { 432 PetscErrorCode ierr; 433 PetscLogDouble flops=0.0; 434 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 435 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 436 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 437 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 438 PetscInt am=A->rmap->n,cm=C->rmap->n; 439 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 440 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 441 PetscScalar *ab_dense=c->matmult_abdense; 442 443 PetscFunctionBegin; 444 /* clean old values in C */ 445 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 446 /* Traverse A row-wise. */ 447 /* Build the ith row in C by summing over nonzero columns in A, */ 448 /* the rows of B corresponding to nonzeros of A. */ 449 for (i=0; i<am; i++) { 450 anzi = ai[i+1] - ai[i]; 451 for (j=0; j<anzi; j++) { 452 brow = aj[j]; 453 bnzi = bi[brow+1] - bi[brow]; 454 bjj = bj + bi[brow]; 455 baj = ba + bi[brow]; 456 /* perform dense axpy */ 457 valtmp = aa[j]; 458 for (k=0; k<bnzi; k++) { 459 ab_dense[bjj[k]] += valtmp*baj[k]; 460 } 461 flops += 2*bnzi; 462 } 463 aj += anzi; aa += anzi; 464 465 cnzi = ci[i+1] - ci[i]; 466 for (k=0; k<cnzi; k++) { 467 ca[k] += ab_dense[cj[k]]; 468 ab_dense[cj[k]] = 0.0; /* zero ab_dense */ 469 } 470 flops += cnzi; 471 cj += cnzi; ca += cnzi; 472 } 473 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 474 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 475 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable" 481 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C) 482 { 483 PetscErrorCode ierr; 484 PetscLogDouble flops=0.0; 485 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 486 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 487 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 488 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 489 PetscInt am=A->rmap->N,cm=C->rmap->N; 490 PetscInt i,j,k,anzi,bnzi,cnzi,brow; 491 PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp; 492 PetscInt nextb; 493 494 PetscFunctionBegin; 495 #if defined(DEBUG_MATMATMULT) 496 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable...\n");CHKERRQ(ierr); 497 #endif 498 /* clean old values in C */ 499 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 500 /* Traverse A row-wise. */ 501 /* Build the ith row in C by summing over nonzero columns in A, */ 502 /* the rows of B corresponding to nonzeros of A. */ 503 for (i=0;i<am;i++) { 504 anzi = ai[i+1] - ai[i]; 505 cnzi = ci[i+1] - ci[i]; 506 for (j=0;j<anzi;j++) { 507 brow = aj[j]; 508 bnzi = bi[brow+1] - bi[brow]; 509 bjj = bj + bi[brow]; 510 baj = ba + bi[brow]; 511 /* perform sparse axpy */ 512 valtmp = aa[j]; 513 nextb = 0; 514 for (k=0; nextb<bnzi; k++) { 515 if (cj[k] == bjj[nextb]){ /* ccol == bcol */ 516 ca[k] += valtmp*baj[nextb++]; 517 } 518 } 519 flops += 2*bnzi; 520 } 521 aj += anzi; aa += anzi; 522 cj += cnzi; ca += cnzi; 523 } 524 525 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new" 533 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_new(Mat A,Mat B,PetscReal fill,Mat *C) 534 { 535 PetscErrorCode ierr; 536 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 537 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 538 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 539 MatScalar *ca; 540 PetscReal afill; 541 542 PetscFunctionBegin; 543 #if defined(DEBUG_MATMATMULT) 544 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 545 #endif 546 /* Get ci and cj */ 547 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable_new(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 548 549 /* Allocate space for ca */ 550 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 551 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 552 553 /* put together the new symbolic matrix */ 554 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 555 556 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 557 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 558 c = (Mat_SeqAIJ *)((*C)->data); 559 c->free_a = PETSC_TRUE; 560 c->free_ij = PETSC_TRUE; 561 c->nonew = 0; 562 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 563 564 /* set MatInfo */ 565 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 566 if (afill < 1.0) afill = 1.0; 567 c->maxnz = ci[am]; 568 c->nz = ci[am]; 569 (*C)->info.mallocs = nspacedouble; 570 (*C)->info.fill_ratio_given = fill; 571 (*C)->info.fill_ratio_needed = afill; 572 573 #if defined(PETSC_USE_INFO) 574 if (ci[am]) { 575 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 576 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 577 } else { 578 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 579 } 580 #endif 581 PetscFunctionReturn(0); 582 } 583 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable" 587 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat *C) 588 { 589 PetscErrorCode ierr; 590 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 591 PetscInt *ai=a->i,*bi=b->i,*ci,*cj; 592 PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N,nspacedouble; 593 MatScalar *ca; 594 PetscReal afill; 595 596 PetscFunctionBegin; 597 #if defined(DEBUG_MATMATMULT) 598 ierr = PetscPrintf(PETSC_COMM_SELF,"MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable Armax %d, Brmax %d\n",a->rmax,b->rmax);CHKERRQ(ierr); 599 #endif 600 /* Get ci and cj */ 601 ierr = MatGetSymbolicMatMatMult_SeqAIJ_SeqAIJ_Scalable(A,B,fill,&ci,&cj,&nspacedouble);CHKERRQ(ierr); 602 603 /* Allocate space for ca */ 604 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 605 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 606 607 /* put together the new symbolic matrix */ 608 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 609 610 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 611 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 612 c = (Mat_SeqAIJ *)((*C)->data); 613 c->free_a = PETSC_TRUE; 614 c->free_ij = PETSC_TRUE; 615 c->nonew = 0; 616 (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable; /* slower, less memory */ 617 618 /* set MatInfo */ 619 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 620 if (afill < 1.0) afill = 1.0; 621 c->maxnz = ci[am]; 622 c->nz = ci[am]; 623 (*C)->info.mallocs = nspacedouble; 624 (*C)->info.fill_ratio_given = fill; 625 (*C)->info.fill_ratio_needed = afill; 626 627 #if defined(PETSC_USE_INFO) 628 if (ci[am]) { 629 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 630 ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 631 } else { 632 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 633 } 634 #endif 635 PetscFunctionReturn(0); 636 } 637 638 639 /* This routine is not used. Should be removed! */ 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ" 642 PetscErrorCode MatMatTransposeMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 643 { 644 PetscErrorCode ierr; 645 646 PetscFunctionBegin; 647 if (scall == MAT_INITIAL_MATRIX){ 648 ierr = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 649 } 650 ierr = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "PetscContainerDestroy_Mat_MatMatTransMult" 656 PetscErrorCode PetscContainerDestroy_Mat_MatMatTransMult(void *ptr) 657 { 658 PetscErrorCode ierr; 659 Mat_MatMatTransMult *multtrans=(Mat_MatMatTransMult*)ptr; 660 661 PetscFunctionBegin; 662 ierr = MatTransposeColoringDestroy(&multtrans->matcoloring);CHKERRQ(ierr); 663 ierr = MatDestroy(&multtrans->Bt_den);CHKERRQ(ierr); 664 ierr = MatDestroy(&multtrans->ABt_den);CHKERRQ(ierr); 665 ierr = PetscFree(multtrans);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatDestroy_SeqAIJ_MatMatMultTrans" 671 PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A) 672 { 673 PetscErrorCode ierr; 674 PetscContainer container; 675 Mat_MatMatTransMult *multtrans=PETSC_NULL; 676 677 PetscFunctionBegin; 678 ierr = PetscObjectQuery((PetscObject)A,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 679 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 680 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 681 A->ops->destroy = multtrans->destroy; 682 if (A->ops->destroy) { 683 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 684 } 685 ierr = PetscObjectCompose((PetscObject)A,"Mat_MatMatTransMult",0);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ" 691 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 692 { 693 PetscErrorCode ierr; 694 Mat Bt; 695 PetscInt *bti,*btj; 696 Mat_MatMatTransMult *multtrans; 697 PetscContainer container; 698 PetscLogDouble t0,tf,etime2=0.0; 699 700 PetscFunctionBegin; 701 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 702 /* create symbolic Bt */ 703 ierr = MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 704 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,PETSC_NULL,&Bt);CHKERRQ(ierr); 705 706 /* get symbolic C=A*Bt */ 707 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);CHKERRQ(ierr); 708 709 /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */ 710 ierr = PetscNew(Mat_MatMatTransMult,&multtrans);CHKERRQ(ierr); 711 712 /* attach the supporting struct to C */ 713 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 714 ierr = PetscContainerSetPointer(container,multtrans);CHKERRQ(ierr); 715 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_Mat_MatMatTransMult);CHKERRQ(ierr); 716 ierr = PetscObjectCompose((PetscObject)(*C),"Mat_MatMatTransMult",(PetscObject)container);CHKERRQ(ierr); 717 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 718 719 multtrans->usecoloring = PETSC_FALSE; 720 multtrans->destroy = (*C)->ops->destroy; 721 (*C)->ops->destroy = MatDestroy_SeqAIJ_MatMatMultTrans; 722 723 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 724 etime2 += tf - t0; 725 726 ierr = PetscOptionsGetBool(PETSC_NULL,"-matmattransmult_color",&multtrans->usecoloring,PETSC_NULL);CHKERRQ(ierr); 727 if (multtrans->usecoloring){ 728 /* Create MatTransposeColoring from symbolic C=A*B^T */ 729 MatTransposeColoring matcoloring; 730 ISColoring iscoloring; 731 Mat Bt_dense,C_dense; 732 PetscLogDouble etime0=0.0,etime01=0.0,etime1=0.0; 733 734 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 735 ierr = MatGetColoring(*C,MATCOLORINGLF,&iscoloring);CHKERRQ(ierr); 736 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 737 etime0 += tf - t0; 738 739 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 740 ierr = MatTransposeColoringCreate(*C,iscoloring,&matcoloring);CHKERRQ(ierr); 741 multtrans->matcoloring = matcoloring; 742 ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 743 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 744 etime01 += tf - t0; 745 746 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 747 /* Create Bt_dense and C_dense = A*Bt_dense */ 748 ierr = MatCreate(PETSC_COMM_SELF,&Bt_dense);CHKERRQ(ierr); 749 ierr = MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);CHKERRQ(ierr); 750 ierr = MatSetType(Bt_dense,MATSEQDENSE);CHKERRQ(ierr); 751 ierr = MatSeqDenseSetPreallocation(Bt_dense,PETSC_NULL);CHKERRQ(ierr); 752 Bt_dense->assembled = PETSC_TRUE; 753 multtrans->Bt_den = Bt_dense; 754 755 ierr = MatCreate(PETSC_COMM_SELF,&C_dense);CHKERRQ(ierr); 756 ierr = MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);CHKERRQ(ierr); 757 ierr = MatSetType(C_dense,MATSEQDENSE);CHKERRQ(ierr); 758 ierr = MatSeqDenseSetPreallocation(C_dense,PETSC_NULL);CHKERRQ(ierr); 759 Bt_dense->assembled = PETSC_TRUE; 760 multtrans->ABt_den = C_dense; 761 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 762 etime1 += tf - t0; 763 764 #if defined(PETSC_USE_INFO) 765 { 766 Mat_SeqAIJ *c=(Mat_SeqAIJ*)(*C)->data; 767 ierr = PetscInfo5(*C,"Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",A->cmap->n,matcoloring->ncolors,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors)); 768 ierr = PetscInfo5(*C,"Sym = GetColor %g + ColorCreate %g + MatDenseCreate %g + non-colorSym %g = %g\n",etime0,etime01,etime1,etime2,etime0+etime01+etime1+etime2); 769 } 770 #endif 771 } 772 /* clean up */ 773 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 774 ierr = MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);CHKERRQ(ierr); 775 776 777 778 #if defined(INEFFICIENT_ALGORITHM) 779 /* The algorithm below computes am*bm sparse inner-product - inefficient! It will be deleted later. */ 780 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 781 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 782 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ci,*cj,*acol,*bcol; 783 PetscInt am=A->rmap->N,bm=B->rmap->N; 784 PetscInt i,j,anzi,bnzj,cnzi,nlnk,*lnk,nspacedouble=0,ka,kb,index[1]; 785 MatScalar *ca; 786 PetscBT lnkbt; 787 PetscReal afill; 788 789 /* Allocate row pointer array ci */ 790 ierr = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 791 ci[0] = 0; 792 793 /* Create and initialize a linked list for C columns */ 794 nlnk = bm+1; 795 ierr = PetscLLCreate(bm,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 796 797 /* Initial FreeSpace with size fill*(nnz(A)+nnz(B)) */ 798 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr); 799 current_space = free_space; 800 801 /* Determine symbolic info for each row of the product A*B^T: */ 802 for (i=0; i<am; i++) { 803 anzi = ai[i+1] - ai[i]; 804 cnzi = 0; 805 acol = aj + ai[i]; 806 for (j=0; j<bm; j++){ 807 bnzj = bi[j+1] - bi[j]; 808 bcol= bj + bi[j]; 809 /* sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 810 ka = 0; kb = 0; 811 while (ka < anzi && kb < bnzj){ 812 while (acol[ka] < bcol[kb] && ka < anzi) ka++; 813 if (ka == anzi) break; 814 while (acol[ka] > bcol[kb] && kb < bnzj) kb++; 815 if (kb == bnzj) break; 816 if (acol[ka] == bcol[kb]){ /* add nonzero c(i,j) to lnk */ 817 index[0] = j; 818 ierr = PetscLLAdd(1,index,bm,nlnk,lnk,lnkbt);CHKERRQ(ierr); 819 cnzi++; 820 break; 821 } 822 } 823 } 824 825 /* If free space is not available, make more free space */ 826 /* Double the amount of total space in the list */ 827 if (current_space->local_remaining<cnzi) { 828 ierr = PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 829 nspacedouble++; 830 } 831 832 /* Copy data into free space, then initialize lnk */ 833 ierr = PetscLLClean(bm,bm,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 834 current_space->array += cnzi; 835 current_space->local_used += cnzi; 836 current_space->local_remaining -= cnzi; 837 838 ci[i+1] = ci[i] + cnzi; 839 } 840 841 842 /* Column indices are in the list of free space. 843 Allocate array cj, copy column indices to cj, and destroy list of free space */ 844 ierr = PetscMalloc((ci[am]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 845 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 846 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 847 848 /* Allocate space for ca */ 849 ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 850 ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 851 852 /* put together the new symbolic matrix */ 853 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,am,bm,ci,cj,ca,C);CHKERRQ(ierr); 854 855 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 856 /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 857 c = (Mat_SeqAIJ *)((*C)->data); 858 c->free_a = PETSC_TRUE; 859 c->free_ij = PETSC_TRUE; 860 c->nonew = 0; 861 862 /* set MatInfo */ 863 afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5; 864 if (afill < 1.0) afill = 1.0; 865 c->maxnz = ci[am]; 866 c->nz = ci[am]; 867 (*C)->info.mallocs = nspacedouble; 868 (*C)->info.fill_ratio_given = fill; 869 (*C)->info.fill_ratio_needed = afill; 870 871 #if defined(PETSC_USE_INFO) 872 if (ci[am]) { 873 ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",nspacedouble,fill,afill);CHKERRQ(ierr); 874 ierr = PetscInfo1((*C),"Use MatMatTransposeMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr); 875 } else { 876 ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr); 877 } 878 #endif 879 #endif 880 PetscFunctionReturn(0); 881 } 882 883 /* #define USE_ARRAY - for sparse dot product. Slower than !USE_ARRAY */ 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ" 886 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 887 { 888 PetscErrorCode ierr; 889 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 890 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow; 891 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol; 892 PetscLogDouble flops=0.0; 893 MatScalar *aa=a->a,*aval,*ba=b->a,*bval,*ca=c->a,*cval; 894 Mat_MatMatTransMult *multtrans; 895 PetscContainer container; 896 #if defined(USE_ARRAY) 897 MatScalar *spdot; 898 #endif 899 900 PetscFunctionBegin; 901 ierr = PetscObjectQuery((PetscObject)C,"Mat_MatMatTransMult",(PetscObject *)&container);CHKERRQ(ierr); 902 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 903 ierr = PetscContainerGetPointer(container,(void **)&multtrans);CHKERRQ(ierr); 904 if (multtrans->usecoloring){ 905 MatTransposeColoring matcoloring = multtrans->matcoloring; 906 Mat Bt_dense; 907 PetscInt m,n; 908 PetscLogDouble t0,tf,etime0=0.0,etime1=0.0,etime2=0.0; 909 Mat C_dense = multtrans->ABt_den; 910 911 Bt_dense = multtrans->Bt_den; 912 ierr = MatGetLocalSize(Bt_dense,&m,&n);CHKERRQ(ierr); 913 914 /* Get Bt_dense by Apply MatTransposeColoring to B */ 915 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 916 ierr = MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);CHKERRQ(ierr); 917 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 918 etime0 += tf - t0; 919 920 /* C_dense = A*Bt_dense */ 921 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 922 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);CHKERRQ(ierr); 923 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 924 etime2 += tf - t0; 925 926 /* Recover C from C_dense */ 927 ierr = PetscGetTime(&t0);CHKERRQ(ierr); 928 ierr = MatTransColoringApplyDenToSp(matcoloring,C_dense,C);CHKERRQ(ierr); 929 ierr = PetscGetTime(&tf);CHKERRQ(ierr); 930 etime1 += tf - t0; 931 #if defined(PETSC_USE_INFO) 932 ierr = PetscInfo4(C,"Num = ColoringApply: %g %g + Mult_sp_dense %g = %g\n",etime0,etime1,etime2,etime0+etime1+etime2); 933 #endif 934 PetscFunctionReturn(0); 935 } 936 937 #if defined(USE_ARRAY) 938 /* allocate an array for implementing sparse inner-product */ 939 ierr = PetscMalloc((A->cmap->n+1)*sizeof(MatScalar),&spdot);CHKERRQ(ierr); 940 ierr = PetscMemzero(spdot,(A->cmap->n+1)*sizeof(MatScalar));CHKERRQ(ierr); 941 #endif 942 943 /* clear old values in C */ 944 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 945 946 for (i=0; i<cm; i++) { 947 anzi = ai[i+1] - ai[i]; 948 acol = aj + ai[i]; 949 aval = aa + ai[i]; 950 cnzi = ci[i+1] - ci[i]; 951 ccol = cj + ci[i]; 952 cval = ca + ci[i]; 953 for (j=0; j<cnzi; j++){ 954 brow = ccol[j]; 955 bnzj = bi[brow+1] - bi[brow]; 956 bcol = bj + bi[brow]; 957 bval = ba + bi[brow]; 958 959 /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */ 960 #if defined(USE_ARRAY) 961 /* put ba to spdot array */ 962 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = bval[nextb]; 963 /* c(i,j)=A[i,:]*B[j,:]^T */ 964 for (nexta=0; nexta<anzi; nexta++){ 965 cval[j] += spdot[acol[nexta]]*aval[nexta]; 966 } 967 /* zero spdot array */ 968 for (nextb=0; nextb<bnzj; nextb++) spdot[bcol[nextb]] = 0.0; 969 #else 970 nexta = 0; nextb = 0; 971 while (nexta<anzi && nextb<bnzj){ 972 while (acol[nexta] < bcol[nextb] && nexta < anzi) nexta++; 973 if (nexta == anzi) break; 974 while (acol[nexta] > bcol[nextb] && nextb < bnzj) nextb++; 975 if (nextb == bnzj) break; 976 if (acol[nexta] == bcol[nextb]){ 977 cval[j] += aval[nexta]*bval[nextb]; 978 nexta++; nextb++; 979 flops += 2; 980 } 981 } 982 #endif 983 } 984 } 985 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 986 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 988 #if defined(USE_ARRAY) 989 ierr = PetscFree(spdot); 990 #endif 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatTransposeMatMult_SeqAIJ_SeqAIJ" 996 PetscErrorCode MatTransposeMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) { 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 if (scall == MAT_INITIAL_MATRIX){ 1001 ierr = MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr); 1002 } 1003 ierr = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ" 1009 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 1010 { 1011 PetscErrorCode ierr; 1012 Mat At; 1013 PetscInt *ati,*atj; 1014 1015 PetscFunctionBegin; 1016 /* create symbolic At */ 1017 ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1018 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr); 1019 1020 /* get symbolic C=At*B */ 1021 ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr); 1022 1023 /* clean up */ 1024 ierr = MatDestroy(&At);CHKERRQ(ierr); 1025 ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr); 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ" 1031 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 1032 { 1033 PetscErrorCode ierr; 1034 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data; 1035 PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb; 1036 PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k; 1037 PetscLogDouble flops=0.0; 1038 MatScalar *aa=a->a,*ba,*ca=c->a,*caj; 1039 1040 PetscFunctionBegin; 1041 /* clear old values in C */ 1042 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1043 1044 /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */ 1045 for (i=0;i<am;i++) { 1046 bj = b->j + bi[i]; 1047 ba = b->a + bi[i]; 1048 bnzi = bi[i+1] - bi[i]; 1049 anzi = ai[i+1] - ai[i]; 1050 for (j=0; j<anzi; j++) { 1051 nextb = 0; 1052 crow = *aj++; 1053 cjj = cj + ci[crow]; 1054 caj = ca + ci[crow]; 1055 /* perform sparse axpy operation. Note cjj includes bj. */ 1056 for (k=0; nextb<bnzi; k++) { 1057 if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */ 1058 caj[k] += (*aa)*(*(ba+nextb)); 1059 nextb++; 1060 } 1061 } 1062 flops += 2*bnzi; 1063 aa++; 1064 } 1065 } 1066 1067 /* Assemble the final matrix and clean up */ 1068 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1069 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1070 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 EXTERN_C_BEGIN 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqDense" 1077 PetscErrorCode MatMatMult_SeqAIJ_SeqDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1078 { 1079 PetscErrorCode ierr; 1080 1081 PetscFunctionBegin; 1082 if (scall == MAT_INITIAL_MATRIX){ 1083 ierr = MatMatMultSymbolic_SeqAIJ_SeqDense(A,B,fill,C);CHKERRQ(ierr); 1084 } 1085 ierr = MatMatMultNumeric_SeqAIJ_SeqDense(A,B,*C);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 EXTERN_C_END 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqDense" 1092 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat *C) 1093 { 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);CHKERRQ(ierr); 1098 (*C)->ops->matmult = MatMatMult_SeqAIJ_SeqDense; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqDense" 1104 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1105 { 1106 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1109 MatScalar *aa; 1110 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n; 1111 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam; 1112 1113 PetscFunctionBegin; 1114 if (!cm || !cn) PetscFunctionReturn(0); 1115 if (bm != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,bm); 1116 if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n); 1117 if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n); 1118 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1119 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1120 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1121 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1122 colam = col*am; 1123 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1124 r1 = r2 = r3 = r4 = 0.0; 1125 n = a->i[i+1] - a->i[i]; 1126 aj = a->j + a->i[i]; 1127 aa = a->a + a->i[i]; 1128 for (j=0; j<n; j++) { 1129 r1 += (*aa)*b1[*aj]; 1130 r2 += (*aa)*b2[*aj]; 1131 r3 += (*aa)*b3[*aj]; 1132 r4 += (*aa++)*b4[*aj++]; 1133 } 1134 c[colam + i] = r1; 1135 c[colam + am + i] = r2; 1136 c[colam + am2 + i] = r3; 1137 c[colam + am3 + i] = r4; 1138 } 1139 b1 += bm4; 1140 b2 += bm4; 1141 b3 += bm4; 1142 b4 += bm4; 1143 } 1144 for (;col<cn; col++){ /* over extra columns of C */ 1145 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1146 r1 = 0.0; 1147 n = a->i[i+1] - a->i[i]; 1148 aj = a->j + a->i[i]; 1149 aa = a->a + a->i[i]; 1150 1151 for (j=0; j<n; j++) { 1152 r1 += (*aa++)*b1[*aj++]; 1153 } 1154 c[col*am + i] = r1; 1155 } 1156 b1 += bm; 1157 } 1158 ierr = PetscLogFlops(cn*(2.0*a->nz));CHKERRQ(ierr); 1159 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1160 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1161 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1162 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 /* 1167 Note very similar to MatMult_SeqAIJ(), should generate both codes from same base 1168 */ 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatMatMultNumericAdd_SeqAIJ_SeqDense" 1171 PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C) 1172 { 1173 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1174 PetscErrorCode ierr; 1175 PetscScalar *b,*c,r1,r2,r3,r4,*b1,*b2,*b3,*b4; 1176 MatScalar *aa; 1177 PetscInt cm=C->rmap->n, cn=B->cmap->n, bm=B->rmap->n, col, i,j,n,*aj, am = A->rmap->n,*ii,arm; 1178 PetscInt am2 = 2*am, am3 = 3*am, bm4 = 4*bm,colam,*ridx; 1179 1180 PetscFunctionBegin; 1181 if (!cm || !cn) PetscFunctionReturn(0); 1182 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1183 ierr = MatGetArray(C,&c);CHKERRQ(ierr); 1184 b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm; 1185 1186 if (a->compressedrow.use){ /* use compressed row format */ 1187 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1188 colam = col*am; 1189 arm = a->compressedrow.nrows; 1190 ii = a->compressedrow.i; 1191 ridx = a->compressedrow.rindex; 1192 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1193 r1 = r2 = r3 = r4 = 0.0; 1194 n = ii[i+1] - ii[i]; 1195 aj = a->j + ii[i]; 1196 aa = a->a + ii[i]; 1197 for (j=0; j<n; j++) { 1198 r1 += (*aa)*b1[*aj]; 1199 r2 += (*aa)*b2[*aj]; 1200 r3 += (*aa)*b3[*aj]; 1201 r4 += (*aa++)*b4[*aj++]; 1202 } 1203 c[colam + ridx[i]] += r1; 1204 c[colam + am + ridx[i]] += r2; 1205 c[colam + am2 + ridx[i]] += r3; 1206 c[colam + am3 + ridx[i]] += r4; 1207 } 1208 b1 += bm4; 1209 b2 += bm4; 1210 b3 += bm4; 1211 b4 += bm4; 1212 } 1213 for (;col<cn; col++){ /* over extra columns of C */ 1214 colam = col*am; 1215 arm = a->compressedrow.nrows; 1216 ii = a->compressedrow.i; 1217 ridx = a->compressedrow.rindex; 1218 for (i=0; i<arm; i++) { /* over rows of C in those columns */ 1219 r1 = 0.0; 1220 n = ii[i+1] - ii[i]; 1221 aj = a->j + ii[i]; 1222 aa = a->a + ii[i]; 1223 1224 for (j=0; j<n; j++) { 1225 r1 += (*aa++)*b1[*aj++]; 1226 } 1227 c[col*am + ridx[i]] += r1; 1228 } 1229 b1 += bm; 1230 } 1231 } else { 1232 for (col=0; col<cn-4; col += 4){ /* over columns of C */ 1233 colam = col*am; 1234 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1235 r1 = r2 = r3 = r4 = 0.0; 1236 n = a->i[i+1] - a->i[i]; 1237 aj = a->j + a->i[i]; 1238 aa = a->a + a->i[i]; 1239 for (j=0; j<n; j++) { 1240 r1 += (*aa)*b1[*aj]; 1241 r2 += (*aa)*b2[*aj]; 1242 r3 += (*aa)*b3[*aj]; 1243 r4 += (*aa++)*b4[*aj++]; 1244 } 1245 c[colam + i] += r1; 1246 c[colam + am + i] += r2; 1247 c[colam + am2 + i] += r3; 1248 c[colam + am3 + i] += r4; 1249 } 1250 b1 += bm4; 1251 b2 += bm4; 1252 b3 += bm4; 1253 b4 += bm4; 1254 } 1255 for (;col<cn; col++){ /* over extra columns of C */ 1256 for (i=0; i<am; i++) { /* over rows of C in those columns */ 1257 r1 = 0.0; 1258 n = a->i[i+1] - a->i[i]; 1259 aj = a->j + a->i[i]; 1260 aa = a->a + a->i[i]; 1261 1262 for (j=0; j<n; j++) { 1263 r1 += (*aa++)*b1[*aj++]; 1264 } 1265 c[col*am + i] += r1; 1266 } 1267 b1 += bm; 1268 } 1269 } 1270 ierr = PetscLogFlops(cn*2.0*a->nz);CHKERRQ(ierr); 1271 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1272 ierr = MatRestoreArray(C,&c);CHKERRQ(ierr); 1273 PetscFunctionReturn(0); 1274 } 1275 1276 #undef __FUNCT__ 1277 #define __FUNCT__ "MatTransColoringApplySpToDen_SeqAIJ" 1278 PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense) 1279 { 1280 PetscErrorCode ierr; 1281 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 1282 Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data; 1283 PetscInt *bi=b->i,*bj=b->j; 1284 PetscInt m=Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns; 1285 MatScalar *btval,*btval_den,*ba=b->a; 1286 PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors; 1287 1288 PetscFunctionBegin; 1289 btval_den=btdense->v; 1290 ierr = PetscMemzero(btval_den,(m*n)*sizeof(MatScalar));CHKERRQ(ierr); 1291 for (k=0; k<ncolors; k++) { 1292 ncolumns = coloring->ncolumns[k]; 1293 for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */ 1294 col = *(columns + colorforcol[k] + l); 1295 btcol = bj + bi[col]; 1296 btval = ba + bi[col]; 1297 anz = bi[col+1] - bi[col]; 1298 for (j=0; j<anz; j++){ 1299 brow = btcol[j]; 1300 btval_den[brow] = btval[j]; 1301 } 1302 } 1303 btval_den += m; 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatTransColoringApplyDenToSp_SeqAIJ" 1310 PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp) 1311 { 1312 PetscErrorCode ierr; 1313 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data; 1314 PetscInt k,l,*row,*idx,m,ncolors=matcoloring->ncolors,nrows; 1315 PetscScalar *ca_den,*cp_den,*ca=csp->a; 1316 PetscInt *rows=matcoloring->rows,*spidx=matcoloring->columnsforspidx,*colorforrow=matcoloring->colorforrow; 1317 1318 PetscFunctionBegin; 1319 ierr = MatGetLocalSize(Csp,&m,PETSC_NULL);CHKERRQ(ierr); 1320 ierr = MatGetArray(Cden,&ca_den);CHKERRQ(ierr); 1321 cp_den = ca_den; 1322 for (k=0; k<ncolors; k++) { 1323 nrows = matcoloring->nrows[k]; 1324 row = rows + colorforrow[k]; 1325 idx = spidx + colorforrow[k]; 1326 for (l=0; l<nrows; l++){ 1327 ca[idx[l]] = cp_den[row[l]]; 1328 } 1329 cp_den += m; 1330 } 1331 ierr = MatRestoreArray(Cden,&ca_den);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 /* 1336 MatGetColumnIJ_SeqAIJ_Color() and MatRestoreColumnIJ_SeqAIJ_Color() are customized from 1337 MatGetColumnIJ_SeqAIJ() and MatRestoreColumnIJ_SeqAIJ() by adding an output 1338 spidx[], index of a->j, to be used for setting 'columnsforspidx' in MatTransposeColoringCreate_SeqAIJ(). 1339 */ 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatGetColumnIJ_SeqAIJ_Color" 1342 PetscErrorCode MatGetColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1343 { 1344 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1345 PetscErrorCode ierr; 1346 PetscInt i,*collengths,*cia,*cja,n = A->cmap->n,m = A->rmap->n; 1347 PetscInt nz = a->i[m],row,*jj,mr,col; 1348 PetscInt *cspidx; 1349 1350 PetscFunctionBegin; 1351 *nn = n; 1352 if (!ia) PetscFunctionReturn(0); 1353 if (symmetric) { 1354 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"MatGetColumnIJ_SeqAIJ_Color() not supported for the case symmetric"); 1355 ierr = MatToSymmetricIJ_SeqAIJ(A->rmap->n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 1356 } else { 1357 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 1358 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1359 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 1360 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 1361 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cspidx);CHKERRQ(ierr); 1362 jj = a->j; 1363 for (i=0; i<nz; i++) { 1364 collengths[jj[i]]++; 1365 } 1366 cia[0] = oshift; 1367 for (i=0; i<n; i++) { 1368 cia[i+1] = cia[i] + collengths[i]; 1369 } 1370 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 1371 jj = a->j; 1372 for (row=0; row<m; row++) { 1373 mr = a->i[row+1] - a->i[row]; 1374 for (i=0; i<mr; i++) { 1375 col = *jj++; 1376 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 1377 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 1378 } 1379 } 1380 ierr = PetscFree(collengths);CHKERRQ(ierr); 1381 *ia = cia; *ja = cja; 1382 *spidx = cspidx; 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatRestoreColumnIJ_SeqAIJ_Color" 1389 PetscErrorCode MatRestoreColumnIJ_SeqAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 1390 { 1391 PetscErrorCode ierr; 1392 1393 PetscFunctionBegin; 1394 if (!ia) PetscFunctionReturn(0); 1395 1396 ierr = PetscFree(*ia);CHKERRQ(ierr); 1397 ierr = PetscFree(*ja);CHKERRQ(ierr); 1398 ierr = PetscFree(*spidx);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatTransposeColoringCreate_SeqAIJ" 1404 PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c) 1405 { 1406 PetscErrorCode ierr; 1407 PetscInt i,n,nrows,N,j,k,m,*row_idx,*ci,*cj,ncols,col,cm; 1408 const PetscInt *is; 1409 PetscInt nis = iscoloring->n,*rowhit,bs = 1; 1410 IS *isa; 1411 PetscBool done; 1412 PetscBool flg1,flg2; 1413 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 1414 PetscInt *colorforrow,*rows,*rows_i,*columnsforspidx,*columnsforspidx_i,*idxhit,*spidx; 1415 PetscInt *colorforcol,*columns,*columns_i; 1416 1417 PetscFunctionBegin; 1418 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 1419 1420 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1 */ 1421 ierr = PetscTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 1422 ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg2);CHKERRQ(ierr); 1423 if (flg1 || flg2) { 1424 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1425 } 1426 1427 N = mat->cmap->N/bs; 1428 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 1429 c->N = mat->cmap->N/bs; 1430 c->m = mat->rmap->N/bs; 1431 c->rstart = 0; 1432 1433 c->ncolors = nis; 1434 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 1435 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 1436 ierr = PetscMalloc2(csp->nz+1,PetscInt,&rows,csp->nz+1,PetscInt,&columnsforspidx);CHKERRQ(ierr); 1437 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforrow);CHKERRQ(ierr); 1438 colorforrow[0] = 0; 1439 rows_i = rows; 1440 columnsforspidx_i = columnsforspidx; 1441 1442 ierr = PetscMalloc((nis+1)*sizeof(PetscInt),&colorforcol);CHKERRQ(ierr); 1443 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columns);CHKERRQ(ierr); 1444 colorforcol[0] = 0; 1445 columns_i = columns; 1446 1447 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); /* column-wise storage of mat */ 1448 if (!done) SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 1449 1450 cm = c->m; 1451 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 1452 ierr = PetscMalloc((cm+1)*sizeof(PetscInt),&idxhit);CHKERRQ(ierr); 1453 for (i=0; i<nis; i++) { 1454 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 1455 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 1456 c->ncolumns[i] = n; 1457 if (n) { 1458 ierr = PetscMemcpy(columns_i,is,n*sizeof(PetscInt));CHKERRQ(ierr); 1459 } 1460 colorforcol[i+1] = colorforcol[i] + n; 1461 columns_i += n; 1462 1463 /* fast, crude version requires O(N*N) work */ 1464 ierr = PetscMemzero(rowhit,cm*sizeof(PetscInt));CHKERRQ(ierr); 1465 1466 /* loop over columns*/ 1467 for (j=0; j<n; j++) { 1468 col = is[j]; 1469 row_idx = cj + ci[col]; 1470 m = ci[col+1] - ci[col]; 1471 /* loop over columns marking them in rowhit */ 1472 for (k=0; k<m; k++) { 1473 idxhit[*row_idx] = spidx[ci[col] + k]; 1474 rowhit[*row_idx++] = col + 1; 1475 } 1476 } 1477 /* count the number of hits */ 1478 nrows = 0; 1479 for (j=0; j<cm; j++) { 1480 if (rowhit[j]) nrows++; 1481 } 1482 c->nrows[i] = nrows; 1483 colorforrow[i+1] = colorforrow[i] + nrows; 1484 1485 nrows = 0; 1486 for (j=0; j<cm; j++) { 1487 if (rowhit[j]) { 1488 rows_i[nrows] = j; 1489 columnsforspidx_i[nrows] = idxhit[j]; 1490 nrows++; 1491 } 1492 } 1493 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 1494 rows_i += nrows; columnsforspidx_i += nrows; 1495 } 1496 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,&done);CHKERRQ(ierr); 1497 ierr = PetscFree(rowhit);CHKERRQ(ierr); 1498 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 1499 #if defined(PETSC_USE_DEBUG) 1500 if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]); 1501 #endif 1502 1503 c->colorforrow = colorforrow; 1504 c->rows = rows; 1505 c->columnsforspidx = columnsforspidx; 1506 c->colorforcol = colorforcol; 1507 c->columns = columns; 1508 ierr = PetscFree(idxhit);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511