#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
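/*
   Illustrative sketch (not part of the library): colmap maps a global block
   column to bs*(local block column)+1, so a stored value of 0 means "this
   block column does not appear in the off-diagonal part".  Looking up a
   global point column gcol (a hypothetical variable) therefore follows

       #if defined (PETSC_USE_CTABLE)
         ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
         lcol--;
       #else
         lcol = baij->colmap[gcol/bs] - 1;
       #endif

   where lcol < 0 afterwards means the block column is absent; this is the
   pattern used by MatSetValues_MPIBAIJ() below.
*/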
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj] = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt       h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
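/*
   Note (illustrative only): the three norms computed above are

       NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )   (local sums of squares, MPI_SUM reduction, then sqrt)
       NORM_1:          ||A||_1   = max_j sum_i |a_ij|        (local column sums, MPI_SUM reduction, then max over columns)
       NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|        (local max row sum, MPI_MAX reduction)
*/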
/*
     Creates the hash table, and sets the table entries.
     This table is created only once.
     If new entries need to be added to the matrix
     then the hash table has to be destroyed and
     recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
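/*
   Illustrative sketch (not part of the library): the SetValues/Assembly
   kernels above are driven by the usual user-level sequence, e.g.

       PetscScalar val = 1.0;
       PetscInt    row = 0, col = 0;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process entries are kept in mat->stash (or mat->bstash for blocked
   insertion) until MatAssemblyBegin()/MatAssemblyEnd() scatter them to the
   owning processes, as implemented in the two routines above.
*/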
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values  */
  nzmax = nz; /* space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
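/*
   Note (illustrative only): the multiply kernels above use the standard PETSc
   overlap pattern.  With A the on-diagonal block and B the off-diagonal block,

       y = A*x_local + B*x_ghost,

   the scatter that fills the ghost values (a->Mvctx into a->lvec) is started
   before the local product and completed just before the B*x_ghost
   contribution, so communication overlaps the local computation.
*/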
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
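/*
   Illustrative sketch (not part of the library): the row kernels above back
   the public MatGetRow()/MatRestoreRow() pair, which must be used together
   and only for locally owned rows, e.g.

       PetscInt          ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);

   where grow is a hypothetical global row index in [rstart,rend); only one
   row may be held between the two calls.
*/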
info->memory; isend[4] += info->mallocs; 1558 if (flag == MAT_LOCAL) { 1559 info->nz_used = isend[0]; 1560 info->nz_allocated = isend[1]; 1561 info->nz_unneeded = isend[2]; 1562 info->memory = isend[3]; 1563 info->mallocs = isend[4]; 1564 } else if (flag == MAT_GLOBAL_MAX) { 1565 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1566 info->nz_used = irecv[0]; 1567 info->nz_allocated = irecv[1]; 1568 info->nz_unneeded = irecv[2]; 1569 info->memory = irecv[3]; 1570 info->mallocs = irecv[4]; 1571 } else if (flag == MAT_GLOBAL_SUM) { 1572 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1573 info->nz_used = irecv[0]; 1574 info->nz_allocated = irecv[1]; 1575 info->nz_unneeded = irecv[2]; 1576 info->memory = irecv[3]; 1577 info->mallocs = irecv[4]; 1578 } else { 1579 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1580 } 1581 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1582 info->fill_ratio_needed = 0; 1583 info->factor_mallocs = 0; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1589 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1590 { 1591 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 switch (op) { 1596 case MAT_NEW_NONZERO_LOCATIONS: 1597 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1598 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1599 case MAT_KEEP_NONZERO_PATTERN: 1600 case MAT_NEW_NONZERO_LOCATION_ERR: 1601 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1602 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1603 break; 1604 case MAT_ROW_ORIENTED: 1605 a->roworiented = flg; 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1608 break; 1609 case MAT_NEW_DIAGONALS: 1610 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1611 break; 1612 case MAT_IGNORE_OFF_PROC_ENTRIES: 1613 a->donotstash = flg; 1614 break; 1615 case MAT_USE_HASH_TABLE: 1616 a->ht_flag = flg; 1617 break; 1618 case MAT_SYMMETRIC: 1619 case MAT_STRUCTURALLY_SYMMETRIC: 1620 case MAT_HERMITIAN: 1621 case MAT_SYMMETRY_ETERNAL: 1622 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1623 break; 1624 default: 1625 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1632 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1633 { 1634 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1635 Mat_SeqBAIJ *Aloc; 1636 Mat B; 1637 PetscErrorCode ierr; 1638 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1639 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1640 MatScalar *a; 1641 1642 PetscFunctionBegin; 1643 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1644 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1645 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1646 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1647 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1648 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1649 } else { 1650 B = *matout; 1651 } 1652 1653 /* copy over the A part */ 1654 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1655 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1656 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1657 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstartbs + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = (baij->cstartbs+aj[j])*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 /* copy over the B part */ 1670 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1671 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1672 for (i=0; i<mbs; i++) { 1673 rvals[0] = bs*(baij->rstartbs + i); 1674 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1675 for (j=ai[i]; j<ai[i+1]; j++) { 1676 col = baij->garray[aj[j]]*bs; 1677 for (k=0; k<bs; k++) { 1678 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1679 col++; a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1688 *matout = B; 1689 } else { 1690 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1698 { 1699 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1700 Mat a = baij->A,b = baij->B; 1701 PetscErrorCode ierr; 1702 PetscInt s1,s2,s3; 1703 1704 PetscFunctionBegin; 1705 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1706 if (rr) { 1707 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1708 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1709 /* Overlap communication with computation. 
*/ 1710 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 } 1712 if (ll) { 1713 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1714 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1715 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1716 } 1717 /* scale the diagonal block */ 1718 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1719 1720 if (rr) { 1721 /* Do a scatter end and then right scale the off-diagonal block */ 1722 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1724 } 1725 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1732 { 1733 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1734 PetscErrorCode ierr; 1735 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1736 PetscInt i,*owners = A->rmap->range; 1737 PetscInt *nprocs,j,idx,nsends,row; 1738 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1739 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1740 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1741 MPI_Comm comm = ((PetscObject)A)->comm; 1742 MPI_Request *send_waits,*recv_waits; 1743 MPI_Status recv_status,*send_status; 1744 const PetscScalar *xx; 1745 PetscScalar *bb; 1746 #if defined(PETSC_DEBUG) 1747 PetscBool found = PETSC_FALSE; 1748 #endif 1749 1750 PetscFunctionBegin; 1751 /* first count number of contributors to each processor */ 1752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1755 j = 0; 1756 for (i=0; i<N; i++) { 1757 if (lastidx > (idx = rows[i])) j = 0; 1758 lastidx = idx; 1759 for (; j<size; j++) { 1760 if (idx >= owners[j] && idx < owners[j+1]) { 1761 nprocs[2*j]++; 1762 nprocs[2*j+1] = 1; 1763 owner[i] = j; 1764 #if defined(PETSC_DEBUG) 1765 found = PETSC_TRUE; 1766 #endif 1767 break; 1768 } 1769 } 1770 #if defined(PETSC_DEBUG) 1771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1772 found = PETSC_FALSE; 1773 #endif 1774 } 1775 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1776 1777 if (A->nooffproczerorows) { 1778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1779 nrecvs = nsends; 1780 nmax = N; 1781 } else { 1782 /* inform other processors of number of messages and max length*/ 1783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1784 } 1785 1786 /* post receives: */ 1787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1789 for (i=0; i<nrecvs; i++) { 1790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1791 } 1792 1793 /* do sends: 1794 1) starts[i] gives the starting index in svalues for stuff going to 1795 the ith processor 1796 */ 1797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1799 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1800 starts[0] = 0; 1801 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1802 for (i=0; i<N; i++) { 1803 svalues[starts[owner[i]]++] = rows[i]; 1804 } 1805 1806 starts[0] = 0; 1807 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1808 count = 0; 1809 for (i=0; i<size; i++) { 1810 if (nprocs[2*i+1]) { 1811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1812 } 1813 } 1814 ierr = PetscFree(starts);CHKERRQ(ierr); 1815 1816 base = owners[rank]; 1817 1818 /* wait on receives */ 1819 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1820 count = nrecvs; 1821 slen = 0; 1822 while (count) { 1823 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1824 /* unpack receives into our local space */ 1825 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1826 source[imdex] = recv_status.MPI_SOURCE; 1827 lens[imdex] = n; 1828 slen += n; 1829 count--; 1830 } 1831 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1832 1833 /* move the data into the send scatter */ 1834 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1835 count = 0; 1836 for (i=0; i<nrecvs; i++) { 1837 values = rvalues + i*nmax; 1838 for (j=0; j<lens[i]; j++) { 1839 lrows[count++] = values[j] - base; 1840 } 1841 } 1842 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1843 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1844 ierr = PetscFree(owner);CHKERRQ(ierr); 1845 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1846 1847 /* fix right hand side if needed */ 1848 if (x && b) { 1849 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1850 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1851 for (i=0; i<slen; i++) { 1852 bb[lrows[i]] = diag*xx[lrows[i]]; 1853 } 1854 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1855 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1856 } 1857 1858 /* actually zap the local rows */ 1859 /* 1860 Zero the required rows. If the "diagonal block" of the matrix 1861 is square and the user wishes to set the diagonal we use separate 1862 code so that MatSetValues() is not called for each diagonal allocating 1863 new memory, thus calling lots of mallocs and slowing things down. 
1864 1865 */ 1866 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1867 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1868 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1870 } else if (diag != 0.0) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1872 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1873 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1874 for (i=0; i<slen; i++) { 1875 row = lrows[i] + rstart_bs; 1876 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1877 } 1878 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 } else { 1881 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1882 } 1883 1884 ierr = PetscFree(lrows);CHKERRQ(ierr); 1885 1886 /* wait on sends */ 1887 if (nsends) { 1888 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1889 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1890 ierr = PetscFree(send_status);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1893 ierr = PetscFree(svalues);CHKERRQ(ierr); 1894 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1901 { 1902 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatEqual_MPIBAIJ" 1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1915 { 1916 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1917 Mat a,b,c,d; 1918 PetscBool flg; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 a = matA->A; b = matA->B; 1923 c = matB->A; d = matB->B; 1924 1925 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1926 if (flg) { 1927 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1928 } 1929 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatCopy_MPIBAIJ" 1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1936 { 1937 PetscErrorCode ierr; 1938 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1939 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1943 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } else { 1946 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1947 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1954 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1955 { 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1966 { 1967 PetscErrorCode ierr; 1968 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1969 PetscBLASInt bnz,one=1; 1970 Mat_SeqBAIJ *x,*y; 1971 1972 PetscFunctionBegin; 1973 if (str == SAME_NONZERO_PATTERN) { 1974 PetscScalar alpha = a; 1975 x = (Mat_SeqBAIJ *)xx->A->data; 1976 y = (Mat_SeqBAIJ *)yy->A->data; 1977 bnz = PetscBLASIntCast(x->nz); 1978 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1979 x = (Mat_SeqBAIJ *)xx->B->data; 1980 y = (Mat_SeqBAIJ *)yy->B->data; 1981 bnz = PetscBLASIntCast(x->nz); 1982 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1983 } else { 1984 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1991 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1992 { 1993 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1994 PetscInt rbs,cbs; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1999 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2000 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2001 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2002 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2003 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2009 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2010 { 2011 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 2014 PetscFunctionBegin; 2015 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2016 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2022 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2023 { 2024 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2029 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2036 { 2037 PetscErrorCode ierr; 2038 IS iscol_local; 2039 PetscInt csize; 2040 2041 PetscFunctionBegin; 2042 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2043 if (call == MAT_REUSE_MATRIX) { 2044 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2045 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2046 } else { 2047 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2048 } 2049 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2050 if (call == MAT_INITIAL_MATRIX) { 2051 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2052 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2053 } 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2059 /* 2060 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2061 in local and then by concatenating the local matrices the end result. 2062 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2063 */ 2064 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2065 { 2066 PetscErrorCode ierr; 2067 PetscMPIInt rank,size; 2068 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2069 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2070 Mat *local,M,Mreuse; 2071 MatScalar *vwork,*aa; 2072 MPI_Comm comm = ((PetscObject)mat)->comm; 2073 Mat_SeqBAIJ *aij; 2074 2075 2076 PetscFunctionBegin; 2077 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2079 2080 if (call == MAT_REUSE_MATRIX) { 2081 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2082 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2083 local = &Mreuse; 2084 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2085 } else { 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2087 Mreuse = *local; 2088 ierr = PetscFree(local);CHKERRQ(ierr); 2089 } 2090 2091 /* 2092 m - number of local rows 2093 n - number of columns (same on all processors) 2094 rstart - first row in new global matrix generated 2095 */ 2096 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2097 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2098 m = m/bs; 2099 n = n/bs; 2100 2101 if (call == MAT_INITIAL_MATRIX) { 2102 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2103 ii = aij->i; 2104 jj = aij->j; 2105 2106 /* 2107 Determine the number of non-zeros in the diagonal and off-diagonal 2108 portions of the matrix in order to do correct preallocation 2109 */ 2110 2111 /* first get start and end of "diagonal" columns */ 2112 if (csize == PETSC_DECIDE) { 2113 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2114 if (mglobal == n*bs) { /* square matrix */ 2115 nlocal = m; 2116 } else { 2117 nlocal = n/size + ((n % size) > rank); 2118 } 2119 } else { 2120 nlocal = csize/bs; 2121 } 2122 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2123 rstart = rend - nlocal; 2124 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2125 2126 /* next, compute all the lengths */ 2127 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2128 olens = dlens + m; 2129 for (i=0; i<m; i++) { 2130 jend = ii[i+1] - ii[i]; 2131 olen = 0; 2132 dlen = 0; 2133 for (j=0; j<jend; j++) { 2134 if (*jj < rstart || *jj >= rend) olen++; 2135 else dlen++; 2136 jj++; 2137 } 2138 olens[i] = olen; 2139 dlens[i] = dlen; 2140 } 2141 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2142 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2143 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2145 ierr = PetscFree(dlens);CHKERRQ(ierr); 2146 } else { 2147 PetscInt ml,nl; 2148 2149 M = *newmat; 2150 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2151 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2152 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2153 /* 2154 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2155 rather than the slower MatSetValues(). 2156 */ 2157 M->was_assembled = PETSC_TRUE; 2158 M->assembled = PETSC_FALSE; 2159 } 2160 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2161 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2162 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 aa = aij->a; 2166 for (i=0; i<m; i++) { 2167 row = rstart/bs + i; 2168 nz = ii[i+1] - ii[i]; 2169 cwork = jj; jj += nz; 2170 vwork = aa; aa += nz; 2171 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2172 } 2173 2174 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2175 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 *newmat = M; 2177 2178 /* save submatrix used in processor for next request */ 2179 if (call == MAT_INITIAL_MATRIX) { 2180 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2181 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2182 } 2183 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatPermute_MPIBAIJ" 2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2190 { 2191 MPI_Comm comm,pcomm; 2192 PetscInt first,local_size,nrows; 2193 const PetscInt *rows; 2194 PetscMPIInt size; 2195 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2200 /* make a collective version of 'rowp' */ 2201 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2202 if (pcomm==comm) { 2203 crowp = rowp; 2204 } else { 2205 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2206 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2207 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2208 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2209 } 2210 /* collect the global row permutation and invert it */ 2211 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2212 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2213 if (pcomm!=comm) { 2214 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2215 } 2216 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2217 /* get the local target indices */ 2218 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2219 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2220 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2221 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2222 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2224 /* the column permutation is so much easier; 2225 make a local version of 'colp' and invert it */ 2226 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2227 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2228 if (size==1) { 2229 lcolp 
= colp; 2230 } else { 2231 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2232 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2233 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2234 } 2235 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2236 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2237 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2238 if (size>1) { 2239 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2240 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2241 } 2242 /* now we just get the submatrix */ 2243 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2244 /* clean up */ 2245 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2246 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2252 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2253 { 2254 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2255 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2256 2257 PetscFunctionBegin; 2258 if (nghosts) { *nghosts = B->nbs;} 2259 if (ghosts) {*ghosts = baij->garray;} 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2267 /* 2268 This routine is almost identical to MatFDColoringCreate_MPIAIJ()! 2269 */ 2270 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2271 { 2272 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2273 PetscErrorCode ierr; 2274 PetscMPIInt size,*ncolsonproc,*disp,nn; 2275 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2276 const PetscInt *is; 2277 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2278 PetscInt *rowhit,M,cstart,cend,colb; 2279 PetscInt *columnsforrow,l; 2280 IS *isa; 2281 PetscBool done,flg; 2282 ISLocalToGlobalMapping map = mat->cbmapping; 2283 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2284 2285 PetscFunctionBegin; 2286 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2287 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2288 2289 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2290 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2291 M = mat->rmap->n/bs; 2292 cstart = mat->cmap->rstart/bs; 2293 cend = mat->cmap->rend/bs; 2294 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2295 c->N = mat->cmap->N/bs; 2296 c->m = mat->rmap->n/bs; 2297 c->rstart = mat->rmap->rstart/bs; 2298 2299 c->ncolors = nis; 2300 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2305 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2306 2307 /* Allow access to data structures of local part of matrix */ 2308 if (!baij->colmap) { 2309 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2310 } 2311 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2312 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2313 2314 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2315 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2316 2317 for (i=0; i<nis; i++) { 2318 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2319 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2320 c->ncolumns[i] = n; 2321 if (n) { 2322 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2323 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2324 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2325 } else { 2326 c->columns[i] = 0; 2327 } 2328 2329 if (ctype == IS_COLORING_GLOBAL){ 2330 /* Determine the total (parallel) number of columns of this color */ 2331 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2332 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2333 2334 nn = PetscMPIIntCast(n); 2335 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2336 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2337 if (!nctot) { 2338 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2339 } 2340 2341 disp[0] = 0; 2342 for (j=1; j<size; j++) { 2343 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2344 } 2345 2346 /* Get complete list of columns for color on each processor */ 2347 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2348 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2349 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2350 } else if (ctype == IS_COLORING_GHOSTED){ 2351 /* Determine local number of columns of this color on this process, including ghost points */ 2352 nctot = n; 2353 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2354 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2355 } else { 2356 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2357 } 2358 2359 /* 2360 Mark all rows affect by these columns 2361 */ 2362 /* Temporary option to allow for debugging/testing */ 2363 flg = PETSC_FALSE; 2364 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2365 if (!flg) {/*-----------------------------------------------------------------------------*/ 2366 /* crude, fast version */ 2367 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2368 /* loop over columns*/ 2369 for (j=0; j<nctot; j++) { 2370 if (ctype == IS_COLORING_GHOSTED) { 2371 col = ltog[cols[j]]; 2372 } else { 2373 col = cols[j]; 2374 } 2375 if (col >= cstart && col < cend) { 2376 /* column is in diagonal block of matrix */ 2377 rows = A_cj + A_ci[col-cstart]; 2378 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2379 } else { 2380 #if defined (PETSC_USE_CTABLE) 2381 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2382 colb --; 2383 #else 2384 colb = baij->colmap[col] - 1; 2385 #endif 2386 if (colb == -1) { 2387 m = 0; 2388 } else { 2389 colb = colb/bs; 2390 rows = B_cj + B_ci[colb]; 2391 m = B_ci[colb+1] - B_ci[colb]; 2392 } 2393 } 2394 /* loop over columns marking them in rowhit */ 2395 for (k=0; k<m; k++) { 2396 rowhit[*rows++] = col + 1; 2397 } 2398 } 2399 2400 /* count the number of hits */ 2401 nrows = 0; 2402 for (j=0; j<M; j++) { 2403 if (rowhit[j]) nrows++; 2404 } 2405 c->nrows[i] = nrows; 2406 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2407 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2408 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2409 nrows = 0; 2410 for (j=0; j<M; j++) { 2411 if (rowhit[j]) { 2412 c->rows[i][nrows] = j; 2413 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2414 nrows++; 2415 } 2416 } 2417 } else {/*-------------------------------------------------------------------------------*/ 2418 /* slow version, using rowhit as a linked list */ 2419 PetscInt currentcol,fm,mfm; 2420 rowhit[M] = M; 2421 nrows = 0; 2422 /* loop over columns*/ 2423 for (j=0; j<nctot; j++) { 2424 if (ctype == IS_COLORING_GHOSTED) { 2425 col = ltog[cols[j]]; 2426 } else { 2427 col = cols[j]; 2428 } 2429 if (col >= cstart && col < cend) { 2430 /* column is in diagonal block of matrix */ 2431 rows = A_cj + A_ci[col-cstart]; 2432 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2433 } else { 2434 #if defined (PETSC_USE_CTABLE) 2435 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2436 colb --; 2437 #else 2438 colb = baij->colmap[col] - 1; 2439 #endif 2440 if (colb == -1) { 2441 m = 0; 2442 } else { 2443 colb = colb/bs; 2444 rows = B_cj + B_ci[colb]; 2445 m = B_ci[colb+1] - B_ci[colb]; 2446 } 2447 } 2448 2449 /* loop over columns marking them in rowhit */ 2450 fm = M; /* fm points to first entry in linked list */ 2451 for (k=0; k<m; k++) { 2452 currentcol = *rows++; 2453 /* is it already in the list? 
*/ 2454 do { 2455 mfm = fm; 2456 fm = rowhit[fm]; 2457 } while (fm < currentcol); 2458 /* not in list so add it */ 2459 if (fm != currentcol) { 2460 nrows++; 2461 columnsforrow[currentcol] = col; 2462 /* next three lines insert new entry into linked list */ 2463 rowhit[mfm] = currentcol; 2464 rowhit[currentcol] = fm; 2465 fm = currentcol; 2466 /* fm points to present position in list since we know the columns are sorted */ 2467 } else { 2468 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2469 } 2470 } 2471 } 2472 c->nrows[i] = nrows; 2473 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2474 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2476 /* now store the linked list of rows into c->rows[i] */ 2477 nrows = 0; 2478 fm = rowhit[M]; 2479 do { 2480 c->rows[i][nrows] = fm; 2481 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2482 fm = rowhit[fm]; 2483 } while (fm < M); 2484 } /* ---------------------------------------------------------------------------------------*/ 2485 ierr = PetscFree(cols);CHKERRQ(ierr); 2486 } 2487 2488 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2489 /* 2490 vscale will contain the "diagonal" on processor scalings followed by the off processor 2491 */ 2492 if (ctype == IS_COLORING_GLOBAL) { 2493 PetscInt *garray; 2494 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2495 for (i=0; i<baij->B->cmap->n/bs; i++) { 2496 for (j=0; j<bs; j++) { 2497 garray[i*bs+j] = bs*baij->garray[i]+j; 2498 } 2499 } 2500 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2501 ierr = PetscFree(garray);CHKERRQ(ierr); 2502 CHKMEMQ; 2503 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2504 for (k=0; k<c->ncolors; k++) { 2505 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2506 for (l=0; l<c->nrows[k]; l++) { 2507 col = c->columnsforrow[k][l]; 2508 if (col >= cstart && col < cend) { 2509 /* column is in diagonal block of matrix */ 2510 colb = col - cstart; 2511 } else { 2512 /* column is in "off-processor" part */ 2513 #if defined (PETSC_USE_CTABLE) 2514 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2515 colb --; 2516 #else 2517 colb = baij->colmap[col] - 1; 2518 #endif 2519 colb = colb/bs; 2520 colb += cend - cstart; 2521 } 2522 c->vscaleforrow[k][l] = colb; 2523 } 2524 } 2525 } else if (ctype == IS_COLORING_GHOSTED) { 2526 /* Get gtol mapping */ 2527 PetscInt N = mat->cmap->N, *gtol; 2528 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2529 for (i=0; i<N; i++) gtol[i] = -1; 2530 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2531 2532 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2533 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2534 for (k=0; k<c->ncolors; k++) { 2535 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2536 for (l=0; l<c->nrows[k]; l++) { 2537 col = c->columnsforrow[k][l]; /* global column index */ 2538 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2539 } 2540 } 2541 ierr = PetscFree(gtol);CHKERRQ(ierr); 2542 } 2543 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2544 2545 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2546 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2547 
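/* return the column-oriented views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */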
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2548 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2549 CHKMEMQ; 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2555 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2556 { 2557 Mat B; 2558 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2559 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2560 Mat_SeqAIJ *b; 2561 PetscErrorCode ierr; 2562 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2563 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2564 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2565 2566 PetscFunctionBegin; 2567 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2568 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2569 2570 /* ---------------------------------------------------------------- 2571 Tell every processor the number of nonzeros per row 2572 */ 2573 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2574 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2575 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2576 } 2577 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2578 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2579 displs = recvcounts + size; 2580 for (i=0; i<size; i++) { 2581 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2582 displs[i] = A->rmap->range[i]/bs; 2583 } 2584 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2585 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2586 #else 2587 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #endif 2589 /* --------------------------------------------------------------- 2590 Create the sequential matrix of the same type as the local block diagonal 2591 */ 2592 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2593 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2594 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2595 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2596 b = (Mat_SeqAIJ *)B->data; 2597 2598 /*-------------------------------------------------------------------- 2599 Copy my part of matrix column indices over 2600 */ 2601 sendcount = ad->nz + bd->nz; 2602 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2603 a_jsendbuf = ad->j; 2604 b_jsendbuf = bd->j; 2605 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2606 cnt = 0; 2607 for (i=0; i<n; i++) { 2608 2609 /* put in lower diagonal portion */ 2610 m = bd->i[i+1] - bd->i[i]; 2611 while (m > 0) { 2612 /* is it above diagonal (in bd (compressed) numbering) */ 2613 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2614 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2615 m--; 2616 } 2617 2618 /* put in diagonal portion */ 2619 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2620 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2621 } 2622 2623 /* put in upper diagonal portion */ 2624 while (m-- > 0) { 2625 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2626 } 2627 } 2628 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2629 2630 /*-------------------------------------------------------------------- 2631 Gather all column indices to all processors 2632 */ 2633 for (i=0; i<size; i++) { 2634 recvcounts[i] = 0; 2635 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2636 recvcounts[i] += lens[j]; 2637 } 2638 } 2639 displs[0] = 0; 2640 for (i=1; i<size; i++) { 2641 displs[i] = displs[i-1] + recvcounts[i-1]; 2642 } 2643 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2644 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2645 #else 2646 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #endif 2648 /*-------------------------------------------------------------------- 2649 Assemble the matrix into useable form (note numerical values not yet set) 2650 */ 2651 /* set the b->ilen (length of each row) values */ 2652 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2653 /* set the b->i indices */ 2654 b->i[0] = 0; 2655 for (i=1; i<=A->rmap->N/bs; i++) { 2656 b->i[i] = b->i[i-1] + lens[i-1]; 2657 } 2658 ierr = PetscFree(lens);CHKERRQ(ierr); 2659 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2662 2663 if (A->symmetric){ 2664 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2665 } else if (A->hermitian) { 2666 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->structurally_symmetric) { 2668 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2669 } 2670 *newmat = B; 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatSOR_MPIBAIJ" 2676 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2677 { 2678 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2679 PetscErrorCode ierr; 2680 Vec bb1 = 0; 2681 2682 PetscFunctionBegin; 2683 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2685 } 2686 2687 if (flag == SOR_APPLY_UPPER) { 2688 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2693 if (flag & SOR_ZERO_INITIAL_GUESS) { 2694 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2695 its--; 2696 } 2697 2698 while (its--) { 2699 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2701 2702 /* update rhs: bb1 = bb - B*x */ 2703 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2704 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2705 2706 /* local sweep */ 2707 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2708 } 2709 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2710 if (flag & SOR_ZERO_INITIAL_GUESS) { 2711 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2712 its--; 2713 } 2714 while (its--) { 2715 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2716 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 2718 /* update rhs: bb1 = bb - B*x */ 2719 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2720 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2721 2722 /* local sweep */ 2723 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2724 } 2725 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2726 if (flag & SOR_ZERO_INITIAL_GUESS) { 2727 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2728 its--; 2729 } 2730 while (its--) { 2731 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2732 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2733 2734 /* update rhs: bb1 = bb - B*x */ 2735 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2736 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2737 2738 /* local sweep */ 2739 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2740 } 2741 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2742 2743 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2744 PetscFunctionReturn(0); 2745 } 2746 2747 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2748 2749 2750 /* -------------------------------------------------------------------*/ 2751 static struct _MatOps MatOps_Values = { 2752 MatSetValues_MPIBAIJ, 2753 MatGetRow_MPIBAIJ, 2754 MatRestoreRow_MPIBAIJ, 2755 MatMult_MPIBAIJ, 2756 /* 4*/ MatMultAdd_MPIBAIJ, 2757 MatMultTranspose_MPIBAIJ, 2758 MatMultTransposeAdd_MPIBAIJ, 2759 0, 2760 0, 2761 0, 2762 /*10*/ 0, 2763 0, 2764 0, 2765 MatSOR_MPIBAIJ, 2766 MatTranspose_MPIBAIJ, 2767 /*15*/ MatGetInfo_MPIBAIJ, 2768 MatEqual_MPIBAIJ, 2769 MatGetDiagonal_MPIBAIJ, 2770 MatDiagonalScale_MPIBAIJ, 2771 MatNorm_MPIBAIJ, 2772 /*20*/ MatAssemblyBegin_MPIBAIJ, 2773 MatAssemblyEnd_MPIBAIJ, 2774 MatSetOption_MPIBAIJ, 2775 MatZeroEntries_MPIBAIJ, 2776 /*24*/ MatZeroRows_MPIBAIJ, 2777 0, 2778 0, 2779 0, 2780 0, 2781 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2782 0, 2783 0, 2784 0, 2785 0, 2786 /*34*/ MatDuplicate_MPIBAIJ, 2787 0, 2788 0, 2789 0, 2790 0, 2791 /*39*/ MatAXPY_MPIBAIJ, 2792 MatGetSubMatrices_MPIBAIJ, 2793 MatIncreaseOverlap_MPIBAIJ, 2794 MatGetValues_MPIBAIJ, 2795 MatCopy_MPIBAIJ, 2796 /*44*/ 0, 2797 MatScale_MPIBAIJ, 2798 0, 2799 0, 2800 0, 2801 /*49*/ MatSetBlockSize_MPIBAIJ, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*54*/ MatFDColoringCreate_MPIBAIJ, 2807 0, 2808 MatSetUnfactored_MPIBAIJ, 2809 MatPermute_MPIBAIJ, 2810 MatSetValuesBlocked_MPIBAIJ, 2811 /*59*/ MatGetSubMatrix_MPIBAIJ, 2812 MatDestroy_MPIBAIJ, 2813 MatView_MPIBAIJ, 2814 0, 2815 0, 2816 /*64*/ 0, 2817 0, 2818 0, 2819 0, 2820 0, 2821 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2822 0, 2823 0, 2824 0, 2825 0, 2826 /*74*/ 0, 2827 MatFDColoringApply_BAIJ, 2828 0, 2829 0, 2830 0, 2831 /*79*/ 0, 2832 0, 2833 0, 2834 0, 2835 MatLoad_MPIBAIJ, 2836 /*84*/ 0, 2837 0, 2838 0, 2839 0, 2840 0, 2841 /*89*/ 0, 2842 0, 2843 0, 2844 0, 2845 0, 2846 /*94*/ 0, 2847 0, 2848 0, 2849 0, 2850 0, 2851 /*99*/ 0, 2852 0, 2853 0, 2854 0, 2855 0, 2856 /*104*/0, 2857 MatRealPart_MPIBAIJ, 2858 MatImaginaryPart_MPIBAIJ, 2859 0, 2860 0, 2861 /*109*/0, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2867 0, 2868 MatGetGhosts_MPIBAIJ, 2869 0, 2870 0, 2871 /*119*/0, 2872 0, 2873 0, 2874 0 2875 }; 2876 2877 
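/*
   Illustrative sketch (not part of the original implementation): a typical application-level
   sequence that exercises the MPIBAIJ entry points registered in the function table above and
   implemented in this file (MatCreate_MPIBAIJ, MatMPIBAIJSetPreallocation_MPIBAIJ,
   MatSetValuesBlocked_MPIBAIJ, and the assembly routines).  The block size, global size, and
   preallocation counts used here are hypothetical.

     Mat         A;
     PetscInt    bs = 2,Mb = 100,row = 0,col = 0;
     PetscScalar vals[4] = {1.0,0.0,0.0,1.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,bs*Mb,bs*Mb);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/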
EXTERN_C_BEGIN 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2880 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2881 { 2882 PetscFunctionBegin; 2883 *a = ((Mat_MPIBAIJ *)A->data)->A; 2884 *iscopy = PETSC_FALSE; 2885 PetscFunctionReturn(0); 2886 } 2887 EXTERN_C_END 2888 2889 EXTERN_C_BEGIN 2890 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2891 EXTERN_C_END 2892 2893 EXTERN_C_BEGIN 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2896 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2897 { 2898 PetscInt m,rstart,cstart,cend; 2899 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2900 const PetscInt *JJ=0; 2901 PetscScalar *values=0; 2902 PetscErrorCode ierr; 2903 2904 PetscFunctionBegin; 2905 2906 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2907 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2911 m = B->rmap->n/bs; 2912 rstart = B->rmap->rstart/bs; 2913 cstart = B->cmap->rstart/bs; 2914 cend = B->cmap->rend/bs; 2915 2916 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2917 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2918 for (i=0; i<m; i++) { 2919 nz = ii[i+1] - ii[i]; 2920 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2921 nz_max = PetscMax(nz_max,nz); 2922 JJ = jj + ii[i]; 2923 for (j=0; j<nz; j++) { 2924 if (*JJ >= cstart) break; 2925 JJ++; 2926 } 2927 d = 0; 2928 for (; j<nz; j++) { 2929 if (*JJ++ >= cend) break; 2930 d++; 2931 } 2932 d_nnz[i] = d; 2933 o_nnz[i] = nz - d; 2934 } 2935 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2936 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2937 2938 values = (PetscScalar*)V; 2939 if (!values) { 2940 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2941 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2942 } 2943 for (i=0; i<m; i++) { 2944 PetscInt row = i + rstart; 2945 PetscInt ncols = ii[i+1] - ii[i]; 2946 const PetscInt *icols = jj + ii[i]; 2947 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2948 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2949 } 2950 2951 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2952 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 2955 PetscFunctionReturn(0); 2956 } 2957 EXTERN_C_END 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2961 /*@C 2962 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2963 (the default parallel PETSc format). 2964 2965 Collective on MPI_Comm 2966 2967 Input Parameters: 2968 + A - the matrix 2969 . bs - the block size 2970 . i - the indices into j for the start of each local row (starts with zero) 2971 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2972 - v - optional values in the matrix 2973 2974 Level: developer 2975 2976 .keywords: matrix, aij, compressed row, sparse, parallel 2977 2978 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2979 @*/ 2980 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2981 { 2982 PetscErrorCode ierr; 2983 2984 PetscFunctionBegin; 2985 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 2989 EXTERN_C_BEGIN 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2992 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2993 { 2994 Mat_MPIBAIJ *b; 2995 PetscErrorCode ierr; 2996 PetscInt i, newbs = PetscAbs(bs); 2997 2998 PetscFunctionBegin; 2999 if (bs < 0) { 3000 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3001 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3002 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3003 bs = PetscAbs(bs); 3004 } 3005 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3006 bs = newbs; 3007 3008 3009 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3010 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3011 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3012 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3013 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3014 3015 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3016 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3019 3020 if (d_nnz) { 3021 for (i=0; i<B->rmap->n/bs; i++) { 3022 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3023 } 3024 } 3025 if (o_nnz) { 3026 for (i=0; i<B->rmap->n/bs; i++) { 3027 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3028 } 3029 } 3030 3031 b = (Mat_MPIBAIJ*)B->data; 3032 b->bs2 = bs*bs; 3033 b->mbs = B->rmap->n/bs; 3034 b->nbs = B->cmap->n/bs; 3035 b->Mbs = B->rmap->N/bs; 3036 b->Nbs = B->cmap->N/bs; 3037 3038 for (i=0; i<=b->size; i++) { 3039 b->rangebs[i] = B->rmap->range[i]/bs; 3040 } 3041 b->rstartbs = B->rmap->rstart/bs; 3042 b->rendbs = B->rmap->rend/bs; 3043 b->cstartbs = B->cmap->rstart/bs; 3044 b->cendbs = B->cmap->rend/bs; 3045 3046 if (!B->preallocated) { 3047 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3048 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3049 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3050 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3051 ierr = 
MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3052 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3053 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3054 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3055 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3056 } 3057 3058 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3059 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3060 B->preallocated = PETSC_TRUE; 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 EXTERN_C_BEGIN 3066 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3067 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3068 EXTERN_C_END 3069 3070 3071 EXTERN_C_BEGIN 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3074 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3075 { 3076 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3077 PetscErrorCode ierr; 3078 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3079 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3080 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3081 3082 PetscFunctionBegin; 3083 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3084 ii[0] = 0; 3085 CHKMEMQ; 3086 for (i=0; i<M; i++) { 3087 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3088 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3089 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3090 /* remove one from count of matrix has diagonal */ 3091 for (j=id[i]; j<id[i+1]; j++) { 3092 if (jd[j] == i) {ii[i+1]--;break;} 3093 } 3094 CHKMEMQ; 3095 } 3096 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3097 cnt = 0; 3098 for (i=0; i<M; i++) { 3099 for (j=io[i]; j<io[i+1]; j++) { 3100 if (garray[jo[j]] > rstart) break; 3101 jj[cnt++] = garray[jo[j]]; 3102 CHKMEMQ; 3103 } 3104 for (k=id[i]; k<id[i+1]; k++) { 3105 if (jd[k] != i) { 3106 jj[cnt++] = rstart + jd[k]; 3107 CHKMEMQ; 3108 } 3109 } 3110 for (;j<io[i+1]; j++) { 3111 jj[cnt++] = garray[jo[j]]; 3112 CHKMEMQ; 3113 } 3114 } 3115 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3116 PetscFunctionReturn(0); 3117 } 3118 EXTERN_C_END 3119 3120 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3121 EXTERN_C_BEGIN 3122 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3123 EXTERN_C_END 3124 3125 EXTERN_C_BEGIN 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3128 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3129 { 3130 PetscErrorCode ierr; 3131 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3132 Mat B; 3133 Mat_MPIAIJ *b; 3134 3135 PetscFunctionBegin; 3136 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3137 3138 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3139 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3140 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3141 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3142 b = (Mat_MPIAIJ*) B->data; 3143 3144 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3145 ierr = MatDestroy(b->B);CHKERRQ(ierr); 
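/* DisAssemble_MPIBAIJ() expands A's off-diagonal block back to global column numbering; each sequential block of A is then converted to AIJ and installed as the corresponding block of B, and the assemblies below rebuild the parallel data structures of B and restore A */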
3146 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3147 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3149 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 if (reuse == MAT_REUSE_MATRIX) { 3154 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3155 } else { 3156 *newmat = B; 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160 EXTERN_C_END 3161 3162 EXTERN_C_BEGIN 3163 #if defined(PETSC_HAVE_MUMPS) 3164 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3165 #endif 3166 EXTERN_C_END 3167 3168 /*MC 3169 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3170 3171 Options Database Keys: 3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3173 . -mat_block_size <bs> - set the blocksize used to store the matrix 3174 - -mat_use_hash_table <fact> 3175 3176 Level: beginner 3177 3178 .seealso: MatCreateMPIBAIJ 3179 M*/ 3180 3181 EXTERN_C_BEGIN 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "MatCreate_MPIBAIJ" 3184 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3185 { 3186 Mat_MPIBAIJ *b; 3187 PetscErrorCode ierr; 3188 PetscBool flg; 3189 3190 PetscFunctionBegin; 3191 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3192 B->data = (void*)b; 3193 3194 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3195 B->assembled = PETSC_FALSE; 3196 3197 B->insertmode = NOT_SET_VALUES; 3198 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3199 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3200 3201 /* build local table of row and column ownerships */ 3202 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3203 3204 /* build cache for off array entries formed */ 3205 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3206 b->donotstash = PETSC_FALSE; 3207 b->colmap = PETSC_NULL; 3208 b->garray = PETSC_NULL; 3209 b->roworiented = PETSC_TRUE; 3210 3211 /* stuff used in block assembly */ 3212 b->barray = 0; 3213 3214 /* stuff used for matrix vector multiply */ 3215 b->lvec = 0; 3216 b->Mvctx = 0; 3217 3218 /* stuff for MatGetRow() */ 3219 b->rowindices = 0; 3220 b->rowvalues = 0; 3221 b->getrowactive = PETSC_FALSE; 3222 3223 /* hash table stuff */ 3224 b->ht = 0; 3225 b->hd = 0; 3226 b->ht_size = 0; 3227 b->ht_flag = PETSC_FALSE; 3228 b->ht_fact = 0; 3229 b->ht_total_ct = 0; 3230 b->ht_insert_ct = 0; 3231 3232 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3233 b->ijonly = PETSC_FALSE; 3234 3235 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3236 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3237 if (flg) { 3238 PetscReal fact = 1.39; 3239 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3240 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3241 if (fact <= 1.0) fact = 1.39; 3242 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3243 ierr = 
PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3244 } 3245 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3246 3247 #if defined(PETSC_HAVE_MUMPS) 3248 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3249 #endif 3250 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3251 "MatConvert_MPIBAIJ_MPIAdj", 3252 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3253 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3254 "MatConvert_MPIBAIJ_MPIAIJ", 3255 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3256 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3257 "MatStoreValues_MPIBAIJ", 3258 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3259 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3260 "MatRetrieveValues_MPIBAIJ", 3261 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3262 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3263 "MatGetDiagonalBlock_MPIBAIJ", 3264 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3265 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3266 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3267 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3268 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3269 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3270 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3271 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3272 "MatDiagonalScaleLocal_MPIBAIJ", 3273 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3274 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3275 "MatSetHashTableFactor_MPIBAIJ", 3276 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3277 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3278 PetscFunctionReturn(0); 3279 } 3280 EXTERN_C_END 3281 3282 /*MC 3283 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3284 3285 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3286 and MATMPIBAIJ otherwise. 3287 3288 Options Database Keys: 3289 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3290 3291 Level: beginner 3292 3293 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3294 M*/ 3295 3296 EXTERN_C_BEGIN 3297 #undef __FUNCT__ 3298 #define __FUNCT__ "MatCreate_BAIJ" 3299 PetscErrorCode MatCreate_BAIJ(Mat A) 3300 { 3301 PetscErrorCode ierr; 3302 PetscMPIInt size; 3303 3304 PetscFunctionBegin; 3305 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3306 if (size == 1) { 3307 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3308 } else { 3309 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3310 } 3311 PetscFunctionReturn(0); 3312 } 3313 EXTERN_C_END 3314 3315 #undef __FUNCT__ 3316 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3317 /*@C 3318 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3319 (block compressed row). For good matrix assembly performance 3320 the user should preallocate the matrix storage by setting the parameters 3321 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3322 performance can be increased by more than a factor of 50. 
3323 
3324    Collective on Mat
3325 
3326    Input Parameters:
3327 +  A - the matrix
3328 .  bs - size of block
3329 .  d_nz - number of block nonzeros per block row in diagonal portion of local
3330           submatrix (same for all local rows)
3331 .  d_nnz - array containing the number of block nonzeros in the various block rows
3332           in the diagonal portion of the local submatrix (possibly different for each block
3333           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
3334 .  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
3335           submatrix (same for all local rows).
3336 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3337           off-diagonal portion of the local submatrix (possibly different for
3338           each block row) or PETSC_NULL.
3339 
3340    If the *_nnz parameter is given then the *_nz parameter is ignored
3341 
3342    Options Database Keys:
3343 +   -mat_block_size - size of the blocks to use
3344 -   -mat_use_hash_table <fact>
3345 
3346    Notes:
3347    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3348    then it must be used on all processors that share the object for that argument.
3349 
3350    Storage Information:
3351    For a square global matrix we define each processor's diagonal portion
3352    to be its local rows and the corresponding columns (a square submatrix);
3353    each processor's off-diagonal portion encompasses the remainder of the
3354    local matrix (a rectangular submatrix).
3355 
3356    The user can specify preallocated storage for the diagonal part of
3357    the local submatrix with either d_nz or d_nnz (not both). Set
3358    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3359    memory allocation. Likewise, specify preallocated storage for the
3360    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3361 
3362    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3363    the figure below we depict these three local rows and all columns (0-11).
3364 
3365 .vb
3366            0 1 2 3 4 5 6 7 8 9 10 11
3367           -------------------
3368    row 3  | o o o d d d o o o o o o
3369    row 4  | o o o d d d o o o o o o
3370    row 5  | o o o d d d o o o o o o
3371           -------------------
3372 .ve
3373 
3374    Thus, any entries in the d locations are stored in the d (diagonal)
3375    submatrix, and any entries in the o locations are stored in the
3376    o (off-diagonal) submatrix. Note that the d and the o submatrices are
3377    stored simply in the MATSEQBAIJ format for compressed row storage.
3378 
3379    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3380    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3381    In general, for PDE problems in which most nonzeros are near the diagonal,
3382    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3383    or you will get TERRIBLE performance; see the users' manual chapter on
3384    matrices.
3385 
3386    You can call MatGetInfo() to get information on how effective the preallocation was;
3387    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
3388    You can also run with the option -info and look for messages with the string
3389    malloc in them to see if additional memory allocation was needed.
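
   Example (an illustrative sketch only; the communicator, block size, and per-block-row
   counts below are assumptions, not values taken from this source):
.vb
     Mat      A;
     PetscInt bs = 2, d_nnz[] = {2,1,1}, o_nnz[] = {1,0,1};  /* one count per local block row */

     MatCreate(comm,&A);
     MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE); /* 3 local block rows */
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);         /* *_nnz given, so *_nz is ignored */
.ve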
3390 
3391    Level: intermediate
3392 
3393 .keywords: matrix, block, aij, compressed row, sparse, parallel
3394 
3395 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3396 @*/
3397 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3398 {
3399   PetscErrorCode ierr;
3400 
3401   PetscFunctionBegin;
3402   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3403   PetscFunctionReturn(0);
3404 }
3405 
3406 #undef __FUNCT__
3407 #define __FUNCT__ "MatCreateMPIBAIJ"
3408 /*@C
3409    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3410    (block compressed row). For good matrix assembly performance
3411    the user should preallocate the matrix storage by setting the parameters
3412    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3413    performance can be increased by more than a factor of 50.
3414 
3415    Collective on MPI_Comm
3416 
3417    Input Parameters:
3418 +  comm - MPI communicator
3419 .  bs - size of block
3420 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3421        This value should be the same as the local size used in creating the
3422        y vector for the matrix-vector product y = Ax.
3423 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3424        This value should be the same as the local size used in creating the
3425        x vector for the matrix-vector product y = Ax.
3426 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3427 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3428 .  d_nz - number of nonzero blocks per block row in diagonal portion of local
3429           submatrix (same for all local rows)
3430 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3431           in the diagonal portion of the local submatrix (possibly different for each block
3432           row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
3433 .  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3434           submatrix (same for all local rows).
3435 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3436           off-diagonal portion of the local submatrix (possibly different for
3437           each block row) or PETSC_NULL.
3438 
3439    Output Parameter:
3440 .  A - the matrix
3441 
3442    Options Database Keys:
3443 +   -mat_block_size - size of the blocks to use
3444 -   -mat_use_hash_table <fact>
3445 
3446    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3447    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3448    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3449 
3450    Notes:
3451    If the *_nnz parameter is given then the *_nz parameter is ignored
3452 
3453    A nonzero block is any block that has 1 or more nonzeros in it
3454 
3455    The user MUST specify either the local or global matrix dimensions
3456    (possibly both).
3457 
3458    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3459    then it must be used on all processors that share the object for that argument.
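
   Example (a minimal illustrative call; the communicator, sizes, and nonzero estimates here
   are assumptions, not values taken from this source):
.vb
     Mat A;
     MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve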
3460 3461 Storage Information: 3462 For a square global matrix we define each processor's diagonal portion 3463 to be its local rows and the corresponding columns (a square submatrix); 3464 each processor's off-diagonal portion encompasses the remainder of the 3465 local matrix (a rectangular submatrix). 3466 3467 The user can specify preallocated storage for the diagonal part of 3468 the local submatrix with either d_nz or d_nnz (not both). Set 3469 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3470 memory allocation. Likewise, specify preallocated storage for the 3471 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3472 3473 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3474 the figure below we depict these three local rows and all columns (0-11). 3475 3476 .vb 3477 0 1 2 3 4 5 6 7 8 9 10 11 3478 ------------------- 3479 row 3 | o o o d d d o o o o o o 3480 row 4 | o o o d d d o o o o o o 3481 row 5 | o o o d d d o o o o o o 3482 ------------------- 3483 .ve 3484 3485 Thus, any entries in the d locations are stored in the d (diagonal) 3486 submatrix, and any entries in the o locations are stored in the 3487 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3488 stored simply in the MATSEQBAIJ format for compressed row storage. 3489 3490 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3491 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3492 In general, for PDE problems in which most nonzeros are near the diagonal, 3493 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3494 or you will get TERRIBLE performance; see the users' manual chapter on 3495 matrices. 3496 3497 Level: intermediate 3498 3499 .keywords: matrix, block, aij, compressed row, sparse, parallel 3500 3501 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3502 @*/ 3503 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3504 { 3505 PetscErrorCode ierr; 3506 PetscMPIInt size; 3507 3508 PetscFunctionBegin; 3509 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3510 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3511 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3512 if (size > 1) { 3513 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3514 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3515 } else { 3516 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3517 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3518 } 3519 PetscFunctionReturn(0); 3520 } 3521 3522 #undef __FUNCT__ 3523 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3524 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3525 { 3526 Mat mat; 3527 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3528 PetscErrorCode ierr; 3529 PetscInt len=0; 3530 3531 PetscFunctionBegin; 3532 *newmat = 0; 3533 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3534 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3535 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3536 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3537 3538 mat->factortype = matin->factortype; 3539 mat->preallocated = PETSC_TRUE; 
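  /* the duplicate starts out assembled; block-size bookkeeping is copied from the original below */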
3540 mat->assembled = PETSC_TRUE; 3541 mat->insertmode = NOT_SET_VALUES; 3542 3543 a = (Mat_MPIBAIJ*)mat->data; 3544 mat->rmap->bs = matin->rmap->bs; 3545 a->bs2 = oldmat->bs2; 3546 a->mbs = oldmat->mbs; 3547 a->nbs = oldmat->nbs; 3548 a->Mbs = oldmat->Mbs; 3549 a->Nbs = oldmat->Nbs; 3550 3551 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3552 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3553 3554 a->size = oldmat->size; 3555 a->rank = oldmat->rank; 3556 a->donotstash = oldmat->donotstash; 3557 a->roworiented = oldmat->roworiented; 3558 a->rowindices = 0; 3559 a->rowvalues = 0; 3560 a->getrowactive = PETSC_FALSE; 3561 a->barray = 0; 3562 a->rstartbs = oldmat->rstartbs; 3563 a->rendbs = oldmat->rendbs; 3564 a->cstartbs = oldmat->cstartbs; 3565 a->cendbs = oldmat->cendbs; 3566 3567 /* hash table stuff */ 3568 a->ht = 0; 3569 a->hd = 0; 3570 a->ht_size = 0; 3571 a->ht_flag = oldmat->ht_flag; 3572 a->ht_fact = oldmat->ht_fact; 3573 a->ht_total_ct = 0; 3574 a->ht_insert_ct = 0; 3575 3576 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3577 if (oldmat->colmap) { 3578 #if defined (PETSC_USE_CTABLE) 3579 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3580 #else 3581 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3582 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3583 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3584 #endif 3585 } else a->colmap = 0; 3586 3587 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3588 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3589 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3590 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3591 } else a->garray = 0; 3592 3593 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3594 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3595 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3596 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3597 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3598 3599 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3600 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3601 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3602 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3603 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3604 *newmat = mat; 3605 3606 PetscFunctionReturn(0); 3607 } 3608 3609 #undef __FUNCT__ 3610 #define __FUNCT__ "MatLoad_MPIBAIJ" 3611 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3612 { 3613 PetscErrorCode ierr; 3614 int fd; 3615 PetscInt i,nz,j,rstart,rend; 3616 PetscScalar *vals,*buf; 3617 MPI_Comm comm = ((PetscObject)viewer)->comm; 3618 MPI_Status status; 3619 PetscMPIInt rank,size,maxnz; 3620 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3621 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3622 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3623 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3624 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3625 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3626 3627 PetscFunctionBegin; 3628 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3629 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3630 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3631 3632 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3633 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3634 if (!rank) { 3635 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3636 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3637 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3638 } 3639 3640 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3641 3642 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3643 M = header[1]; N = header[2]; 3644 3645 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3646 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3647 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3648 3649 /* If global sizes are set, check if they are consistent with that given in the file */ 3650 if (sizesset) { 3651 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3652 } 3653 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3654 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3655 3656 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3657 3658 /* 3659 This code adds extra rows to make sure the number of rows is 3660 divisible by the blocksize 3661 */ 3662 Mbs = M/bs; 3663 extra_rows = bs - M + bs*Mbs; 3664 if (extra_rows == bs) extra_rows = 0; 3665 else Mbs++; 3666 if (extra_rows && !rank) { 3667 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3668 } 3669 3670 /* determine ownership of all rows */ 3671 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3672 mbs = Mbs/size + ((Mbs % size) > rank); 3673 m = mbs*bs; 3674 } else { /* User set */ 3675 m = newmat->rmap->n; 3676 mbs = m/bs; 3677 } 3678 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3679 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3680 3681 /* process 0 needs enough room for process with most rows */ 3682 if (!rank) { 3683 mmax = rowners[1]; 3684 for (i=2; i<size; i++) { 3685 mmax = PetscMax(mmax,rowners[i]); 3686 } 3687 mmax*=bs; 3688 } else mmax = m; 3689 3690 rowners[0] = 0; 3691 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3692 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3693 rstart = rowners[rank]; 3694 rend = rowners[rank+1]; 3695 3696 /* distribute row lengths to all processors */ 3697 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3698 if (!rank) { 3699 mend = m; 3700 if (size == 1) mend = mend - extra_rows; 3701 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3702 for (j=mend; j<m; j++) locrowlens[j] = 1; 3703 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3704 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3705 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3706 for (j=0; j<m; j++) { 
3707 procsnz[0] += locrowlens[j]; 3708 } 3709 for (i=1; i<size; i++) { 3710 mend = browners[i+1] - browners[i]; 3711 if (i == size-1) mend = mend - extra_rows; 3712 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3713 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3714 /* calculate the number of nonzeros on each processor */ 3715 for (j=0; j<browners[i+1]-browners[i]; j++) { 3716 procsnz[i] += rowlengths[j]; 3717 } 3718 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3721 } else { 3722 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3723 } 3724 3725 if (!rank) { 3726 /* determine max buffer needed and allocate it */ 3727 maxnz = procsnz[0]; 3728 for (i=1; i<size; i++) { 3729 maxnz = PetscMax(maxnz,procsnz[i]); 3730 } 3731 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3732 3733 /* read in my part of the matrix column indices */ 3734 nz = procsnz[0]; 3735 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3736 mycols = ibuf; 3737 if (size == 1) nz -= extra_rows; 3738 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3739 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3740 3741 /* read in every ones (except the last) and ship off */ 3742 for (i=1; i<size-1; i++) { 3743 nz = procsnz[i]; 3744 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3745 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3746 } 3747 /* read in the stuff for the last proc */ 3748 if (size != 1) { 3749 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3750 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3751 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3752 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3753 } 3754 ierr = PetscFree(cols);CHKERRQ(ierr); 3755 } else { 3756 /* determine buffer space needed for message */ 3757 nz = 0; 3758 for (i=0; i<m; i++) { 3759 nz += locrowlens[i]; 3760 } 3761 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3762 mycols = ibuf; 3763 /* receive message of column indices*/ 3764 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3765 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3766 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3767 } 3768 3769 /* loop over local rows, determining number of off diagonal entries */ 3770 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3771 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3772 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3773 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3774 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3775 rowcount = 0; nzcount = 0; 3776 for (i=0; i<mbs; i++) { 3777 dcount = 0; 3778 odcount = 0; 3779 for (j=0; j<bs; j++) { 3780 kmax = locrowlens[rowcount]; 3781 for (k=0; k<kmax; k++) { 3782 tmp = mycols[nzcount++]/bs; 3783 if (!mask[tmp]) { 3784 mask[tmp] = 1; 3785 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3786 else masked1[dcount++] = tmp; 3787 } 3788 } 3789 rowcount++; 3790 } 3791 3792 dlens[i] = dcount; 3793 odlens[i] = odcount; 3794 3795 /* zero out the mask elements we set */ 3796 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3797 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3798 } 3799 3800 3801 if (!sizesset) { 3802 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3803 } 3804 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3805 3806 if (!rank) { 3807 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3808 /* read in my part of the matrix numerical values */ 3809 nz = procsnz[0]; 3810 vals = buf; 3811 mycols = ibuf; 3812 if (size == 1) nz -= extra_rows; 3813 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3814 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3815 3816 /* insert into matrix */ 3817 jj = rstart*bs; 3818 for (i=0; i<m; i++) { 3819 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3820 mycols += locrowlens[i]; 3821 vals += locrowlens[i]; 3822 jj++; 3823 } 3824 /* read in other processors (except the last one) and ship out */ 3825 for (i=1; i<size-1; i++) { 3826 nz = procsnz[i]; 3827 vals = buf; 3828 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3829 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3830 } 3831 /* the last proc */ 3832 if (size != 1){ 3833 nz = procsnz[i] - extra_rows; 3834 vals = buf; 3835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3836 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3837 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3840 } else { 3841 /* receive numeric values */ 3842 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3843 3844 /* receive message of values*/ 3845 vals = buf; 3846 mycols = ibuf; 3847 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3848 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3849 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3850 3851 /* insert into matrix */ 3852 jj = rstart*bs; 3853 for (i=0; i<m; i++) { 3854 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3855 mycols += locrowlens[i]; 3856 vals += locrowlens[i]; 3857 jj++; 3858 } 3859 } 3860 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3861 ierr = PetscFree(buf);CHKERRQ(ierr); 3862 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3863 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3864 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3865 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3866 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 3869 PetscFunctionReturn(0); 3870 } 3871 3872 #undef __FUNCT__ 3873 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3874 /*@ 3875 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3876 3877 Input Parameters: 3878 . mat - the matrix 3879 . 
fact - factor
3880 
3881    Not Collective, each process can use a different factor
3882 
3883    Level: advanced
3884 
3885    Notes:
3886    This can also be set by the command line option: -mat_use_hash_table <fact>
3887 
3888 .keywords: matrix, hashtable, factor, HT
3889 
3890 .seealso: MatSetOption()
3891 @*/
3892 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 EXTERN_C_BEGIN
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3904 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3905 {
3906   Mat_MPIBAIJ *baij;
3907 
3908   PetscFunctionBegin;
3909   baij = (Mat_MPIBAIJ*)mat->data;
3910   baij->ht_fact = fact;
3911   PetscFunctionReturn(0);
3912 }
3913 EXTERN_C_END
3914 
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3917 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3918 {
3919   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3920   PetscFunctionBegin;
3921   *Ad = a->A;
3922   *Ao = a->B;
3923   *colmap = a->garray;
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*
3928   Special version for direct calls from Fortran (to eliminate two function call overheads)
3929 */
3930 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3931 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3932 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3933 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3934 #endif
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3938 /*@C
3939    MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3940 
3941    Collective on Mat
3942 
3943    Input Parameters:
3944 +  mat - the matrix
3945 .  min - number of input rows
3946 .  im - input rows
3947 .  nin - number of input columns
3948 .  in - input columns
3949 .  v - numerical values input
3950 -  addvin - INSERT_VALUES or ADD_VALUES
3951 
3952    Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3953 3954 Level: advanced 3955 3956 .seealso: MatSetValuesBlocked() 3957 @*/ 3958 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3959 { 3960 /* convert input arguments to C version */ 3961 Mat mat = *matin; 3962 PetscInt m = *min, n = *nin; 3963 InsertMode addv = *addvin; 3964 3965 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3966 const MatScalar *value; 3967 MatScalar *barray=baij->barray; 3968 PetscBool roworiented = baij->roworiented; 3969 PetscErrorCode ierr; 3970 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3971 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3972 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3973 3974 PetscFunctionBegin; 3975 /* tasks normally handled by MatSetValuesBlocked() */ 3976 if (mat->insertmode == NOT_SET_VALUES) { 3977 mat->insertmode = addv; 3978 } 3979 #if defined(PETSC_USE_DEBUG) 3980 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3981 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3982 #endif 3983 if (mat->assembled) { 3984 mat->was_assembled = PETSC_TRUE; 3985 mat->assembled = PETSC_FALSE; 3986 } 3987 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3988 3989 3990 if(!barray) { 3991 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3992 baij->barray = barray; 3993 } 3994 3995 if (roworiented) { 3996 stepval = (n-1)*bs; 3997 } else { 3998 stepval = (m-1)*bs; 3999 } 4000 for (i=0; i<m; i++) { 4001 if (im[i] < 0) continue; 4002 #if defined(PETSC_USE_DEBUG) 4003 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4004 #endif 4005 if (im[i] >= rstart && im[i] < rend) { 4006 row = im[i] - rstart; 4007 for (j=0; j<n; j++) { 4008 /* If NumCol = 1 then a copy is not required */ 4009 if ((roworiented) && (n == 1)) { 4010 barray = (MatScalar*)v + i*bs2; 4011 } else if((!roworiented) && (m == 1)) { 4012 barray = (MatScalar*)v + j*bs2; 4013 } else { /* Here a copy is required */ 4014 if (roworiented) { 4015 value = v + i*(stepval+bs)*bs + j*bs; 4016 } else { 4017 value = v + j*(stepval+bs)*bs + i*bs; 4018 } 4019 for (ii=0; ii<bs; ii++,value+=stepval) { 4020 for (jj=0; jj<bs; jj++) { 4021 *barray++ = *value++; 4022 } 4023 } 4024 barray -=bs2; 4025 } 4026 4027 if (in[j] >= cstart && in[j] < cend){ 4028 col = in[j] - cstart; 4029 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4030 } 4031 else if (in[j] < 0) continue; 4032 #if defined(PETSC_USE_DEBUG) 4033 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4034 #endif 4035 else { 4036 if (mat->was_assembled) { 4037 if (!baij->colmap) { 4038 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4039 } 4040 4041 #if defined(PETSC_USE_DEBUG) 4042 #if defined (PETSC_USE_CTABLE) 4043 { PetscInt data; 4044 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4045 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4046 } 4047 #else 4048 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4049 #endif 4050 #endif 4051 #if defined (PETSC_USE_CTABLE) 4052 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4053 col = (col - 1)/bs; 4054 #else 
4055           col = (baij->colmap[in[j]] - 1)/bs;
4056 #endif
4057           if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4058             ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4059             col = in[j];
4060           }
4061         }
4062         else col = in[j];
4063         ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4064       }
4065     }
4066   } else {
4067     if (!baij->donotstash) {
4068       if (roworiented) {
4069         ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070       } else {
4071         ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4072       }
4073     }
4074   }
4075 }
4076 
4077   /* task normally handled by MatSetValuesBlocked() */
4078   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
4081 
4082 #undef __FUNCT__
4083 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4084 /*@
4085    MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4086    rows in standard CSR format.
4087 
4088    Collective on MPI_Comm
4089 
4090    Input Parameters:
4091 +  comm - MPI communicator
4092 .  bs - the block size, only a block size of 1 is supported
4093 .  m - number of local rows (Cannot be PETSC_DECIDE)
4094 .  n - This value should be the same as the local size used in creating the
4095        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4096        calculated if N is given) For square matrices n is almost always m.
4097 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4098 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4099 .  i - row indices
4100 .  j - column indices
4101 -  a - matrix values
4102 
4103    Output Parameter:
4104 .  mat - the matrix
4105 
4106    Level: intermediate
4107 
4108    Notes:
4109    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4110    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4111    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4112 
4113    The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4114 
4115 .keywords: matrix, aij, compressed row, sparse, parallel
4116 
4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4118           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4119 @*/
4120 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4121 {
4122   PetscErrorCode ierr;
4123 
4124 
4125   PetscFunctionBegin;
4126   if (i[0]) {
4127     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4128   }
4129   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4131   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4132   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4133   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4134   PetscFunctionReturn(0);
4135 }
4136 
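
/*
   Example of calling MatCreateMPIBAIJWithArrays() (an illustrative sketch only; the CSR arrays
   below describe a hypothetical two-row local piece with bs = 1 and four global columns):

     PetscInt    i[] = {0,2,4};             row offsets into j (0 based, length m+1)
     PetscInt    j[] = {0,1,1,3};           column indices (0 based)
     PetscScalar a[] = {1.0,2.0,3.0,4.0};   one value per (i,j) entry
     Mat         A;

     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);
*/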