#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
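/*
   Illustrative example (not part of the library): with bs = 2 and off-diagonal
   block columns garray = {3,7,11}, the loop above records colmap[3] = 1,
   colmap[7] = 3 and colmap[11] = 5 (or the equivalent PetscTable entries),
   i.e. 1 + the first local point column of each block.  A later lookup of
   global point column 15 then computes colmap[15/bs] - 1 + 15%bs = 3, the
   local column in the off-diagonal matrix B, exactly as done in
   MatSetValues_MPIBAIJ() below.
*/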
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   = a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   = b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
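/*
   Caller-level usage sketch (illustrative only, not part of this file): with
   bs = 2 the same 2x2 block can be inserted either point-wise through the
   routine above or block-wise through MatSetValuesBlocked_MPIBAIJ() below.
   For a MATMPIBAIJ matrix and block indices I,J (point indices 2*I,2*J),
   assuming the default row-oriented ordering:

       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
       PetscInt    rows[2] = {2*I,2*I+1},cols[2] = {2*J,2*J+1};
       ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
   or, equivalently, with block (not point) indices:
       ierr = MatSetValuesBlocked(mat,1,&I,1,&J,vals,INSERT_VALUES);CHKERRQ(ierr);

   Rows owned by another process are stashed and communicated later in
   MatAssemblyBegin()/MatAssemblyEnd().
*/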
"MatSetValuesBlocked_MPIBAIJ" 276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 277 { 278 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 279 const PetscScalar *value; 280 MatScalar *barray=baij->barray; 281 PetscBool roworiented = baij->roworiented; 282 PetscErrorCode ierr; 283 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 284 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 285 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 286 287 PetscFunctionBegin; 288 if(!barray) { 289 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 290 baij->barray = barray; 291 } 292 293 if (roworiented) { 294 stepval = (n-1)*bs; 295 } else { 296 stepval = (m-1)*bs; 297 } 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) { 319 barray[jj] = value[jj]; 320 } 321 barray += bs; 322 } 323 barray -= bs2; 324 } 325 326 if (in[j] >= cstart && in[j] < cend){ 327 col = in[j] - cstart; 328 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 329 } 330 else if (in[j] < 0) continue; 331 #if defined(PETSC_USE_DEBUG) 332 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 333 #endif 334 else { 335 if (mat->was_assembled) { 336 if (!baij->colmap) { 337 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 338 } 339 340 #if defined(PETSC_USE_DEBUG) 341 #if defined (PETSC_USE_CTABLE) 342 { PetscInt data; 343 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 344 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 } 346 #else 347 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 348 #endif 349 #endif 350 #if defined (PETSC_USE_CTABLE) 351 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 352 col = (col - 1)/bs; 353 #else 354 col = (baij->colmap[in[j]] - 1)/bs; 355 #endif 356 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 357 ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 358 col = in[j]; 359 } 360 } 361 else col = in[j]; 362 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 363 } 364 } 365 } else { 366 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 367 if (!baij->donotstash) { 368 if (roworiented) { 369 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 370 } else { 371 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 372 } 373 } 374 } 375 } 376 PetscFunctionReturn(0); 377 } 378 379 #define HASH_KEY 0.6180339887 380 
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,ii,jj,row,col;
  PetscInt       rstart=baij->rstartbs;
  PetscInt       rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
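/*
   Caller-level sketch (illustrative only): after all MatSetValues()/
   MatSetValuesBlocked() calls, every process calls

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPIBAIJ() above starts the scatters of the stashed
   off-process entries; MatAssemblyEnd_MPIBAIJ() below receives them,
   re-inserts them locally, and then assembles the sequential diagonal (A)
   and off-diagonal (B) blocks.
*/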
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
     information needed to make them for each row from a block row.  This does require more communication but still not more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
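/*
   Note on the two routines above (descriptive comment, no additional functionality):
   they overlap communication with computation.  The scatter of the needed
   off-process entries of xx into a->lvec is started, the purely local product
   with the diagonal block a->A is performed while the messages are in flight,
   and only then is the scatter completed and the off-diagonal block a->B
   applied with a multiply-add:

       VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
       (*a->A->ops->mult)(a->A,xx,yy);
       VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
       (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
*/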
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
info->memory; isend[4] += info->mallocs; 1558 if (flag == MAT_LOCAL) { 1559 info->nz_used = isend[0]; 1560 info->nz_allocated = isend[1]; 1561 info->nz_unneeded = isend[2]; 1562 info->memory = isend[3]; 1563 info->mallocs = isend[4]; 1564 } else if (flag == MAT_GLOBAL_MAX) { 1565 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1566 info->nz_used = irecv[0]; 1567 info->nz_allocated = irecv[1]; 1568 info->nz_unneeded = irecv[2]; 1569 info->memory = irecv[3]; 1570 info->mallocs = irecv[4]; 1571 } else if (flag == MAT_GLOBAL_SUM) { 1572 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1573 info->nz_used = irecv[0]; 1574 info->nz_allocated = irecv[1]; 1575 info->nz_unneeded = irecv[2]; 1576 info->memory = irecv[3]; 1577 info->mallocs = irecv[4]; 1578 } else { 1579 SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1580 } 1581 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1582 info->fill_ratio_needed = 0; 1583 info->factor_mallocs = 0; 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1589 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1590 { 1591 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 switch (op) { 1596 case MAT_NEW_NONZERO_LOCATIONS: 1597 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1598 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1599 case MAT_KEEP_NONZERO_PATTERN: 1600 case MAT_NEW_NONZERO_LOCATION_ERR: 1601 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1602 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1603 break; 1604 case MAT_ROW_ORIENTED: 1605 a->roworiented = flg; 1606 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1607 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1608 break; 1609 case MAT_NEW_DIAGONALS: 1610 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1611 break; 1612 case MAT_IGNORE_OFF_PROC_ENTRIES: 1613 a->donotstash = flg; 1614 break; 1615 case MAT_USE_HASH_TABLE: 1616 a->ht_flag = flg; 1617 break; 1618 case MAT_SYMMETRIC: 1619 case MAT_STRUCTURALLY_SYMMETRIC: 1620 case MAT_HERMITIAN: 1621 case MAT_SYMMETRY_ETERNAL: 1622 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1623 break; 1624 default: 1625 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1632 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1633 { 1634 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1635 Mat_SeqBAIJ *Aloc; 1636 Mat B; 1637 PetscErrorCode ierr; 1638 PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1639 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1640 MatScalar *a; 1641 1642 PetscFunctionBegin; 1643 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1644 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1645 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1646 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1647 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1648 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1649 } else { 1650 B = *matout; 1651 } 1652 1653 /* copy over the A part */ 1654 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1655 ai = Aloc->i; aj = 
Aloc->j; a = Aloc->a; 1656 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1657 1658 for (i=0; i<mbs; i++) { 1659 rvals[0] = bs*(baij->rstartbs + i); 1660 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1661 for (j=ai[i]; j<ai[i+1]; j++) { 1662 col = (baij->cstartbs+aj[j])*bs; 1663 for (k=0; k<bs; k++) { 1664 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1665 col++; a += bs; 1666 } 1667 } 1668 } 1669 /* copy over the B part */ 1670 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1671 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1672 for (i=0; i<mbs; i++) { 1673 rvals[0] = bs*(baij->rstartbs + i); 1674 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1675 for (j=ai[i]; j<ai[i+1]; j++) { 1676 col = baij->garray[aj[j]]*bs; 1677 for (k=0; k<bs; k++) { 1678 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1679 col++; a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1688 *matout = B; 1689 } else { 1690 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1691 } 1692 PetscFunctionReturn(0); 1693 } 1694 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1698 { 1699 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1700 Mat a = baij->A,b = baij->B; 1701 PetscErrorCode ierr; 1702 PetscInt s1,s2,s3; 1703 1704 PetscFunctionBegin; 1705 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1706 if (rr) { 1707 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1708 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1709 /* Overlap communication with computation. 
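The scatter of rr into the ghosted work vector lvec is only started here; the left scaling of the off-diagonal block and the scaling of the diagonal block proceed while the messages are in flight, and the VecScatterEnd() further down completes the scatter just before the off-diagonal block is right-scaled with lvec.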
*/ 1710 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 } 1712 if (ll) { 1713 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1714 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1715 ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1716 } 1717 /* scale the diagonal block */ 1718 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1719 1720 if (rr) { 1721 /* Do a scatter end and then right scale the off-diagonal block */ 1722 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1724 } 1725 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1732 { 1733 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1734 PetscErrorCode ierr; 1735 PetscMPIInt imdex,size = l->size,n,rank = l->rank; 1736 PetscInt i,*owners = A->rmap->range; 1737 PetscInt *nprocs,j,idx,nsends,row; 1738 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 1739 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1; 1740 PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart; 1741 MPI_Comm comm = ((PetscObject)A)->comm; 1742 MPI_Request *send_waits,*recv_waits; 1743 MPI_Status recv_status,*send_status; 1744 const PetscScalar *xx; 1745 PetscScalar *bb; 1746 #if defined(PETSC_DEBUG) 1747 PetscBool found = PETSC_FALSE; 1748 #endif 1749 1750 PetscFunctionBegin; 1751 /* first count number of contributors to each processor */ 1752 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 1753 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 1755 j = 0; 1756 for (i=0; i<N; i++) { 1757 if (lastidx > (idx = rows[i])) j = 0; 1758 lastidx = idx; 1759 for (; j<size; j++) { 1760 if (idx >= owners[j] && idx < owners[j+1]) { 1761 nprocs[2*j]++; 1762 nprocs[2*j+1] = 1; 1763 owner[i] = j; 1764 #if defined(PETSC_DEBUG) 1765 found = PETSC_TRUE; 1766 #endif 1767 break; 1768 } 1769 } 1770 #if defined(PETSC_DEBUG) 1771 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 1772 found = PETSC_FALSE; 1773 #endif 1774 } 1775 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 1776 1777 if (A->nooffproczerorows) { 1778 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 1779 nrecvs = nsends; 1780 nmax = N; 1781 } else { 1782 /* inform other processors of number of messages and max length*/ 1783 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 1784 } 1785 1786 /* post receives: */ 1787 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 1788 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 1789 for (i=0; i<nrecvs; i++) { 1790 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1791 } 1792 1793 /* do sends: 1794 1) starts[i] gives the starting index in svalues for stuff going to 1795 the ith processor 1796 */ 1797 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 1798 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 1799 ierr = 
PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 1800 starts[0] = 0; 1801 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1802 for (i=0; i<N; i++) { 1803 svalues[starts[owner[i]]++] = rows[i]; 1804 } 1805 1806 starts[0] = 0; 1807 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 1808 count = 0; 1809 for (i=0; i<size; i++) { 1810 if (nprocs[2*i+1]) { 1811 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1812 } 1813 } 1814 ierr = PetscFree(starts);CHKERRQ(ierr); 1815 1816 base = owners[rank]; 1817 1818 /* wait on receives */ 1819 ierr = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr); 1820 count = nrecvs; 1821 slen = 0; 1822 while (count) { 1823 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1824 /* unpack receives into our local space */ 1825 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 1826 source[imdex] = recv_status.MPI_SOURCE; 1827 lens[imdex] = n; 1828 slen += n; 1829 count--; 1830 } 1831 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1832 1833 /* move the data into the send scatter */ 1834 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1835 count = 0; 1836 for (i=0; i<nrecvs; i++) { 1837 values = rvalues + i*nmax; 1838 for (j=0; j<lens[i]; j++) { 1839 lrows[count++] = values[j] - base; 1840 } 1841 } 1842 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1843 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1844 ierr = PetscFree(owner);CHKERRQ(ierr); 1845 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1846 1847 /* fix right hand side if needed */ 1848 if (x && b) { 1849 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1850 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1851 for (i=0; i<slen; i++) { 1852 bb[lrows[i]] = diag*xx[lrows[i]]; 1853 } 1854 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1855 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1856 } 1857 1858 /* actually zap the local rows */ 1859 /* 1860 Zero the required rows. If the "diagonal block" of the matrix 1861 is square and the user wishes to set the diagonal we use separate 1862 code so that MatSetValues() is not called for each diagonal allocating 1863 new memory, thus calling lots of mallocs and slowing things down. 
1864 1865 */ 1866 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1867 ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1868 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1869 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 1870 } else if (diag != 0.0) { 1871 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1872 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1873 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1874 for (i=0; i<slen; i++) { 1875 row = lrows[i] + rstart_bs; 1876 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1877 } 1878 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 } else { 1881 ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 1882 } 1883 1884 ierr = PetscFree(lrows);CHKERRQ(ierr); 1885 1886 /* wait on sends */ 1887 if (nsends) { 1888 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1889 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1890 ierr = PetscFree(send_status);CHKERRQ(ierr); 1891 } 1892 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1893 ierr = PetscFree(svalues);CHKERRQ(ierr); 1894 1895 PetscFunctionReturn(0); 1896 } 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1901 { 1902 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *); 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatEqual_MPIBAIJ" 1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1915 { 1916 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1917 Mat a,b,c,d; 1918 PetscBool flg; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 a = matA->A; b = matA->B; 1923 c = matB->A; d = matB->B; 1924 1925 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1926 if (flg) { 1927 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1928 } 1929 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1930 PetscFunctionReturn(0); 1931 } 1932 1933 #undef __FUNCT__ 1934 #define __FUNCT__ "MatCopy_MPIBAIJ" 1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1936 { 1937 PetscErrorCode ierr; 1938 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1939 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1940 1941 PetscFunctionBegin; 1942 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
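In that case the generic MatCopy_Basic() path below is used; otherwise the diagonal (A) and off-diagonal (B) SeqBAIJ blocks are copied directly.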
*/ 1943 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1944 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1945 } else { 1946 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1947 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1948 } 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ" 1954 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A) 1955 { 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1966 { 1967 PetscErrorCode ierr; 1968 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data; 1969 PetscBLASInt bnz,one=1; 1970 Mat_SeqBAIJ *x,*y; 1971 1972 PetscFunctionBegin; 1973 if (str == SAME_NONZERO_PATTERN) { 1974 PetscScalar alpha = a; 1975 x = (Mat_SeqBAIJ *)xx->A->data; 1976 y = (Mat_SeqBAIJ *)yy->A->data; 1977 bnz = PetscBLASIntCast(x->nz); 1978 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1979 x = (Mat_SeqBAIJ *)xx->B->data; 1980 y = (Mat_SeqBAIJ *)yy->B->data; 1981 bnz = PetscBLASIntCast(x->nz); 1982 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1983 } else { 1984 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 #undef __FUNCT__ 1990 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ" 1991 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs) 1992 { 1993 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1994 PetscInt rbs,cbs; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1999 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 2000 ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr); 2001 ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr); 2002 if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs); 2003 if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2009 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2010 { 2011 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 2014 PetscFunctionBegin; 2015 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2016 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2022 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2023 { 2024 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2029 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2036 { 2037 PetscErrorCode ierr; 2038 IS iscol_local; 2039 PetscInt csize; 2040 2041 PetscFunctionBegin; 2042 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2043 if (call == MAT_REUSE_MATRIX) { 2044 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2045 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used 
before, cannot reuse"); 2046 } else { 2047 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2048 } 2049 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2050 if (call == MAT_INITIAL_MATRIX) { 2051 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2052 ierr = ISDestroy(iscol_local);CHKERRQ(ierr); 2053 } 2054 PetscFunctionReturn(0); 2055 } 2056 2057 #undef __FUNCT__ 2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2059 /* 2060 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2061 in local and then by concatenating the local matrices the end result. 2062 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2063 */ 2064 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2065 { 2066 PetscErrorCode ierr; 2067 PetscMPIInt rank,size; 2068 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2069 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2070 Mat *local,M,Mreuse; 2071 MatScalar *vwork,*aa; 2072 MPI_Comm comm = ((PetscObject)mat)->comm; 2073 Mat_SeqBAIJ *aij; 2074 2075 2076 PetscFunctionBegin; 2077 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2078 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2079 2080 if (call == MAT_REUSE_MATRIX) { 2081 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2082 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2083 local = &Mreuse; 2084 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2085 } else { 2086 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2087 Mreuse = *local; 2088 ierr = PetscFree(local);CHKERRQ(ierr); 2089 } 2090 2091 /* 2092 m - number of local rows 2093 n - number of columns (same on all processors) 2094 rstart - first row in new global matrix generated 2095 */ 2096 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2097 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2098 m = m/bs; 2099 n = n/bs; 2100 2101 if (call == MAT_INITIAL_MATRIX) { 2102 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2103 ii = aij->i; 2104 jj = aij->j; 2105 2106 /* 2107 Determine the number of non-zeros in the diagonal and off-diagonal 2108 portions of the matrix in order to do correct preallocation 2109 */ 2110 2111 /* first get start and end of "diagonal" columns */ 2112 if (csize == PETSC_DECIDE) { 2113 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2114 if (mglobal == n*bs) { /* square matrix */ 2115 nlocal = m; 2116 } else { 2117 nlocal = n/size + ((n % size) > rank); 2118 } 2119 } else { 2120 nlocal = csize/bs; 2121 } 2122 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2123 rstart = rend - nlocal; 2124 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2125 2126 /* next, compute all the lengths */ 2127 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2128 olens = dlens + m; 2129 for (i=0; i<m; i++) { 2130 jend = ii[i+1] - ii[i]; 2131 olen = 0; 2132 dlen = 0; 2133 for (j=0; j<jend; j++) { 2134 if (*jj < rstart || *jj >= rend) olen++; 2135 else dlen++; 2136 jj++; 2137 } 2138 olens[i] = olen; 2139 dlens[i] = dlen; 2140 } 2141 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2142 ierr = 
MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2143 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2144 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2145 ierr = PetscFree(dlens);CHKERRQ(ierr); 2146 } else { 2147 PetscInt ml,nl; 2148 2149 M = *newmat; 2150 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2151 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2152 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2153 /* 2154 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2155 rather than the slower MatSetValues(). 2156 */ 2157 M->was_assembled = PETSC_TRUE; 2158 M->assembled = PETSC_FALSE; 2159 } 2160 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2161 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2162 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 aa = aij->a; 2166 for (i=0; i<m; i++) { 2167 row = rstart/bs + i; 2168 nz = ii[i+1] - ii[i]; 2169 cwork = jj; jj += nz; 2170 vwork = aa; aa += nz; 2171 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2172 } 2173 2174 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2175 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2176 *newmat = M; 2177 2178 /* save submatrix used in processor for next request */ 2179 if (call == MAT_INITIAL_MATRIX) { 2180 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2181 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2182 } 2183 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatPermute_MPIBAIJ" 2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2190 { 2191 MPI_Comm comm,pcomm; 2192 PetscInt first,local_size,nrows; 2193 const PetscInt *rows; 2194 PetscMPIInt size; 2195 IS crowp,growp,irowp,lrowp,lcolp,icolp; 2196 PetscErrorCode ierr; 2197 2198 PetscFunctionBegin; 2199 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2200 /* make a collective version of 'rowp' */ 2201 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2202 if (pcomm==comm) { 2203 crowp = rowp; 2204 } else { 2205 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2206 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2207 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2208 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2209 } 2210 /* collect the global row permutation and invert it */ 2211 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 2212 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 2213 if (pcomm!=comm) { 2214 ierr = ISDestroy(crowp);CHKERRQ(ierr); 2215 } 2216 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 2217 /* get the local target indices */ 2218 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 2219 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr); 2220 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 2221 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 2222 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 2223 ierr = ISDestroy(irowp);CHKERRQ(ierr); 2224 /* the column permutation is so much easier; 2225 make a local version of 'colp' and invert it */ 2226 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2227 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2228 if (size==1) { 2229 lcolp 
= colp; 2230 } else { 2231 ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr); 2232 ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr); 2233 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr); 2234 } 2235 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2236 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 2237 ierr = ISSetPermutation(icolp);CHKERRQ(ierr); 2238 if (size>1) { 2239 ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr); 2240 ierr = ISDestroy(lcolp);CHKERRQ(ierr); 2241 } 2242 /* now we just get the submatrix */ 2243 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2244 /* clean up */ 2245 ierr = ISDestroy(lrowp);CHKERRQ(ierr); 2246 ierr = ISDestroy(icolp);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNCT__ 2251 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2252 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2253 { 2254 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2255 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2256 2257 PetscFunctionBegin; 2258 if (nghosts) { *nghosts = B->nbs;} 2259 if (ghosts) {*ghosts = baij->garray;} 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat); 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ" 2267 /* 2268 This routine is almost identical to MatFDColoringCreate_MPIAIJ(). 2269 */ 2270 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 2271 { 2272 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 2273 PetscErrorCode ierr; 2274 PetscMPIInt size,*ncolsonproc,*disp,nn; 2275 PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col; 2276 const PetscInt *is; 2277 PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj; 2278 PetscInt *rowhit,M,cstart,cend,colb; 2279 PetscInt *columnsforrow,l; 2280 IS *isa; 2281 PetscBool done,flg; 2282 ISLocalToGlobalMapping map = mat->cbmapping; 2283 PetscInt *ltog = (map ? 
map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype; 2284 2285 PetscFunctionBegin; 2286 if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();"); 2287 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock"); 2288 2289 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 2290 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2291 M = mat->rmap->n/bs; 2292 cstart = mat->cmap->rstart/bs; 2293 cend = mat->cmap->rend/bs; 2294 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 2295 c->N = mat->cmap->N/bs; 2296 c->m = mat->rmap->n/bs; 2297 c->rstart = mat->rmap->rstart/bs; 2298 2299 c->ncolors = nis; 2300 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 2301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 2302 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 2303 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 2304 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 2305 ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 2306 2307 /* Allow access to data structures of local part of matrix */ 2308 if (!baij->colmap) { 2309 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 2310 } 2311 ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2312 ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2313 2314 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 2315 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 2316 2317 for (i=0; i<nis; i++) { 2318 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 2319 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 2320 c->ncolumns[i] = n; 2321 if (n) { 2322 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 2323 ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr); 2324 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 2325 } else { 2326 c->columns[i] = 0; 2327 } 2328 2329 if (ctype == IS_COLORING_GLOBAL){ 2330 /* Determine the total (parallel) number of columns of this color */ 2331 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 2332 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 2333 2334 nn = PetscMPIIntCast(n); 2335 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2336 nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];} 2337 if (!nctot) { 2338 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 2339 } 2340 2341 disp[0] = 0; 2342 for (j=1; j<size; j++) { 2343 disp[j] = disp[j-1] + ncolsonproc[j-1]; 2344 } 2345 2346 /* Get complete list of columns for color on each processor */ 2347 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2348 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr); 2349 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 2350 } else if (ctype == IS_COLORING_GHOSTED){ 2351 /* Determine local number of columns of this color on this process, including ghost points */ 2352 nctot = n; 2353 ierr = 
PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2354 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 2355 } else { 2356 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 2357 } 2358 2359 /* 2360 Mark all rows affected by these columns 2361 */ 2362 /* Temporary option to allow for debugging/testing */ 2363 flg = PETSC_FALSE; 2364 ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr); 2365 if (!flg) {/*-----------------------------------------------------------------------------*/ 2366 /* crude, fast version */ 2367 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 2368 /* loop over columns*/ 2369 for (j=0; j<nctot; j++) { 2370 if (ctype == IS_COLORING_GHOSTED) { 2371 col = ltog[cols[j]]; 2372 } else { 2373 col = cols[j]; 2374 } 2375 if (col >= cstart && col < cend) { 2376 /* column is in diagonal block of matrix */ 2377 rows = A_cj + A_ci[col-cstart]; 2378 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2379 } else { 2380 #if defined (PETSC_USE_CTABLE) 2381 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2382 colb --; 2383 #else 2384 colb = baij->colmap[col] - 1; 2385 #endif 2386 if (colb == -1) { 2387 m = 0; 2388 } else { 2389 colb = colb/bs; 2390 rows = B_cj + B_ci[colb]; 2391 m = B_ci[colb+1] - B_ci[colb]; 2392 } 2393 } 2394 /* loop over the rows this column hits, marking them in rowhit */ 2395 for (k=0; k<m; k++) { 2396 rowhit[*rows++] = col + 1; 2397 } 2398 } 2399 2400 /* count the number of hits */ 2401 nrows = 0; 2402 for (j=0; j<M; j++) { 2403 if (rowhit[j]) nrows++; 2404 } 2405 c->nrows[i] = nrows; 2406 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2407 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2408 ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2409 nrows = 0; 2410 for (j=0; j<M; j++) { 2411 if (rowhit[j]) { 2412 c->rows[i][nrows] = j; 2413 c->columnsforrow[i][nrows] = rowhit[j] - 1; 2414 nrows++; 2415 } 2416 } 2417 } else {/*-------------------------------------------------------------------------------*/ 2418 /* slow version, using rowhit as a linked list */ 2419 PetscInt currentcol,fm,mfm; 2420 rowhit[M] = M; 2421 nrows = 0; 2422 /* loop over columns*/ 2423 for (j=0; j<nctot; j++) { 2424 if (ctype == IS_COLORING_GHOSTED) { 2425 col = ltog[cols[j]]; 2426 } else { 2427 col = cols[j]; 2428 } 2429 if (col >= cstart && col < cend) { 2430 /* column is in diagonal block of matrix */ 2431 rows = A_cj + A_ci[col-cstart]; 2432 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 2433 } else { 2434 #if defined (PETSC_USE_CTABLE) 2435 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2436 colb --; 2437 #else 2438 colb = baij->colmap[col] - 1; 2439 #endif 2440 if (colb == -1) { 2441 m = 0; 2442 } else { 2443 colb = colb/bs; 2444 rows = B_cj + B_ci[colb]; 2445 m = B_ci[colb+1] - B_ci[colb]; 2446 } 2447 } 2448 2449 /* loop over the rows this column hits, marking them in rowhit */ 2450 fm = M; /* fm points to first entry in linked list */ 2451 for (k=0; k<m; k++) { 2452 currentcol = *rows++; 2453 /* is it already in the list? 
*/ 2454 do { 2455 mfm = fm; 2456 fm = rowhit[fm]; 2457 } while (fm < currentcol); 2458 /* not in list so add it */ 2459 if (fm != currentcol) { 2460 nrows++; 2461 columnsforrow[currentcol] = col; 2462 /* next three lines insert new entry into linked list */ 2463 rowhit[mfm] = currentcol; 2464 rowhit[currentcol] = fm; 2465 fm = currentcol; 2466 /* fm points to present position in list since we know the columns are sorted */ 2467 } else { 2468 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 2469 } 2470 } 2471 } 2472 c->nrows[i] = nrows; 2473 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 2474 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 2475 ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 2476 /* now store the linked list of rows into c->rows[i] */ 2477 nrows = 0; 2478 fm = rowhit[M]; 2479 do { 2480 c->rows[i][nrows] = fm; 2481 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 2482 fm = rowhit[fm]; 2483 } while (fm < M); 2484 } /* ---------------------------------------------------------------------------------------*/ 2485 ierr = PetscFree(cols);CHKERRQ(ierr); 2486 } 2487 2488 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 2489 /* 2490 vscale will contain the "diagonal" on processor scalings followed by the off processor 2491 */ 2492 if (ctype == IS_COLORING_GLOBAL) { 2493 PetscInt *garray; 2494 ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2495 for (i=0; i<baij->B->cmap->n/bs; i++) { 2496 for (j=0; j<bs; j++) { 2497 garray[i*bs+j] = bs*baij->garray[i]+j; 2498 } 2499 } 2500 ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 2501 ierr = PetscFree(garray);CHKERRQ(ierr); 2502 CHKMEMQ; 2503 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2504 for (k=0; k<c->ncolors; k++) { 2505 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2506 for (l=0; l<c->nrows[k]; l++) { 2507 col = c->columnsforrow[k][l]; 2508 if (col >= cstart && col < cend) { 2509 /* column is in diagonal block of matrix */ 2510 colb = col - cstart; 2511 } else { 2512 /* column is in "off-processor" part */ 2513 #if defined (PETSC_USE_CTABLE) 2514 ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr); 2515 colb --; 2516 #else 2517 colb = baij->colmap[col] - 1; 2518 #endif 2519 colb = colb/bs; 2520 colb += cend - cstart; 2521 } 2522 c->vscaleforrow[k][l] = colb; 2523 } 2524 } 2525 } else if (ctype == IS_COLORING_GHOSTED) { 2526 /* Get gtol mapping */ 2527 PetscInt N = mat->cmap->N, *gtol; 2528 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr); 2529 for (i=0; i<N; i++) gtol[i] = -1; 2530 for (i=0; i<map->n; i++) gtol[ltog[i]] = i; 2531 2532 c->vscale = 0; /* will be created in MatFDColoringApply() */ 2533 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 2534 for (k=0; k<c->ncolors; k++) { 2535 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 2536 for (l=0; l<c->nrows[k]; l++) { 2537 col = c->columnsforrow[k][l]; /* global column index */ 2538 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 2539 } 2540 } 2541 ierr = PetscFree(gtol);CHKERRQ(ierr); 2542 } 2543 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 2544 2545 ierr = PetscFree(rowhit);CHKERRQ(ierr); 2546 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 2547 
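/* give back the compressed-column views of the diagonal and off-diagonal blocks obtained with MatGetColumnIJ() above */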
ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 2548 ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 2549 CHKMEMQ; 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ" 2555 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat) 2556 { 2557 Mat B; 2558 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2559 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2560 Mat_SeqAIJ *b; 2561 PetscErrorCode ierr; 2562 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2563 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2564 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2565 2566 PetscFunctionBegin; 2567 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2568 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 2569 2570 /* ---------------------------------------------------------------- 2571 Tell every processor the number of nonzeros per row 2572 */ 2573 ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 2574 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2575 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2576 } 2577 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2578 ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr); 2579 displs = recvcounts + size; 2580 for (i=0; i<size; i++) { 2581 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2582 displs[i] = A->rmap->range[i]/bs; 2583 } 2584 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2585 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2586 #else 2587 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2588 #endif 2589 /* --------------------------------------------------------------- 2590 Create the sequential matrix of the same type as the local block diagonal 2591 */ 2592 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2593 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2594 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2595 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2596 b = (Mat_SeqAIJ *)B->data; 2597 2598 /*-------------------------------------------------------------------- 2599 Copy my part of matrix column indices over 2600 */ 2601 sendcount = ad->nz + bd->nz; 2602 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2603 a_jsendbuf = ad->j; 2604 b_jsendbuf = bd->j; 2605 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2606 cnt = 0; 2607 for (i=0; i<n; i++) { 2608 2609 /* put in lower diagonal portion */ 2610 m = bd->i[i+1] - bd->i[i]; 2611 while (m > 0) { 2612 /* is it above diagonal (in bd (compressed) numbering) */ 2613 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2614 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2615 m--; 2616 } 2617 2618 /* put in diagonal portion */ 2619 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2620 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2621 } 2622 2623 /* put in upper diagonal portion */ 2624 while (m-- > 0) { 2625 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2626 } 2627 } 2628 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D 
actual nz %D",sendcount,cnt); 2629 2630 /*-------------------------------------------------------------------- 2631 Gather all column indices to all processors 2632 */ 2633 for (i=0; i<size; i++) { 2634 recvcounts[i] = 0; 2635 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2636 recvcounts[i] += lens[j]; 2637 } 2638 } 2639 displs[0] = 0; 2640 for (i=1; i<size; i++) { 2641 displs[i] = displs[i-1] + recvcounts[i-1]; 2642 } 2643 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2644 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2645 #else 2646 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr); 2647 #endif 2648 /*-------------------------------------------------------------------- 2649 Assemble the matrix into useable form (note numerical values not yet set) 2650 */ 2651 /* set the b->ilen (length of each row) values */ 2652 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2653 /* set the b->i indices */ 2654 b->i[0] = 0; 2655 for (i=1; i<=A->rmap->N/bs; i++) { 2656 b->i[i] = b->i[i-1] + lens[i-1]; 2657 } 2658 ierr = PetscFree(lens);CHKERRQ(ierr); 2659 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2661 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2662 2663 if (A->symmetric){ 2664 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2665 } else if (A->hermitian) { 2666 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2667 } else if (A->structurally_symmetric) { 2668 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2669 } 2670 *newmat = B; 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatSOR_MPIBAIJ" 2676 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2677 { 2678 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2679 PetscErrorCode ierr; 2680 Vec bb1 = 0; 2681 2682 PetscFunctionBegin; 2683 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2684 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2685 } 2686 2687 if (flag == SOR_APPLY_UPPER) { 2688 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 2693 if (flag & SOR_ZERO_INITIAL_GUESS) { 2694 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2695 its--; 2696 } 2697 2698 while (its--) { 2699 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2700 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2701 2702 /* update rhs: bb1 = bb - B*x */ 2703 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2704 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2705 2706 /* local sweep */ 2707 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2708 } 2709 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 2710 if (flag & SOR_ZERO_INITIAL_GUESS) { 2711 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2712 its--; 2713 } 2714 while (its--) { 2715 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2716 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2717 2718 /* update rhs: bb1 = bb - B*x */ 2719 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2720 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2721 2722 /* local sweep */ 2723 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2724 } 2725 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 2726 if (flag & SOR_ZERO_INITIAL_GUESS) { 2727 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2728 its--; 2729 } 2730 while (its--) { 2731 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2732 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2733 2734 /* update rhs: bb1 = bb - B*x */ 2735 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2736 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2737 2738 /* local sweep */ 2739 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2740 } 2741 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2742 2743 if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);} 2744 PetscFunctionReturn(0); 2745 } 2746 2747 extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2748 2749 2750 /* -------------------------------------------------------------------*/ 2751 static struct _MatOps MatOps_Values = { 2752 MatSetValues_MPIBAIJ, 2753 MatGetRow_MPIBAIJ, 2754 MatRestoreRow_MPIBAIJ, 2755 MatMult_MPIBAIJ, 2756 /* 4*/ MatMultAdd_MPIBAIJ, 2757 MatMultTranspose_MPIBAIJ, 2758 MatMultTransposeAdd_MPIBAIJ, 2759 0, 2760 0, 2761 0, 2762 /*10*/ 0, 2763 0, 2764 0, 2765 MatSOR_MPIBAIJ, 2766 MatTranspose_MPIBAIJ, 2767 /*15*/ MatGetInfo_MPIBAIJ, 2768 MatEqual_MPIBAIJ, 2769 MatGetDiagonal_MPIBAIJ, 2770 MatDiagonalScale_MPIBAIJ, 2771 MatNorm_MPIBAIJ, 2772 /*20*/ MatAssemblyBegin_MPIBAIJ, 2773 MatAssemblyEnd_MPIBAIJ, 2774 MatSetOption_MPIBAIJ, 2775 MatZeroEntries_MPIBAIJ, 2776 /*24*/ MatZeroRows_MPIBAIJ, 2777 0, 2778 0, 2779 0, 2780 0, 2781 /*29*/ MatSetUpPreallocation_MPIBAIJ, 2782 0, 2783 0, 2784 0, 2785 0, 2786 /*34*/ MatDuplicate_MPIBAIJ, 2787 0, 2788 0, 2789 0, 2790 0, 2791 /*39*/ MatAXPY_MPIBAIJ, 2792 MatGetSubMatrices_MPIBAIJ, 2793 MatIncreaseOverlap_MPIBAIJ, 2794 MatGetValues_MPIBAIJ, 2795 MatCopy_MPIBAIJ, 2796 /*44*/ 0, 2797 MatScale_MPIBAIJ, 2798 0, 2799 0, 2800 0, 2801 /*49*/ MatSetBlockSize_MPIBAIJ, 2802 0, 2803 0, 2804 0, 2805 0, 2806 /*54*/ MatFDColoringCreate_MPIBAIJ, 2807 0, 2808 MatSetUnfactored_MPIBAIJ, 2809 MatPermute_MPIBAIJ, 2810 MatSetValuesBlocked_MPIBAIJ, 2811 /*59*/ MatGetSubMatrix_MPIBAIJ, 2812 MatDestroy_MPIBAIJ, 2813 MatView_MPIBAIJ, 2814 0, 2815 0, 2816 /*64*/ 0, 2817 0, 2818 0, 2819 0, 2820 0, 2821 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2822 0, 2823 0, 2824 0, 2825 0, 2826 /*74*/ 0, 2827 MatFDColoringApply_BAIJ, 2828 0, 2829 0, 2830 0, 2831 /*79*/ 0, 2832 0, 2833 0, 2834 0, 2835 MatLoad_MPIBAIJ, 2836 /*84*/ 0, 2837 0, 2838 0, 2839 0, 2840 0, 2841 /*89*/ 0, 2842 0, 2843 0, 2844 0, 2845 0, 2846 /*94*/ 0, 2847 0, 2848 0, 2849 0, 2850 0, 2851 /*99*/ 0, 2852 0, 2853 0, 2854 0, 2855 0, 2856 /*104*/0, 2857 MatRealPart_MPIBAIJ, 2858 MatImaginaryPart_MPIBAIJ, 2859 0, 2860 0, 2861 /*109*/0, 2862 0, 2863 0, 2864 0, 2865 0, 2866 /*114*/MatGetSeqNonzerostructure_MPIBAIJ, 2867 0, 2868 MatGetGhosts_MPIBAIJ, 2869 0, 2870 0, 2871 /*119*/0, 2872 0, 2873 0, 2874 0 2875 }; 2876 2877 
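/*
   A minimal usage sketch (illustrative only, not part of the library): the MatOps_Values
   table above supplies the implementations invoked when the usual Mat interface is called
   on a MATMPIBAIJ matrix.  The global size, block size and the preallocation estimates
   (5 diagonal / 2 off-diagonal block nonzeros per block row) below are made-up example values.

     Mat      C;
     PetscInt bs = 2;                                       block size
     ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(C,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... set entries of locally owned block rows with MatSetValuesBlocked()/MatSetValues() ...
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/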
EXTERN_C_BEGIN 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2880 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *a) 2881 { 2882 PetscFunctionBegin; 2883 *a = ((Mat_MPIBAIJ *)A->data)->A; 2884 *iscopy = PETSC_FALSE; 2885 PetscFunctionReturn(0); 2886 } 2887 EXTERN_C_END 2888 2889 EXTERN_C_BEGIN 2890 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2891 EXTERN_C_END 2892 2893 EXTERN_C_BEGIN 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2896 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2897 { 2898 PetscInt m,rstart,cstart,cend; 2899 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2900 const PetscInt *JJ=0; 2901 PetscScalar *values=0; 2902 PetscErrorCode ierr; 2903 2904 PetscFunctionBegin; 2905 2906 if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2907 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2908 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2909 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2910 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2911 m = B->rmap->n/bs; 2912 rstart = B->rmap->rstart/bs; 2913 cstart = B->cmap->rstart/bs; 2914 cend = B->cmap->rend/bs; 2915 2916 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2917 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 2918 for (i=0; i<m; i++) { 2919 nz = ii[i+1] - ii[i]; 2920 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2921 nz_max = PetscMax(nz_max,nz); 2922 JJ = jj + ii[i]; 2923 for (j=0; j<nz; j++) { 2924 if (*JJ >= cstart) break; 2925 JJ++; 2926 } 2927 d = 0; 2928 for (; j<nz; j++) { 2929 if (*JJ++ >= cend) break; 2930 d++; 2931 } 2932 d_nnz[i] = d; 2933 o_nnz[i] = nz - d; 2934 } 2935 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2936 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2937 2938 values = (PetscScalar*)V; 2939 if (!values) { 2940 ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2941 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2942 } 2943 for (i=0; i<m; i++) { 2944 PetscInt row = i + rstart; 2945 PetscInt ncols = ii[i+1] - ii[i]; 2946 const PetscInt *icols = jj + ii[i]; 2947 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2948 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2949 } 2950 2951 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2952 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 2955 PetscFunctionReturn(0); 2956 } 2957 EXTERN_C_END 2958 2959 #undef __FUNCT__ 2960 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2961 /*@C 2962 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2963 (the default parallel PETSc format). 2964 2965 Collective on MPI_Comm 2966 2967 Input Parameters: 2968 + A - the matrix 2969 . bs - the block size 2970 . i - the indices into j for the start of each local row (starts with zero) 2971 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2972 - v - optional values in the matrix 2973 2974 Level: developer 2975 2976 .keywords: matrix, aij, compressed row, sparse, parallel 2977 2978 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2979 @*/ 2980 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2981 { 2982 PetscErrorCode ierr; 2983 2984 PetscFunctionBegin; 2985 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 2989 EXTERN_C_BEGIN 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2992 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz) 2993 { 2994 Mat_MPIBAIJ *b; 2995 PetscErrorCode ierr; 2996 PetscInt i, newbs = PetscAbs(bs); 2997 2998 PetscFunctionBegin; 2999 if (bs < 0) { 3000 ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr); 3001 ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 3002 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3003 bs = PetscAbs(bs); 3004 } 3005 if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz"); 3006 bs = newbs; 3007 3008 3009 if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive"); 3010 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3011 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3012 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3013 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3014 3015 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 3016 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 3017 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3018 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3019 3020 if (d_nnz) { 3021 for (i=0; i<B->rmap->n/bs; i++) { 3022 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 3023 } 3024 } 3025 if (o_nnz) { 3026 for (i=0; i<B->rmap->n/bs; i++) { 3027 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 3028 } 3029 } 3030 3031 b = (Mat_MPIBAIJ*)B->data; 3032 b->bs2 = bs*bs; 3033 b->mbs = B->rmap->n/bs; 3034 b->nbs = B->cmap->n/bs; 3035 b->Mbs = B->rmap->N/bs; 3036 b->Nbs = B->cmap->N/bs; 3037 3038 for (i=0; i<=b->size; i++) { 3039 b->rangebs[i] = B->rmap->range[i]/bs; 3040 } 3041 b->rstartbs = B->rmap->rstart/bs; 3042 b->rendbs = B->rmap->rend/bs; 3043 b->cstartbs = B->cmap->rstart/bs; 3044 b->cendbs = B->cmap->rend/bs; 3045 3046 if (!B->preallocated) { 3047 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3048 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3049 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3050 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3051 ierr = 
MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3052 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3053 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3054 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3055 ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr); 3056 } 3057 3058 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3059 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3060 B->preallocated = PETSC_TRUE; 3061 PetscFunctionReturn(0); 3062 } 3063 EXTERN_C_END 3064 3065 EXTERN_C_BEGIN 3066 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3067 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3068 EXTERN_C_END 3069 3070 3071 EXTERN_C_BEGIN 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3074 PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj) 3075 { 3076 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3077 PetscErrorCode ierr; 3078 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3079 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3080 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3081 3082 PetscFunctionBegin; 3083 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr); 3084 ii[0] = 0; 3085 CHKMEMQ; 3086 for (i=0; i<M; i++) { 3087 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3088 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3089 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3090 /* remove one from the count if this row has a diagonal entry */ 3091 for (j=id[i]; j<id[i+1]; j++) { 3092 if (jd[j] == i) {ii[i+1]--;break;} 3093 } 3094 CHKMEMQ; 3095 } 3096 ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr); 3097 cnt = 0; 3098 for (i=0; i<M; i++) { 3099 for (j=io[i]; j<io[i+1]; j++) { 3100 if (garray[jo[j]] > rstart) break; 3101 jj[cnt++] = garray[jo[j]]; 3102 CHKMEMQ; 3103 } 3104 for (k=id[i]; k<id[i+1]; k++) { 3105 if (jd[k] != i) { 3106 jj[cnt++] = rstart + jd[k]; 3107 CHKMEMQ; 3108 } 3109 } 3110 for (;j<io[i+1]; j++) { 3111 jj[cnt++] = garray[jo[j]]; 3112 CHKMEMQ; 3113 } 3114 } 3115 ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr); 3116 PetscFunctionReturn(0); 3117 } 3118 EXTERN_C_END 3119 3120 #include "../src/mat/impls/aij/mpi/mpiaij.h" 3121 EXTERN_C_BEGIN 3122 PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 3123 EXTERN_C_END 3124 3125 EXTERN_C_BEGIN 3126 #undef __FUNCT__ 3127 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3128 PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat) 3129 { 3130 PetscErrorCode ierr; 3131 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3132 Mat B; 3133 Mat_MPIAIJ *b; 3134 3135 PetscFunctionBegin; 3136 if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled"); 3137 3138 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 3139 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3140 ierr = MatSetType(B,newtype);CHKERRQ(ierr); 3141 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 3142 b = (Mat_MPIAIJ*) B->data; 3143 3144 ierr = MatDestroy(b->A);CHKERRQ(ierr); 3145 ierr = MatDestroy(b->B);CHKERRQ(ierr);
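/* The empty local blocks b->A and b->B created by the preallocation above have just been discarded; DisAssemble_MPIBAIJ() below restores global column numbering in the off-diagonal block of A so that its SeqBAIJ blocks can be converted to SeqAIJ and dropped in as the blocks of the new MPIAIJ matrix. */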
3146 ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3147 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3148 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3149 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3150 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3151 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3152 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 if (reuse == MAT_REUSE_MATRIX) { 3154 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 3155 } else { 3156 *newmat = B; 3157 } 3158 PetscFunctionReturn(0); 3159 } 3160 EXTERN_C_END 3161 3162 EXTERN_C_BEGIN 3163 #if defined(PETSC_HAVE_MUMPS) 3164 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3165 #endif 3166 EXTERN_C_END 3167 3168 /*MC 3169 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3170 3171 Options Database Keys: 3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3173 . -mat_block_size <bs> - set the blocksize used to store the matrix 3174 - -mat_use_hash_table <fact> 3175 3176 Level: beginner 3177 3178 .seealso: MatCreateMPIBAIJ 3179 M*/ 3180 3181 EXTERN_C_BEGIN 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "MatCreate_MPIBAIJ" 3184 PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3185 { 3186 Mat_MPIBAIJ *b; 3187 PetscErrorCode ierr; 3188 PetscBool flg; 3189 3190 PetscFunctionBegin; 3191 ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr); 3192 B->data = (void*)b; 3193 3194 3195 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3196 B->assembled = PETSC_FALSE; 3197 3198 B->insertmode = NOT_SET_VALUES; 3199 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 3200 ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr); 3201 3202 /* build local table of row and column ownerships */ 3203 ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr); 3204 3205 /* build cache for off array entries formed */ 3206 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 3207 b->donotstash = PETSC_FALSE; 3208 b->colmap = PETSC_NULL; 3209 b->garray = PETSC_NULL; 3210 b->roworiented = PETSC_TRUE; 3211 3212 /* stuff used in block assembly */ 3213 b->barray = 0; 3214 3215 /* stuff used for matrix vector multiply */ 3216 b->lvec = 0; 3217 b->Mvctx = 0; 3218 3219 /* stuff for MatGetRow() */ 3220 b->rowindices = 0; 3221 b->rowvalues = 0; 3222 b->getrowactive = PETSC_FALSE; 3223 3224 /* hash table stuff */ 3225 b->ht = 0; 3226 b->hd = 0; 3227 b->ht_size = 0; 3228 b->ht_flag = PETSC_FALSE; 3229 b->ht_fact = 0; 3230 b->ht_total_ct = 0; 3231 b->ht_insert_ct = 0; 3232 3233 ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3234 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 3235 if (flg) { 3236 PetscReal fact = 1.39; 3237 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3238 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr); 3239 if (fact <= 1.0) fact = 1.39; 3240 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3241 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3242 } 3243 ierr = 
PetscOptionsEnd();CHKERRQ(ierr); 3244 3245 #if defined(PETSC_HAVE_MUMPS) 3246 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3247 #endif 3248 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C", 3249 "MatConvert_MPIBAIJ_MPIAdj", 3250 MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3251 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C", 3252 "MatConvert_MPIBAIJ_MPIAIJ", 3253 MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3254 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3255 "MatStoreValues_MPIBAIJ", 3256 MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3257 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3258 "MatRetrieveValues_MPIBAIJ", 3259 MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3260 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3261 "MatGetDiagonalBlock_MPIBAIJ", 3262 MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3263 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C", 3264 "MatMPIBAIJSetPreallocation_MPIBAIJ", 3265 MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3266 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C", 3267 "MatMPIBAIJSetPreallocationCSR_MPIBAIJ", 3268 MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3269 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3270 "MatDiagonalScaleLocal_MPIBAIJ", 3271 MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3272 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C", 3273 "MatSetHashTableFactor_MPIBAIJ", 3274 MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3275 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 EXTERN_C_END 3279 3280 /*MC 3281 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3282 3283 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3284 and MATMPIBAIJ otherwise. 3285 3286 Options Database Keys: 3287 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3288 3289 Level: beginner 3290 3291 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3292 M*/ 3293 3294 EXTERN_C_BEGIN 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "MatCreate_BAIJ" 3297 PetscErrorCode MatCreate_BAIJ(Mat A) 3298 { 3299 PetscErrorCode ierr; 3300 PetscMPIInt size; 3301 3302 PetscFunctionBegin; 3303 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 3304 if (size == 1) { 3305 ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr); 3306 } else { 3307 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 3308 } 3309 PetscFunctionReturn(0); 3310 } 3311 EXTERN_C_END 3312 3313 #undef __FUNCT__ 3314 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3315 /*@C 3316 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3317 (block compressed row). For good matrix assembly performance 3318 the user should preallocate the matrix storage by setting the parameters 3319 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3320 performance can be increased by more than a factor of 50. 3321 3322 Collective on Mat 3323 3324 Input Parameters: 3325 + A - the matrix 3326 . bs - size of blockk 3327 . 
d_nz  - number of block nonzeros per block row in diagonal portion of local
3328            submatrix  (same for all local rows)
3329 .  d_nnz - array containing the number of block nonzeros in the various block rows
3330            in the diagonal portion of the local submatrix (possibly different for each block
3331            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3332 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3333            submatrix (same for all local rows).
3334 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3335            off-diagonal portion of the local submatrix (possibly different for
3336            each block row) or PETSC_NULL.
3337 
3338    If the *_nnz parameter is given then the *_nz parameter is ignored
3339 
3340    Options Database Keys:
3341 +   -mat_block_size - size of the blocks to use
3342 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the hash table size factor
3343 
3344    Notes:
3345    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3346    then it must be used on all processors that share the object for that argument.
3347 
3348    Storage Information:
3349    For a square global matrix we define each processor's diagonal portion
3350    to be its local rows and the corresponding columns (a square submatrix);
3351    each processor's off-diagonal portion encompasses the remainder of the
3352    local matrix (a rectangular submatrix).
3353 
3354    The user can specify preallocated storage for the diagonal part of
3355    the local submatrix with either d_nz or d_nnz (not both). Set
3356    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3357    memory allocation. Likewise, specify preallocated storage for the
3358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3359 
3360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3361    the figure below we depict these three local rows and all columns (0-11).
3362 
3363 .vb
3364            0 1 2 3 4 5 6 7 8 9 10 11
3365           -------------------
3366    row 3  | o o o d d d o o o o o o
3367    row 4  | o o o d d d o o o o o o
3368    row 5  | o o o d d d o o o o o o
3369           -------------------
3370 .ve
3371 
3372    Thus, any entries in the d locations are stored in the d (diagonal)
3373    submatrix, and any entries in the o locations are stored in the
3374    o (off-diagonal) submatrix. Note that the d and the o submatrices are
3375    stored simply in the MATSEQBAIJ format for compressed row storage.
3376 
3377    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3378    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3379    In general, for PDE problems in which most nonzeros are near the diagonal,
3380    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3381    or you will get TERRIBLE performance; see the users' manual chapter on
3382    matrices.
3383 
3384    You can call MatGetInfo() to get information on how effective the preallocation was;
3385    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3386    You can also run with the option -info and look for messages with the string
3387    malloc in them to see if additional memory allocation was needed.
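   A sketch of a typical call sequence follows; the block-row counts 5 and 2 and the
   placeholder sizes m,n,M,N,bs are illustrative values only and must come from the
   actual sparsity pattern of the application:

.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);                                       /* local sizes m,n divisible by bs */
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);   /* 5 diagonal, 2 off-diagonal blocks per block row */
     /* then MatSetValuesBlocked()/MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() */
.ve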
3388 
3389    Level: intermediate
3390 
3391 .keywords: matrix, block, aij, compressed row, sparse, parallel
3392 
3393 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3394 @*/
3395 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3396 {
3397   PetscErrorCode ierr;
3398 
3399   PetscFunctionBegin;
3400   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatCreateMPIBAIJ"
3406 /*@C
3407    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3408    (block compressed row).  For good matrix assembly performance
3409    the user should preallocate the matrix storage by setting the parameters
3410    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3411    performance can be increased by more than a factor of 50.
3412 
3413    Collective on MPI_Comm
3414 
3415    Input Parameters:
3416 +  comm - MPI communicator
3417 .  bs   - size of block
3418 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3419        This value should be the same as the local size used in creating the
3420        y vector for the matrix-vector product y = Ax.
3421 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3422        This value should be the same as the local size used in creating the
3423        x vector for the matrix-vector product y = Ax.
3424 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3425 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3426 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3427            submatrix  (same for all local rows)
3428 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3429            in the diagonal portion of the local submatrix (possibly different for each block
3430            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3431 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3432            submatrix (same for all local rows).
3433 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3434            off-diagonal portion of the local submatrix (possibly different for
3435            each block row) or PETSC_NULL.
3436 
3437    Output Parameter:
3438 .  A - the matrix
3439 
3440    Options Database Keys:
3441 +   -mat_block_size - size of the blocks to use
3442 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the hash table size factor
3443 
3444    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3445    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3446    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3447 
3448    Notes:
3449    If the *_nnz parameter is given then the *_nz parameter is ignored
3450 
3451    A nonzero block is any block that has 1 or more nonzeros in it
3452 
3453    The user MUST specify either the local or global matrix dimensions
3454    (possibly both).
3455 
3456    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3457    then it must be used on all processors that share the object for that argument.
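   For example (a sketch only; the placeholder sizes and the d_nnz/o_nnz arrays are
   illustrative and would normally be filled from the mesh connectivity before the call):

.vb
     MatCreateMPIBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);
.ve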
3458 3459 Storage Information: 3460 For a square global matrix we define each processor's diagonal portion 3461 to be its local rows and the corresponding columns (a square submatrix); 3462 each processor's off-diagonal portion encompasses the remainder of the 3463 local matrix (a rectangular submatrix). 3464 3465 The user can specify preallocated storage for the diagonal part of 3466 the local submatrix with either d_nz or d_nnz (not both). Set 3467 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 3468 memory allocation. Likewise, specify preallocated storage for the 3469 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3470 3471 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3472 the figure below we depict these three local rows and all columns (0-11). 3473 3474 .vb 3475 0 1 2 3 4 5 6 7 8 9 10 11 3476 ------------------- 3477 row 3 | o o o d d d o o o o o o 3478 row 4 | o o o d d d o o o o o o 3479 row 5 | o o o d d d o o o o o o 3480 ------------------- 3481 .ve 3482 3483 Thus, any entries in the d locations are stored in the d (diagonal) 3484 submatrix, and any entries in the o locations are stored in the 3485 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3486 stored simply in the MATSEQBAIJ format for compressed row storage. 3487 3488 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3489 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3490 In general, for PDE problems in which most nonzeros are near the diagonal, 3491 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3492 or you will get TERRIBLE performance; see the users' manual chapter on 3493 matrices. 3494 3495 Level: intermediate 3496 3497 .keywords: matrix, block, aij, compressed row, sparse, parallel 3498 3499 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3500 @*/ 3501 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3502 { 3503 PetscErrorCode ierr; 3504 PetscMPIInt size; 3505 3506 PetscFunctionBegin; 3507 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3508 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3509 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3510 if (size > 1) { 3511 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3512 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3513 } else { 3514 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3515 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3516 } 3517 PetscFunctionReturn(0); 3518 } 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3522 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3523 { 3524 Mat mat; 3525 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3526 PetscErrorCode ierr; 3527 PetscInt len=0; 3528 3529 PetscFunctionBegin; 3530 *newmat = 0; 3531 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3532 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3533 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3534 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3535 3536 mat->factortype = matin->factortype; 3537 mat->preallocated = PETSC_TRUE; 
3538 mat->assembled = PETSC_TRUE; 3539 mat->insertmode = NOT_SET_VALUES; 3540 3541 a = (Mat_MPIBAIJ*)mat->data; 3542 mat->rmap->bs = matin->rmap->bs; 3543 a->bs2 = oldmat->bs2; 3544 a->mbs = oldmat->mbs; 3545 a->nbs = oldmat->nbs; 3546 a->Mbs = oldmat->Mbs; 3547 a->Nbs = oldmat->Nbs; 3548 3549 ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3550 ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3551 3552 a->size = oldmat->size; 3553 a->rank = oldmat->rank; 3554 a->donotstash = oldmat->donotstash; 3555 a->roworiented = oldmat->roworiented; 3556 a->rowindices = 0; 3557 a->rowvalues = 0; 3558 a->getrowactive = PETSC_FALSE; 3559 a->barray = 0; 3560 a->rstartbs = oldmat->rstartbs; 3561 a->rendbs = oldmat->rendbs; 3562 a->cstartbs = oldmat->cstartbs; 3563 a->cendbs = oldmat->cendbs; 3564 3565 /* hash table stuff */ 3566 a->ht = 0; 3567 a->hd = 0; 3568 a->ht_size = 0; 3569 a->ht_flag = oldmat->ht_flag; 3570 a->ht_fact = oldmat->ht_fact; 3571 a->ht_total_ct = 0; 3572 a->ht_insert_ct = 0; 3573 3574 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3575 if (oldmat->colmap) { 3576 #if defined (PETSC_USE_CTABLE) 3577 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3578 #else 3579 ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3580 ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3581 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3582 #endif 3583 } else a->colmap = 0; 3584 3585 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3586 ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3587 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3588 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3589 } else a->garray = 0; 3590 3591 ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3592 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3593 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3594 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3595 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3596 3597 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3598 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3599 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3600 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3601 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3602 *newmat = mat; 3603 3604 PetscFunctionReturn(0); 3605 } 3606 3607 #undef __FUNCT__ 3608 #define __FUNCT__ "MatLoad_MPIBAIJ" 3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3610 { 3611 PetscErrorCode ierr; 3612 int fd; 3613 PetscInt i,nz,j,rstart,rend; 3614 PetscScalar *vals,*buf; 3615 MPI_Comm comm = ((PetscObject)viewer)->comm; 3616 MPI_Status status; 3617 PetscMPIInt rank,size,maxnz; 3618 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3619 PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL; 3620 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3621 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3622 PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount; 3623 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3624 3625 PetscFunctionBegin; 3626 ierr = 
PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3627 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3628 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3629 3630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3631 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3632 if (!rank) { 3633 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3634 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3635 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3636 } 3637 3638 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3639 3640 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3641 M = header[1]; N = header[2]; 3642 3643 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3644 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3645 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3646 3647 /* If global sizes are set, check if they are consistent with that given in the file */ 3648 if (sizesset) { 3649 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3650 } 3651 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3652 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3653 3654 if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices"); 3655 3656 /* 3657 This code adds extra rows to make sure the number of rows is 3658 divisible by the blocksize 3659 */ 3660 Mbs = M/bs; 3661 extra_rows = bs - M + bs*Mbs; 3662 if (extra_rows == bs) extra_rows = 0; 3663 else Mbs++; 3664 if (extra_rows && !rank) { 3665 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3666 } 3667 3668 /* determine ownership of all rows */ 3669 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3670 mbs = Mbs/size + ((Mbs % size) > rank); 3671 m = mbs*bs; 3672 } else { /* User set */ 3673 m = newmat->rmap->n; 3674 mbs = m/bs; 3675 } 3676 ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr); 3677 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3678 3679 /* process 0 needs enough room for process with most rows */ 3680 if (!rank) { 3681 mmax = rowners[1]; 3682 for (i=2; i<size; i++) { 3683 mmax = PetscMax(mmax,rowners[i]); 3684 } 3685 mmax*=bs; 3686 } else mmax = m; 3687 3688 rowners[0] = 0; 3689 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3690 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3691 rstart = rowners[rank]; 3692 rend = rowners[rank+1]; 3693 3694 /* distribute row lengths to all processors */ 3695 ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr); 3696 if (!rank) { 3697 mend = m; 3698 if (size == 1) mend = mend - extra_rows; 3699 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3700 for (j=mend; j<m; j++) locrowlens[j] = 1; 3701 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3702 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3703 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3704 for (j=0; j<m; j++) { 
3705 procsnz[0] += locrowlens[j]; 3706 } 3707 for (i=1; i<size; i++) { 3708 mend = browners[i+1] - browners[i]; 3709 if (i == size-1) mend = mend - extra_rows; 3710 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3711 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3712 /* calculate the number of nonzeros on each processor */ 3713 for (j=0; j<browners[i+1]-browners[i]; j++) { 3714 procsnz[i] += rowlengths[j]; 3715 } 3716 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3719 } else { 3720 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3721 } 3722 3723 if (!rank) { 3724 /* determine max buffer needed and allocate it */ 3725 maxnz = procsnz[0]; 3726 for (i=1; i<size; i++) { 3727 maxnz = PetscMax(maxnz,procsnz[i]); 3728 } 3729 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3730 3731 /* read in my part of the matrix column indices */ 3732 nz = procsnz[0]; 3733 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3734 mycols = ibuf; 3735 if (size == 1) nz -= extra_rows; 3736 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3737 if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 3738 3739 /* read in every ones (except the last) and ship off */ 3740 for (i=1; i<size-1; i++) { 3741 nz = procsnz[i]; 3742 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3743 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3744 } 3745 /* read in the stuff for the last proc */ 3746 if (size != 1) { 3747 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3748 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3749 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3750 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3751 } 3752 ierr = PetscFree(cols);CHKERRQ(ierr); 3753 } else { 3754 /* determine buffer space needed for message */ 3755 nz = 0; 3756 for (i=0; i<m; i++) { 3757 nz += locrowlens[i]; 3758 } 3759 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr); 3760 mycols = ibuf; 3761 /* receive message of column indices*/ 3762 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3763 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3764 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3765 } 3766 3767 /* loop over local rows, determining number of off diagonal entries */ 3768 ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr); 3769 ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr); 3770 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3771 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3772 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 3773 rowcount = 0; nzcount = 0; 3774 for (i=0; i<mbs; i++) { 3775 dcount = 0; 3776 odcount = 0; 3777 for (j=0; j<bs; j++) { 3778 kmax = locrowlens[rowcount]; 3779 for (k=0; k<kmax; k++) { 3780 tmp = mycols[nzcount++]/bs; 3781 if (!mask[tmp]) { 3782 mask[tmp] = 1; 3783 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3784 else masked1[dcount++] = tmp; 3785 } 3786 } 3787 rowcount++; 3788 } 3789 3790 dlens[i] = dcount; 3791 odlens[i] = odcount; 3792 3793 /* zero out the mask elements we set */ 3794 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3795 for 
(j=0; j<odcount; j++) mask[masked2[j]] = 0; 3796 } 3797 3798 3799 if (!sizesset) { 3800 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3801 } 3802 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3803 3804 if (!rank) { 3805 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3806 /* read in my part of the matrix numerical values */ 3807 nz = procsnz[0]; 3808 vals = buf; 3809 mycols = ibuf; 3810 if (size == 1) nz -= extra_rows; 3811 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3812 if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 3813 3814 /* insert into matrix */ 3815 jj = rstart*bs; 3816 for (i=0; i<m; i++) { 3817 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3818 mycols += locrowlens[i]; 3819 vals += locrowlens[i]; 3820 jj++; 3821 } 3822 /* read in other processors (except the last one) and ship out */ 3823 for (i=1; i<size-1; i++) { 3824 nz = procsnz[i]; 3825 vals = buf; 3826 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3827 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3828 } 3829 /* the last proc */ 3830 if (size != 1){ 3831 nz = procsnz[i] - extra_rows; 3832 vals = buf; 3833 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3834 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3835 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3836 } 3837 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3838 } else { 3839 /* receive numeric values */ 3840 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr); 3841 3842 /* receive message of values*/ 3843 vals = buf; 3844 mycols = ibuf; 3845 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 3846 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 3847 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3848 3849 /* insert into matrix */ 3850 jj = rstart*bs; 3851 for (i=0; i<m; i++) { 3852 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3853 mycols += locrowlens[i]; 3854 vals += locrowlens[i]; 3855 jj++; 3856 } 3857 } 3858 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3859 ierr = PetscFree(buf);CHKERRQ(ierr); 3860 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3861 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3862 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3863 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3864 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 3867 PetscFunctionReturn(0); 3868 } 3869 3870 #undef __FUNCT__ 3871 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3872 /*@ 3873 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3874 3875 Input Parameters: 3876 . mat - the matrix 3877 . 
fact - factor
3878 
3879    Not Collective, each process can use a different factor
3880 
3881    Level: advanced
3882 
3883    Notes:
3884    This can also be set by the command line option: -mat_use_hash_table <fact>
3885 
3886 .keywords: matrix, hashtable, factor, HT
3887 
3888 .seealso: MatSetOption()
3889 @*/
3890 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3891 {
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 EXTERN_C_BEGIN
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3902 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3903 {
3904   Mat_MPIBAIJ *baij;
3905 
3906   PetscFunctionBegin;
3907   baij = (Mat_MPIBAIJ*)mat->data;
3908   baij->ht_fact = fact;
3909   PetscFunctionReturn(0);
3910 }
3911 EXTERN_C_END
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3915 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3916 {
3917   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3918   PetscFunctionBegin;
3919   *Ad     = a->A;
3920   *Ao     = a->B;
3921   *colmap = a->garray;
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 /*
3926   Special version for direct calls from Fortran (to eliminate two function call overheads)
3927 */
3928 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3932 #endif
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3936 /*@C
3937   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3938 
3939   Collective on Mat
3940 
3941   Input Parameters:
3942 + mat    - the matrix
3943 . min    - number of input rows
3944 . im     - input rows
3945 . nin    - number of input columns
3946 . in     - input columns
3947 . v      - numerical values input
3948 - addvin - INSERT_VALUES or ADD_VALUES
3949 
3950   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3951 3952 Level: advanced 3953 3954 .seealso: MatSetValuesBlocked() 3955 @*/ 3956 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3957 { 3958 /* convert input arguments to C version */ 3959 Mat mat = *matin; 3960 PetscInt m = *min, n = *nin; 3961 InsertMode addv = *addvin; 3962 3963 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3964 const MatScalar *value; 3965 MatScalar *barray=baij->barray; 3966 PetscBool roworiented = baij->roworiented; 3967 PetscErrorCode ierr; 3968 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3969 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3970 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3971 3972 PetscFunctionBegin; 3973 /* tasks normally handled by MatSetValuesBlocked() */ 3974 if (mat->insertmode == NOT_SET_VALUES) { 3975 mat->insertmode = addv; 3976 } 3977 #if defined(PETSC_USE_DEBUG) 3978 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3979 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3980 #endif 3981 if (mat->assembled) { 3982 mat->was_assembled = PETSC_TRUE; 3983 mat->assembled = PETSC_FALSE; 3984 } 3985 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3986 3987 3988 if(!barray) { 3989 ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr); 3990 baij->barray = barray; 3991 } 3992 3993 if (roworiented) { 3994 stepval = (n-1)*bs; 3995 } else { 3996 stepval = (m-1)*bs; 3997 } 3998 for (i=0; i<m; i++) { 3999 if (im[i] < 0) continue; 4000 #if defined(PETSC_USE_DEBUG) 4001 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 4002 #endif 4003 if (im[i] >= rstart && im[i] < rend) { 4004 row = im[i] - rstart; 4005 for (j=0; j<n; j++) { 4006 /* If NumCol = 1 then a copy is not required */ 4007 if ((roworiented) && (n == 1)) { 4008 barray = (MatScalar*)v + i*bs2; 4009 } else if((!roworiented) && (m == 1)) { 4010 barray = (MatScalar*)v + j*bs2; 4011 } else { /* Here a copy is required */ 4012 if (roworiented) { 4013 value = v + i*(stepval+bs)*bs + j*bs; 4014 } else { 4015 value = v + j*(stepval+bs)*bs + i*bs; 4016 } 4017 for (ii=0; ii<bs; ii++,value+=stepval) { 4018 for (jj=0; jj<bs; jj++) { 4019 *barray++ = *value++; 4020 } 4021 } 4022 barray -=bs2; 4023 } 4024 4025 if (in[j] >= cstart && in[j] < cend){ 4026 col = in[j] - cstart; 4027 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 4028 } 4029 else if (in[j] < 0) continue; 4030 #if defined(PETSC_USE_DEBUG) 4031 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 4032 #endif 4033 else { 4034 if (mat->was_assembled) { 4035 if (!baij->colmap) { 4036 ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 4037 } 4038 4039 #if defined(PETSC_USE_DEBUG) 4040 #if defined (PETSC_USE_CTABLE) 4041 { PetscInt data; 4042 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4043 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4044 } 4045 #else 4046 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 4047 #endif 4048 #endif 4049 #if defined (PETSC_USE_CTABLE) 4050 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4051 col = (col - 1)/bs; 4052 #else 
4053           col = (baij->colmap[in[j]] - 1)/bs;
4054 #endif
4055           if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4056             ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4057             col = in[j];
4058           }
4059         }
4060         else col = in[j];
4061         ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4062       }
4063     }
4064   } else {
4065     if (!baij->donotstash) {
4066       if (roworiented) {
4067         ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068       } else {
4069         ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070       }
4071     }
4072   }
4073   }
4074 
4075   /* task normally handled by MatSetValuesBlocked() */
4076   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4082 /*@
4083    MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4084    in standard CSR format.
4085 
4086    Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  bs - the block size, only a block size of 1 is supported
4091 .  m - number of local rows (Cannot be PETSC_DECIDE)
4092 .  n - This value should be the same as the local size used in creating the
4093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4094        calculated if N is given) For square matrices n is almost always m.
4095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4097 .  i - row indices
4098 .  j - column indices
4099 -  a - matrix values
4100 
4101    Output Parameter:
4102 .  mat - the matrix
4103 
4104    Level: intermediate
4105 
4106    Notes:
4107    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4108    thus you CANNOT change the matrix entries by changing the values of a[] after you have
4109    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4110 
4111    The i and j indices are 0 based, and the i array holds the row offsets into the local j array.
4112 
4113 .keywords: matrix, baij, compressed row, sparse, parallel
4114 
4115 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4116           MATMPIBAIJ, MatCreateMPIBAIJ()
4117 @*/
4118 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4119 {
4120   PetscErrorCode ierr;
4121 
4122 
4123   PetscFunctionBegin;
4124   if (i[0]) {
4125     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4126   }
4127   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4128   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4129   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4130   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
4134 
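/*
   Usage sketch for MatCreateMPIBAIJWithArrays(), with illustrative values only: a process that
   owns 2 rows of the matrix, whose entries are already stored in CSR form with 0-based global
   column indices.  The names ii, jj, aa and A are placeholders, not part of the interface.

     PetscInt    ii[] = {0,2,4};             row offsets for the 2 local rows
     PetscInt    jj[] = {0,1,1,2};           global column indices of the 4 local nonzeros
     PetscScalar aa[] = {1.0,2.0,3.0,4.0};   the corresponding values

     MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
*/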