
#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
/*
   MatGetRowMaxAbs_MPIBAIJ - For each locally owned row, returns in v the entry of
   largest absolute value and, if idx is non-NULL, its GLOBAL column index in idx[].

   Takes the max over the diagonal block (a->A) and the off-diagonal block (a->B);
   local column indices coming from B are mapped back to global indices via garray.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* max-abs over the diagonal block; idx[] returns LOCAL column indices here */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      /* shift to global numbering; a zero max means the row of A had no entries */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* repeat for the off-diagonal block, into a scratch sequential vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      /* translate B's compressed block column back to a global point column */
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
/* Saves a copy of the numerical values of both local blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
/* Restores the values previously saved with MatStoreValues_MPIBAIJ() */
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: table maps global block column + 1 -> local point offset + 1
     (the +1 lets 0 mean "column not present locally") */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense variant: array indexed by every global block column; 0 means "missing" */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts one scalar (row,col,value) into the diagonal block A.  Expects the
   caller (MatSetValues_MPIBAIJ) to have the SeqBAIJ working variables
   (rp,ap,aimax,ailen,low,high,...) in scope.  Performs a short binary search
   followed by a linear scan over the block row; reallocates and shifts entries
   when a genuinely new block must be created.
*/
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows to a window of <= 3, then linear scan */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
   Same as MatSetValues_SeqBAIJ_A_Private() but for the off-diagonal block B
   (working variables rp<-bj, ap<-ba, bimax, bilen, ...).
*/
#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Point-indexed insertion; local entries go directly
   into the A or B block, off-process rows are stashed for assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscBool
roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {   /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {   /* diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {                                             /* off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not present in B: revert to unassembled form so it can grow */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {   /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Block-indexed insertion (im/in are block
   row/column indices; v holds m*n dense bs-by-bs blocks).
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocated bs*bs scratch block, kept on the matrix for reuse */
    ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
baij->barray = barray;
  }

  /* stepval = gap (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the bs x bs block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {   /* diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {                                   /* off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: fall back to unassembled form */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {   /* off-process block row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash on the golden ratio; tmp is a PetscReal scratch variable */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Point-indexed MatSetValues for the hash-table fast
   path; the nonzero block pattern must already be in the table (no insertions).
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v)
PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table; key encodes the (block row, block col) pair */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping around once */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {   /* off-process row: stash as usual */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 461 { 462 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463 PetscBool roworiented = baij->roworiented; 464 PetscErrorCode ierr; 465 PetscInt i,j,ii,jj,row,col; 466 PetscInt rstart=baij->rstartbs; 467 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 468 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 469 PetscReal tmp; 470 MatScalar **HD = baij->hd,*baij_a; 471 const PetscScalar *v_t,*value; 472 #if defined(PETSC_USE_DEBUG) 473 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 474 #endif 475 476 PetscFunctionBegin; 477 if (roworiented) stepval = (n-1)*bs; 478 else stepval = (m-1)*bs; 479 480 for (i=0; i<m; i++) { 481 #if defined(PETSC_USE_DEBUG) 482 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 483 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 484 #endif 485 row = im[i]; 486 v_t = v + i*nbs2; 487 if (row >= rstart && row < rend) { 488 for (j=0; j<n; j++) { 489 col = in[j]; 490 491 /* Look up into the Hash Table */ 492 key = row*Nbs+col+1; 493 h1 = HASH(size,key,tmp); 494 495 idx = h1; 496 #if defined(PETSC_USE_DEBUG) 497 total_ct++; 498 insert_ct++; 499 if (HT[idx] != key) { 500 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 501 if (idx == size) { 502 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 503 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 504 } 505 } 506 #else 507 if (HT[idx] != key) { 508 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 509 if (idx == size) { 510 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 511 if (idx == h1) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 512 } 513 } 514 #endif 515 baij_a = HD[idx]; 516 if (roworiented) { 517 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 518 /* value = v + (i*(stepval+bs)+j)*bs; */ 519 value = v_t; 520 v_t += bs; 521 if (addv == ADD_VALUES) { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] += *value++; 525 } 526 } 527 } else { 528 for (ii=0; ii<bs; ii++,value+=stepval) { 529 for (jj=ii; jj<bs2; jj+=bs) { 530 baij_a[jj] = *value++; 531 } 532 } 533 } 534 } else { 535 value = v + j*(stepval+bs)*bs + i*bs; 536 if (addv == ADD_VALUES) { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] += *value++; 540 } 541 } 542 } else { 543 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 544 for (jj=0; jj<bs; jj++) { 545 baij_a[jj] = *value++; 546 } 547 } 548 } 549 } 550 } 551 } else { 552 if (!baij->donotstash) { 553 if (roworiented) { 554 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 555 } else { 556 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 #if defined(PETSC_USE_DEBUG) 562 baij->ht_total_ct = total_ct; 563 baij->ht_insert_ct = insert_ct; 564 #endif 565 PetscFunctionReturn(0); 566 } 567 568 #undef __FUNCT__ 569 #define __FUNCT__ "MatGetValues_MPIBAIJ" 570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 571 { 572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 573 PetscErrorCode ierr; 574 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 575 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 576 577 PetscFunctionBegin; 578 for (i=0; i<m; i++) { 579 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: 
%D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* entry lives in the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from the off-diagonal part => stored value is 0.0 */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Frobenius, 1- and infinity-norms of a parallel BAIJ
   matrix, combining the local A and B blocks with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor case: defer entirely to the sequential implementation */
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, reduce, then take the square root */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (block ROW start); this is
         only correct when the row and column ownership ranges coincide --
         confirm against baij->cstartbs for rectangular layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;   /* garray maps to the global block column */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* accumulate the bs point-row sums of each block row, over A then B */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  /* table is oversized by 'factor' relative to the local nonzero block count */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing from h1; HD records the address of the stored block */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] =
key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];   /* translate to global block column */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Checks insert/add-mode consistency across the
   communicator and starts communication of stashed off-process entries
   (both the point stash and the block stash).
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Receives and applies stashed off-process entries,
   assembles the local A and B blocks, handles disassembly consistency across
   processes, and optionally installs the hash-table fast path.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly, optionally build the hash table and switch
     MatSetValues()/MatSetValuesBlocked() to the fast-path implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr =
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global point-row indices of one block row */
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-oriented: 'a' walks one column (bs values) at a time */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
/*
   MatView_MPIBAIJ_Binary - Writes the matrix to a binary viewer in the standard
   PETSc binary matrix format (header, per-point-row lengths, column indices,
   values), funneling all data through rank 0 under flow control.
*/
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);     /* local nonzero scalars (blocks stored dense) */
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    /* expand block-row lengths to point-row lengths: each of the bs point rows
       in a block row has bs*crow_lens[i] nonzero scalars */
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr =
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1130 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1131 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1132 } 1133 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1134 1135 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1136 information needed to make it for each row from a block row. This does require more communication but still not more than 1137 the communication needed for the nonzero values */ 1138 nzmax = nz; /* space a largest processor needs */ 1139 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1140 ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1141 cnt = 0; 1142 for (i=0; i<a->mbs; i++) { 1143 pcnt = cnt; 1144 for (j=B->i[i]; j<B->i[i+1]; j++) { 1145 if ((col = garray[B->j[j]]) > cstart) break; 1146 for (l=0; l<bs; l++) { 1147 column_indices[cnt++] = bs*col+l; 1148 } 1149 } 1150 for (k=A->i[i]; k<A->i[i+1]; k++) { 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1153 } 1154 } 1155 for (; j<B->i[i+1]; j++) { 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1158 } 1159 } 1160 len = cnt - pcnt; 1161 for (k=1; k<bs; k++) { 1162 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1163 cnt += len; 1164 } 1165 } 1166 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1167 1168 /* store the columns to the file */ 1169 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1170 if (!rank) { 1171 MPI_Status status; 1172 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1173 for (i=1; i<size; i++) { 1174 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    /* rlen = number of nonzero scalars in one point row of this block row */
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    /* same three-phase column ordering as the index pass above; the l*rlen stride
       scatters each block column into the bs interleaved point rows */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;   /* skip over the other bs-1 point rows already filled */
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
/* MatView_MPIBAIJ - Dispatches to the ASCII/draw/socket or binary viewer implementation. */
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by the parallel matrix: the two
   sequential blocks, the column map, scatter context, work arrays and hash
   table, then clears the composed function slots.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatMult_MPIBAIJ" 1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1314 { 1315 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 PetscInt nt; 1318 1319 PetscFunctionBegin; 1320 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1321 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1322 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1323 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1324 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1326 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1334 { 1335 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1340 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1341 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1342 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = A^T*xx; B^T*xx is scattered in
   reverse (ADD_VALUES) into yy around the local A^T*xx product. The statement
   order depends on whether the scatter context merges begin/end.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/* MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T*xx. */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
/* MatScale_MPIBAIJ - Scales both local blocks by aa (A = aa*A). */
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned global row, merging the
   diagonal (A) and off-diagonal (B) parts into a single array sorted by
   increasing global column (assuming A and B rows are individually sorted).
   Only one row may be active at a time; pair with MatRestoreRow_MPIBAIJ.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* only request from the sub-rows what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries with global column left of the diagonal part */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
/* MatRestoreRow_MPIBAIJ - Releases the row obtained with MatGetRow_MPIBAIJ. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
/* MatZeroEntries_MPIBAIJ - Zeros all entries, preserving the nonzero structure. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
/*
   MatGetInfo_MPIBAIJ - Sums the statistics of the two local blocks, then (for
   global flags) reduces them across the communicator with MAX or SUM.
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
info->memory = isend[3]; 1559 info->mallocs = isend[4]; 1560 } else if (flag == MAT_GLOBAL_MAX) { 1561 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1562 1563 info->nz_used = irecv[0]; 1564 info->nz_allocated = irecv[1]; 1565 info->nz_unneeded = irecv[2]; 1566 info->memory = irecv[3]; 1567 info->mallocs = irecv[4]; 1568 } else if (flag == MAT_GLOBAL_SUM) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1570 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1577 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1578 info->fill_ratio_needed = 0; 1579 info->factor_mallocs = 0; 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1586 { 1587 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_NEW_NONZERO_LOCATIONS: 1593 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1594 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 case MAT_NEW_NONZERO_LOCATION_ERR: 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_ROW_ORIENTED: 1601 a->roworiented = flg; 1602 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_NEW_DIAGONALS: 1607 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1608 break; 1609 case MAT_IGNORE_OFF_PROC_ENTRIES: 1610 a->donotstash = flg; 1611 break; 1612 case MAT_USE_HASH_TABLE: 1613 a->ht_flag = flg; 1614 break; 1615 
case MAT_SYMMETRIC: 1616 case MAT_STRUCTURALLY_SYMMETRIC: 1617 case MAT_HERMITIAN: 1618 case MAT_SYMMETRY_ETERNAL: 1619 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1620 break; 1621 default: 1622 SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1632 Mat_SeqBAIJ *Aloc; 1633 Mat B; 1634 PetscErrorCode ierr; 1635 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1636 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1637 MatScalar *a; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1642 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1643 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1644 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1645 /* Do not know preallocation information, but must set block size */ 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 /* copy over the B part */ 1669 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1670 ai = 
Aloc->i; aj = Aloc->j; a = Aloc->a; 1671 for (i=0; i<mbs; i++) { 1672 rvals[0] = bs*(baij->rstartbs + i); 1673 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1674 for (j=ai[i]; j<ai[i+1]; j++) { 1675 col = baij->garray[aj[j]]*bs; 1676 for (k=0; k<bs; k++) { 1677 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1678 col++; 1679 a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1688 else { 1689 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1697 { 1698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1699 Mat a = baij->A,b = baij->B; 1700 PetscErrorCode ierr; 1701 PetscInt s1,s2,s3; 1702 1703 PetscFunctionBegin; 1704 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1705 if (rr) { 1706 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1707 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1708 /* Overlap communication with computation. 
     */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   Zeros the N global (point) rows rows[] of A, optionally placing diag on the
   diagonal and fixing the right-hand side b from the solution x for those rows.

   Rows may be owned by any process: each process first routes the requested row
   indices to their owners (Irecv/Isend exchange below), then zeroes its own rows
   locally.  rows[] need not be sorted, though sorted input makes the owner
   search below a single sweep.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j    = 0;
  for (i=0; i<N; i++) {
    /* restart the owner search from process 0 only when indices stop increasing */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;     /* number of rows destined for process j */
        nprocs[2*j+1] = 1; /* flag: at least one message goes to process j */
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  if (A->nooffproczerorows) {
    /* user promised all zeroed rows are local; the only send (if any) is to self */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  /* bucket the row indices by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] (the bucketing pass above advanced it) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr =
MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter; lrows[] = locally owned rows to zero,
     shifted to local (point) numbering */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed: b_row = diag * x_row for each zeroed row */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: SeqBAIJ routine can place diag directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
/* Clears the factored state; only the diagonal block carries factorization state */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat
B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* compare diagonal blocks first; only compare off-diagonal blocks if those match */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* the matrices are equal only if every process found its local parts equal */
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIBAIJ"
PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    /* fast path: copy the diagonal and off-diagonal blocks directly */
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIBAIJ"
/* Default setup: preallocate with default block size and no nonzero hints */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIBAIJ"
/*
   Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored nonzero arrays line up
   entry-for-entry, so a raw BLAS axpy on the value arrays of both blocks suffices.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
  } else {
    /* differing patterns: fall back to the generic entry-wise implementation */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIBAIJ"
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
/*
   Extracts the submatrix given by (isrow, iscol).  The column index set is first
   gathered onto every process (cached on the result under "ISAllGather" so a
   MAT_REUSE_MATRIX call can find it again), then the work is done in
   MatGetSubMatrix_MPIBAIJ_Private().
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr =
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert point index sets to block index sets */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  /* analogous special case: all rows requested */
  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential scratch submatrix cached on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* from here on m and n are in BLOCK units */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct
preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* rectangular: spread columns as evenly as possible across processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the allocation: second half of dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in point rows while m was divided by bs above —
       looks inconsistent for bs > 1; confirm against upstream history */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values below are supplied column-major per block (vwork points at raw block storage) */
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
/*
   Forms B = P*A*Q for permutations rowp (rows) and colp (columns) by building
   global/local versions of the permutations and extracting the corresponding
   submatrix.
*/
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,rlocal_size,clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,growp,irowp,lrowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    /* rowp lives on a different communicator: rebuild it on A's communicator */
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(&growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(&irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  /* clean up */
  ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
/* Reports the ghost (off-process) block columns of mat; both outputs are optional */
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
*/
PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
{
  Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode         ierr;
  PetscMPIInt            size,*ncolsonproc,*disp,nn;
  PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
  const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
  PetscInt               nis = iscoloring->n,nctot,*cols;
  PetscInt               *rowhit,M,cstart,cend,colb;
  PetscInt               *columnsforrow,l;
  IS                     *isa;
  PetscBool              done,flg;
  ISLocalToGlobalMapping map = mat->cmap->bmapping;
  PetscInt               ctype=c->ctype;

  PetscFunctionBegin;
  if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
  if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");

  if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
  else ltog = NULL;
  ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);

  /* all sizes below are in BLOCK units */
  M         = mat->rmap->n/bs;
  cstart    = mat->cmap->rstart/bs;
  cend      = mat->cmap->rend/bs;
  c->M      = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
  c->N      = mat->cmap->N/bs;
  c->m      = mat->rmap->n/bs;
  c->rstart = mat->rmap->rstart/bs;

  c->ncolors = nis;
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
  ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
  ierr =
PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);

  /* Allow access to data structures of local part of matrix */
  if (!baij->colmap) {
    ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
  }
  /* column-oriented (CSC-like) views of the diagonal and off-diagonal blocks */
  ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);

  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);

  for (i=0; i<nis; i++) {
    ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
    ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);

    c->ncolumns[i] = n;
    if (n) {
      ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else {
      c->columns[i] = 0;
    }

    if (ctype == IS_COLORING_GLOBAL) {
      /* Determine the total (parallel) number of columns of this color */
      ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
      ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);

      ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
      ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
      nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
      if (!nctot) {
        ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
      }

      disp[0] = 0;
      for (j=1; j<size; j++) {
        disp[j] = disp[j-1] + ncolsonproc[j-1];
      }

      /* Get complete list of columns for color on each processor */
      ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
    } else if (ctype == IS_COLORING_GHOSTED) {
      /* Determine local number of columns of this color on this process, including ghost points */
      nctot = n;
      ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
      ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");

    /*
       Mark all rows affect by these columns
    */
    /* Temporary option to allow for debugging/testing */
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
    if (!flg) { /*-----------------------------------------------------------------------------*/
      /* crude, fast version */
      ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
          /* off-diagonal: map global column to compressed local column via colmap */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0; /* this column has no entries on this process */
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }
        /* loop over columns marking them in rowhit; value col+1 remembers which
           column of this color hit the row (0 means not hit) */
        for (k=0; k<m; k++) {
          rowhit[*rows++] = col + 1;
        }
      }

      /* count the number of hits */
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) nrows++;
      }
      c->nrows[i] = nrows;
      ierr =
PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      nrows = 0;
      for (j=0; j<M; j++) {
        if (rowhit[j]) {
          c->rows[i][nrows]          = j;
          c->columnsforrow[i][nrows] = rowhit[j] - 1; /* undo the +1 offset stored above */
          nrows++;
        }
      }
    } else { /*-------------------------------------------------------------------------------*/
      /* slow version, using rowhit as a linked list */
      PetscInt currentcol,fm,mfm;
      rowhit[M] = M; /* sentinel: list terminator */
      nrows     = 0;
      /* loop over columns*/
      for (j=0; j<nctot; j++) {
        if (ctype == IS_COLORING_GHOSTED) {
          col = ltog[cols[j]];
        } else {
          col = cols[j];
        }
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          rows = A_cj + A_ci[col-cstart];
          m    = A_ci[col-cstart+1] - A_ci[col-cstart];
        } else {
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          if (colb == -1) {
            m = 0;
          } else {
            colb = colb/bs;
            rows = B_cj + B_ci[colb];
            m    = B_ci[colb+1] - B_ci[colb];
          }
        }

        /* loop over columns marking them in rowhit */
        fm = M; /* fm points to first entry in linked list */
        for (k=0; k<m; k++) {
          currentcol = *rows++;
          /* is it already in the list? */
          do {
            mfm = fm;
            fm  = rowhit[fm];
          } while (fm < currentcol);
          /* not in list so add it */
          if (fm != currentcol) {
            nrows++;
            columnsforrow[currentcol] = col;
            /* next three lines insert new entry into linked list */
            rowhit[mfm]        = currentcol;
            rowhit[currentcol] = fm;
            fm                 = currentcol;
            /* fm points to present position in list since we know the columns are sorted */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
        }
      }
      c->nrows[i] = nrows;
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
      ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
      /* now store the linked list of rows into c->rows[i] */
      nrows = 0;
      fm    = rowhit[M];
      do {
        c->rows[i][nrows]            = fm;
        c->columnsforrow[i][nrows++] = columnsforrow[fm];
        fm = rowhit[fm];
      } while (fm < M);
    } /* ---------------------------------------------------------------------------------------*/
    ierr = PetscFree(cols);CHKERRQ(ierr);
  }

  /* Optimize by adding the vscale, and scaleforrow[][] fields */
  /*
       vscale will contain the "diagonal" on processor scalings followed by the off processor
  */
  if (ctype == IS_COLORING_GLOBAL) {
    PetscInt *garray;
    /* expand block ghost indices to point indices for the ghosted work vector */
    ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
    for (i=0; i<baij->B->cmap->n/bs; i++) {
      for (j=0; j<bs; j++) {
        garray[i*bs+j] = bs*baij->garray[i]+j;
      }
    }
    ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
    ierr = PetscFree(garray);CHKERRQ(ierr);
    CHKMEMQ;
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];
        if (col >= cstart && col < cend) {
          /* column is in diagonal block of matrix */
          colb = col - cstart;
        } else {
          /* column  is in "off-processor" part */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
          colb--;
#else
          colb = baij->colmap[col] - 1;
#endif
          colb  = colb/bs;
          colb += cend - cstart; /* ghost entries come after the local ones in vscale */
        }
        c->vscaleforrow[k][l] = colb;
      }
    }
  } else if (ctype == IS_COLORING_GHOSTED) {
    /* Get gtol mapping */
    PetscInt N = mat->cmap->N,nlocal,*gtol;
    ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
    for (i=0; i<N; i++) gtol[i] = -1; /* -1 marks globals with no local counterpart */
    ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
    for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;

    c->vscale = 0; /* will be created in MatFDColoringApply() */
    ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
    for (k=0; k<c->ncolors; k++) {
      ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
      for (l=0; l<c->nrows[k]; l++) {
        col = c->columnsforrow[k][l];      /* global column index */

        c->vscaleforrow[k][l] = gtol[col]; /* local column index */
      }
    }
    ierr = PetscFree(gtol);CHKERRQ(ierr);
  }
  ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);

  ierr = PetscFree(rowhit);CHKERRQ(ierr);
  ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
  ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
  if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
  CHKMEMQ;
  PetscFunctionReturn(0);
}

#undef
__FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
/*
   Builds, on every process, a sequential AIJ matrix that holds the nonzero
   STRUCTURE (block rows/columns, no numerical values) of the entire parallel
   BAIJ matrix A.  Row lengths and column indices are gathered with
   MPI_Allgatherv so each rank ends up with an identical copy.
   Note: this is non-scalable; B has A->rmap->N/bs rows on every rank.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    /* local row length = diagonal-part length + off-diagonal-part length */
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* one allocation holds both recvcounts and displs (displs = second half) */
  ierr   = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over, merging the off-diagonal
    (bd, with garray translating to global block columns) and diagonal (ad)
    parts so each row's columns come out in ascending global order.
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from A to the structure matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIBAIJ"
/*
   Parallel SOR: only "local" sweep variants are supported.  Each outer
   iteration scatters the current solution into mat->lvec, forms
   bb1 = bb - B*x (B = off-diagonal part), then runs the sequential SOR of
   the diagonal block on the updated right-hand side.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* a work vector is needed unless a single zero-initial-guess sweep suffices;
     ~flag & SOR_ZERO_INITIAL_GUESS is nonzero iff the flag bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no scatter since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* bring remote entries of x onto this process */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else
SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  /* VecDestroy() is a no-op if bb1 was never created */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
/*
   Inverts the block diagonal; only the local diagonal part a->A contributes,
   so this simply forwards to the sequential implementation.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/* -------------------------------------------------------------------*/
/* Function table for MATMPIBAIJ; the numeric comments give the slot index
   in struct _MatOps, and 0 marks an unimplemented operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                               /* 4*/  MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                               /*10*/  0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                               /*15*/  MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                               /*20*/  MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                               /*24*/  MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*29*/  MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*34*/  MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*39*/  MatAXPY_MPIBAIJ,
                                       MatGetSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                               /*44*/  0,
                                       MatScale_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                               /*49*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*54*/  MatFDColoringCreate_MPIBAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                               /*59*/  MatGetSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                               /*64*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*69*/  MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*74*/  0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                               /*79*/  0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                               /*84*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*89*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*94*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*99*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       MatInvertBlockDiagonal_MPIBAIJ
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/* Returns (without copying) the sequential matrix holding the diagonal block. */
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/*
   Preallocates and fills B from block-compressed-row data (ii,jj,V), where
   ii/jj index BLOCK rows/columns.  If V is NULL the values are set to zero.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr =
PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all counts below are in BLOCK units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    /* columns are sorted: skip those left of the diagonal block ... */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* ... then count those inside [cstart,cend) as diagonal-part entries */
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still set */
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ?
(bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on B */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
/*
   Type-specific preallocation: validates the requested block-row counts,
   fills in the block-size dependent fields of Mat_MPIBAIJ, and (on first
   call) creates the sequential diagonal (b->A) and off-diagonal (b->B) parts.
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;

  PetscFunctionBegin;
  /* remember whether the caller supplied real preallocation information */
  if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
  if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;

  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* re-fetch bs: the layout may have adjusted it */
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache block-scaled sizes and ownership ranges */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr =
MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    /* off-diagonal part is stored with the full global column space */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
  if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
EXTERN_C_END


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/*
   Converts a MATMPIBAIJ matrix to MATMPIADJ (block adjacency graph).
   Diagonal entries are dropped; columns are emitted in ascending global
   block-column order by merging the off-diagonal and diagonal parts.
*/
PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0)
SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if the matrix row has a diagonal entry
       (MPIAdj stores no diagonal) */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
    CHKMEMQ;
  }
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal columns left of this rank's ownership range */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    /* diagonal-part columns, skipping the diagonal entry itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    /* remaining off-diagonal columns right of the ownership range */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
  /* MatCreateMPIAdj() takes ownership of ii and jj */
  ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include <../src/mat/impls/aij/mpi/mpiaij.h>
EXTERN_C_BEGIN
PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
/*
   Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting its sequential
   diagonal and off-diagonal parts with MatConvert_SeqBAIJ_SeqAIJ().
*/
PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");

  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*) B->data;

  /* replace B's freshly created parts with conversions of A's parts */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  /* disassemble A so its off-diagonal part uses global column numbering */
  ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* reassemble A since MatDisAssemble_MPIBAIJ() modified it */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
.
-mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact>

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   Constructor for MATMPIBAIJ: installs the function table, initializes the
   Mat_MPIBAIJ context, processes matrix options, and composes the
   type-specific methods (conversion, preallocation, store/retrieve, ...).
*/
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscOptionsBegin(((PetscObject)B)->comm,NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* reject non-expanding factors */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
                                           "MatConvert_MPIBAIJ_MPIAdj",
                                           MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
                                           "MatConvert_MPIBAIJ_MPIAIJ",
                                           MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
                                           "MatConvert_MPIBAIJ_MPISBAIJ",
                                           MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIBAIJ",
                                           MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIBAIJ",
                                           MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIBAIJ",
                                           MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
"MatMPIBAIJSetPreallocation_MPIBAIJ",
                                           MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                           "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                           MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIBAIJ",
                                           MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                           "MatSetHashTableFactor_MPIBAIJ",
                                           MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
                                           "MatConvert_MPIBAIJ_MPIBSTRM",
                                           MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.
 
   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs   - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local matrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.
In 3362 the figure below we depict these three local rows and all columns (0-11). 3363 3364 .vb 3365 0 1 2 3 4 5 6 7 8 9 10 11 3366 ------------------- 3367 row 3 | o o o d d d o o o o o o 3368 row 4 | o o o d d d o o o o o o 3369 row 5 | o o o d d d o o o o o o 3370 ------------------- 3371 .ve 3372 3373 Thus, any entries in the d locations are stored in the d (diagonal) 3374 submatrix, and any entries in the o locations are stored in the 3375 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3376 stored simply in the MATSEQBAIJ format for compressed row storage. 3377 3378 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3379 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3380 In general, for PDE problems in which most nonzeros are near the diagonal, 3381 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3382 or you will get TERRIBLE performance; see the users' manual chapter on 3383 matrices. 3384 3385 You can call MatGetInfo() to get information on how effective the preallocation was; 3386 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3387 You can also run with the option -info and look for messages with the string 3388 malloc in them to see if additional memory allocation was needed. 

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on B
     (silently a no-op for matrix types without this method) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
3469 3470 The user can specify preallocated storage for the diagonal part of 3471 the local submatrix with either d_nz or d_nnz (not both). Set 3472 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3473 memory allocation. Likewise, specify preallocated storage for the 3474 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3475 3476 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3477 the figure below we depict these three local rows and all columns (0-11). 3478 3479 .vb 3480 0 1 2 3 4 5 6 7 8 9 10 11 3481 ------------------- 3482 row 3 | o o o d d d o o o o o o 3483 row 4 | o o o d d d o o o o o o 3484 row 5 | o o o d d d o o o o o o 3485 ------------------- 3486 .ve 3487 3488 Thus, any entries in the d locations are stored in the d (diagonal) 3489 submatrix, and any entries in the o locations are stored in the 3490 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3491 stored simply in the MATSEQBAIJ format for compressed row storage. 3492 3493 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3494 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3495 In general, for PDE problems in which most nonzeros are near the diagonal, 3496 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3497 or you will get TERRIBLE performance; see the users' manual chapter on 3498 matrices. 
   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* more than one process: parallel (diagonal/off-diagonal) block storage */
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single process: sequential storage; the off-diagonal arguments are unused */
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/* Duplicates a MATMPIBAIJ matrix: copies the sizes, layouts, block-ownership
   ranges and hash-table options, clones the column map and garray, and
   (below) duplicates the diagonal (A) and off-diagonal (B) sequential blocks
   and the scatter context. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  /* the duplicate is created already assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call scratch state is reset, not copied */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: options are inherited, the table itself is rebuilt lazily */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal and off-diagonal sequential blocks */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
/* Loads a matrix stored in PETSc binary format into an MPIBAIJ matrix.
   Process 0 reads the header, row lengths, column indices and numerical
   values and ships each process its share.  If the global size is not
   divisible by the block size, extra rows are appended to pad it; padded
   rows get a single entry of 1.0 on the diagonal. */
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* sizesset == 0 means the caller left every size to be determined from the file */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
  } else mmax = m;

  /* prefix-sum the per-process block counts into ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;  /* point (non-block) row ranges */
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
    for (j=mend; j<m; j++) locrowlens[j] = 1;  /* padded rows have one (diagonal) entry */
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;  /* padded rows are not in the file */
      ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;  /* diagonal entries for padded rows */
    }

    /* read in every one's (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already seen in the current block row so each
     block is counted once, masked1/masked2 remember which marks to clear */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;  /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;  /* unit diagonal on padded rows */
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Input Parameters:
+  mat - the matrix
.  
fact - factor 3883 3884 Not Collective, each process can use a different factor 3885 3886 Level: advanced 3887 3888 Notes: 3889 This can also be set by the command line option: -mat_use_hash_table <fact> 3890 3891 .keywords: matrix, hashtable, factor, HT 3892 3893 .seealso: MatSetOption() 3894 @*/ 3895 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3896 { 3897 PetscErrorCode ierr; 3898 3899 PetscFunctionBegin; 3900 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3901 PetscFunctionReturn(0); 3902 } 3903 3904 EXTERN_C_BEGIN 3905 #undef __FUNCT__ 3906 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3907 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3908 { 3909 Mat_MPIBAIJ *baij; 3910 3911 PetscFunctionBegin; 3912 baij = (Mat_MPIBAIJ*)mat->data; 3913 baij->ht_fact = fact; 3914 PetscFunctionReturn(0); 3915 } 3916 EXTERN_C_END 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3920 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3921 { 3922 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3923 3924 PetscFunctionBegin; 3925 *Ad = a->A; 3926 *Ao = a->B; 3927 *colmap = a->garray; 3928 PetscFunctionReturn(0); 3929 } 3930 3931 /* 3932 Special version for direct calls from Fortran (to eliminate two function call overheads 3933 */ 3934 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3935 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3936 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3937 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3938 #endif 3939 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3942 /*@C 3943 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3944 3945 Collective on Mat 3946 3947 Input Parameters: 3948 + mat - the matrix 3949 . min - number of input rows 3950 . im - input rows 3951 . nin - number of input columns 3952 . 
in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso:   MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* lazily allocate the bs x bs staging block used when a copy is required */
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows (or columns) of a block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;  /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal block of the local submatrix */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal block: translate the global block column through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* the block column is new: fall back to unassembled form */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4112 4113 .keywords: matrix, aij, compressed row, sparse, parallel 4114 4115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4116 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4117 @*/ 4118 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4119 { 4120 PetscErrorCode ierr; 4121 4122 PetscFunctionBegin; 4123 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4124 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4125 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4126 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4127 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 4128 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4129 PetscFunctionReturn(0); 4130 } 4131