#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
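/*
   Illustrative sketch (not part of the library): how the colmap built above is
   consulted later in this file.  Entries are stored shifted by one so that 0 can
   mean "not present"; gbcol below is an assumed global block-column index.

     PetscInt loffset;                                    local offset of first column of the block, or -1
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gbcol+1,&loffset);CHKERRQ(ierr);
     loffset--;                                           PetscTableFind() returns 0 when the key is absent
   #else
     loffset = baij->colmap[gbcol] - 1;                   dense array lookup, again -1 when absent
   #endif
     if (loffset < 0) { the block column is not yet present in B }
*/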
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
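/*
   Sketch of the addressing used by the two macros above (illustration only).
   A BAIJ block is stored column-major, so within block _i of a block row the
   scalar entry (ridx,cidx) lives at

       ap[bs2*_i + bs*cidx + ridx]

   and the column search first narrows [low,high) by bisection until fewer than
   four candidates remain, then scans linearly.
*/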
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
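/*
   Usage sketch (values and indices made up for illustration; mat is an assembled
   MPIBAIJ matrix).  Rows owned locally are inserted directly into A or B above;
   off-process rows land in the stash until MatAssemblyBegin/End().

     PetscInt    grow = 5, gcol = 12;        global scalar indices
     PetscScalar val  = 3.0;
     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
*/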
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
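/*
   Usage sketch for the blocked path (illustration; bs = 2 assumed).  im[]/in[]
   are BLOCK indices and v holds a full bs*bs block, row-oriented by default:

     PetscInt    brow = 1, bcol = 3;
     PetscScalar blk[4] = {1.0,2.0,
                           3.0,4.0};         logical 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);
*/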
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
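/*
   The hash used above, for reference (sketch).  HASH() is multiplicative
   hashing with the golden ratio: for a key k and table size s it computes

       h1 = floor(s * frac(k * 0.6180339887))

   and the key itself is the 1-based global block location
   key = (row/bs)*Nbs + col/bs + 1, so the value 0 can serve as the
   empty-slot marker in HT.
*/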
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
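/*
   For reference, the three norms computed above are

       Frobenius: ||A||_F   = sqrt( sum_ij |a_ij|^2 )   (local sums reduced with MPIU_SUM)
       1-norm:    ||A||_1   = max_j sum_i |a_ij|        (column sums reduced with MPIU_SUM, then max)
       inf-norm:  ||A||_inf = max_i sum_j |a_ij|        (row sums reduced with MPIU_MAX)
*/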
/*
   Creates and fills the hash table.  The table is created only once;
   if new entries need to be added to the matrix then the hash table
   has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
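/*
   Collision handling above is plain linear probing (note for the reader):
   starting at h1, the first empty slot (HT[idx] == 0) receives the key,
   wrapping modulo ht_size.  The table holds factor*nz slots (factor > 1,
   set through the hash-table factor option), so a larger factor trades
   memory for shorter probe sequences.
*/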
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
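/*
   Typical call sequence (sketch): all ranks must participate, with matching
   insert modes, before the matrix can be used.

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);              possibly off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/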
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.  Since the values are stashed column-oriented,
       clear the row-oriented flags, and after MatSetValues() restore the
       original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
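/*
   Viewing sketch (standard PETSc viewer calls; format push/pop assumed
   available in this PETSc version):

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/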
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);
  /* load up the local column indices.  Include them for every scalar row, not just
     one per block row, since process 0 does not have the information needed to
     reconstruct the per-row indices from per-block-row data.  This requires more
     communication, but still no more than the communication needed for the
     nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
This does require more communication but still not more than 1132 the communication needed for the nonzero values */ 1133 nzmax = nz; /* space a largest processor needs */ 1134 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1135 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1136 cnt = 0; 1137 for (i=0; i<a->mbs; i++) { 1138 pcnt = cnt; 1139 for (j=B->i[i]; j<B->i[i+1]; j++) { 1140 if ((col = garray[B->j[j]]) > cstart) break; 1141 for (l=0; l<bs; l++) { 1142 column_indices[cnt++] = bs*col+l; 1143 } 1144 } 1145 for (k=A->i[i]; k<A->i[i+1]; k++) { 1146 for (l=0; l<bs; l++) { 1147 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1148 } 1149 } 1150 for (; j<B->i[i+1]; j++) { 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1153 } 1154 } 1155 len = cnt - pcnt; 1156 for (k=1; k<bs; k++) { 1157 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1158 cnt += len; 1159 } 1160 } 1161 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1162 1163 /* store the columns to the file */ 1164 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1165 if (!rank) { 1166 MPI_Status status; 1167 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1168 for (i=1; i<size; i++) { 1169 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1170 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1171 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1172 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1173 } 1174 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1175 } else { 1176 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1177 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1178 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1179 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1180 } 1181 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1182 1183 /* load up the numerical values */ 1184 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1185 cnt = 0; 1186 for (i=0; i<a->mbs; i++) { 1187 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1188 for (j=B->i[i]; j<B->i[i+1]; j++) { 1189 if (garray[B->j[j]] > cstart) break; 1190 for (l=0; l<bs; l++) { 1191 for (ll=0; ll<bs; ll++) { 1192 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1193 } 1194 } 1195 cnt += bs; 1196 } 1197 for (k=A->i[i]; k<A->i[i+1]; k++) { 1198 for (l=0; l<bs; l++) { 1199 for (ll=0; ll<bs; ll++) { 1200 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1201 } 1202 } 1203 cnt += bs; 1204 } 1205 for (; j<B->i[i+1]; j++) { 1206 for (l=0; l<bs; l++) { 1207 for (ll=0; ll<bs; ll++) { 1208 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1209 } 1210 } 1211 cnt += bs; 1212 } 1213 cnt += (bs-1)*rlen; 1214 } 1215 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1216 1217 /* store the column values to the file */ 1218 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1219 if (!rank) { 1220 MPI_Status status; 1221 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
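/*
   The multiply kernels above all follow the same split, for reference: with
   x_g the ghost values gathered into a->lvec,

       y = A_d x_local + B x_g

   where A_d is the diagonal block a->A.  The scatter that fills a->lvec is
   overlapped with the local product A_d x_local, and the off-diagonal
   contribution B x_g is added last.
*/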
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always hold */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
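/*
   Usage sketch for the row access above (only locally owned rows; every
   MatGetRow() must be paired with MatRestoreRow(); grow is an assumed
   locally owned global row):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use ncols, cols, vals ...
     ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
*/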
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
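/*
   Caller-side sketch (illustrative only): the statistics accumulated above can
   be inspected after assembly with

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     ... examine info.nz_used, info.nz_allocated, info.nz_unneeded, info.mallocs ...
*/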
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs = A->rmap->bs,mbs = baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
  else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
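/*
   Caller-side sketch (illustrative only; l and r must conform to the row and
   column layout respectively):

     MatDiagonalScale(A,l,r);      A = diag(l) A diag(r)

   Pass NULL for l or r to scale on one side only.
*/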
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ    *l      = (Mat_MPIBAIJ*)A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = l->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry;
     that would allocate new memory for every entry, triggering many
     mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
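/*
   Caller-side sketch (illustrative only): a common use is imposing Dirichlet
   boundary conditions after assembly while keeping the system consistent:

     MatZeroRows(A,nrows,rows,1.0,x,b);   zero the rows, put 1.0 on the
                                          diagonal, and set b = x in those rows
*/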
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt          bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (b) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
"MatCopy_MPIBAIJ" 1957 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1958 { 1959 PetscErrorCode ierr; 1960 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1961 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1962 1963 PetscFunctionBegin; 1964 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1965 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1966 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1967 } else { 1968 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1969 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1970 } 1971 PetscFunctionReturn(0); 1972 } 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1976 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1977 { 1978 PetscErrorCode ierr; 1979 1980 PetscFunctionBegin; 1981 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1987 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1988 { 1989 PetscErrorCode ierr; 1990 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1991 PetscBLASInt bnz,one=1; 1992 Mat_SeqBAIJ *x,*y; 1993 1994 PetscFunctionBegin; 1995 if (str == SAME_NONZERO_PATTERN) { 1996 PetscScalar alpha = a; 1997 x = (Mat_SeqBAIJ*)xx->A->data; 1998 y = (Mat_SeqBAIJ*)yy->A->data; 1999 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2000 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2001 x = (Mat_SeqBAIJ*)xx->B->data; 2002 y = (Mat_SeqBAIJ*)yy->B->data; 2003 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2004 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2005 } else { 2006 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2007 } 2008 PetscFunctionReturn(0); 2009 } 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2014 { 2015 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2020 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2026 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2027 { 2028 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2029 PetscErrorCode ierr; 2030 2031 PetscFunctionBegin; 2032 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2033 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2039 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2040 { 2041 PetscErrorCode ierr; 2042 IS iscol_local; 2043 PetscInt csize; 2044 2045 PetscFunctionBegin; 2046 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2047 if (call == MAT_REUSE_MATRIX) { 2048 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2049 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2050 } else { 2051 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2052 } 2053 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2054 if (call == MAT_INITIAL_MATRIX) { 2055 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2056 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
  process, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  PetscBool      idflag,allrows, allcols;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided: it does not flag
     errors and may change the indices, hence it is error prone */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
  else allcols = PETSC_FALSE;

  ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
  if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
  else allrows = PETSC_FALSE;

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIBAIJ"
PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
  /* make a local version of 'colp' */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
  ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIBAIJ"
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts)  *ghosts  = baij->garray;
  PetscFunctionReturn(0);
}
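/*
   Caller-side sketch (illustrative only): the ghost (off-process) block
   columns needed for the matrix-vector product can be queried with

     PetscInt       nghosts;
     const PetscInt *ghosts;
     MatGetGhosts(A,&nghosts,&ghosts);   block indices of needed off-process columns
*/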
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIBAIJ"
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
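/*
   Usage note (a sketch, not from this file): MatSOR is normally reached
   through PCSOR, which applies the processor-local sweeps implemented above:

     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetOmega(pc,1.5);
*/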
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
  PetscInt       N,i,*garray = aij->garray;
  PetscInt       ib,jb,bs = A->rmap->bs;
  Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
  MatScalar      *a_val = a_aij->a;
  Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
  MatScalar      *b_val = b_aij->a;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                               /* 4*/  MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                               /*10*/  0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                               /*15*/  MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                               /*20*/  MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                               /*24*/  MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*29*/  MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*34*/  MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*39*/  MatAXPY_MPIBAIJ,
                                       MatGetSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                               /*44*/  0,
                                       MatScale_MPIBAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                               /*49*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*54*/  MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                               /*59*/  MatGetSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                               /*64*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*69*/  MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*74*/  0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                               /*79*/  0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                               /*84*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*89*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*94*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*99*/  0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                               /*124*/ 0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
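/*
   Sketch of the block-CSR layout consumed by the routine below (hypothetical
   numbers, chosen only for illustration): with bs = 2 and two local block rows
   whose block-column indices are {0,2} and {1},

     ii = {0,2,3};    ii[i+1]-ii[i] = number of blocks in block row i
     jj = {0,2,1};    block-column indices, sorted within each block row
     V  = 3*bs*bs scalars, one dense bs-by-bs block per jj entry (or NULL)
*/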
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max = 0,*d_nnz = 0,*o_nnz = 0;
  const PetscInt *JJ     = 0;
  PetscScalar    *values = 0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
   may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
   MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
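/*
   Caller-side sketch (illustrative only; comm, the local sizes, and the
   block-CSR arrays ii, jj, v are supplied by the application):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,v);   preallocates and inserts the values
*/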
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if the matrix has a diagonal entry in this row */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*) B->data;

  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table to save memory in constructing the matrix

  Level: beginner

.seealso: MatCreateMPIBAIJ
M*/

PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
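/*
   Caller-side sketch (illustrative only; assumes A is an assembled MPIBAIJ
   matrix): the converters above are normally reached through the generic
   interface:

     Mat Aaij;
     MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij);
*/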
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/
d_nnz - array containing the number of block nonzeros in the various block rows 3098 of the diagonal portion of the local submatrix (possibly different for each block 3099 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3100 set it even if it is zero. 3101 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3102 submatrix (same for all local rows). 3103 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3104 off-diagonal portion of the local submatrix (possibly different for 3105 each block row) or NULL. 3106 3107 If the *_nnz parameter is given then the *_nz parameter is ignored. 3108 3109 Options Database Keys: 3110 + -mat_block_size - size of the blocks to use 3111 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix, with fact as its size factor 3112 3113 Notes: 3114 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3115 then it must be used on all processors that share the object for that argument. 3116 3117 Storage Information: 3118 For a square global matrix we define each processor's diagonal portion 3119 to be its local rows and the corresponding columns (a square submatrix); 3120 each processor's off-diagonal portion encompasses the remainder of the 3121 local matrix (a rectangular submatrix). 3122 3123 The user can specify preallocated storage for the diagonal part of 3124 the local submatrix with either d_nz or d_nnz (not both). Set 3125 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3126 memory allocation. Likewise, specify preallocated storage for the 3127 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3128 3129 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3130 the figure below we depict these three local rows and all columns (0-11). 3131 3132 .vb 3133 0 1 2 3 4 5 6 7 8 9 10 11 3134 -------------------------- 3135 row 3 |o o o d d d o o o o o o 3136 row 4 |o o o d d d o o o o o o 3137 row 5 |o o o d d d o o o o o o 3138 -------------------------- 3139 .ve 3140 3141 Thus, any entries in the d locations are stored in the d (diagonal) 3142 submatrix, and any entries in the o locations are stored in the 3143 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3144 stored simply in the MATSEQBAIJ format for compressed row storage. 3145 3146 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3147 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3148 In general, for PDE problems in which most nonzeros are near the diagonal, 3149 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3150 or you will get TERRIBLE performance; see the users' manual chapter on 3151 matrices. 3152 3153 You can call MatGetInfo() to get information on how effective the preallocation was; 3154 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3155 You can also run with the option -info and look for messages with the string 3156 malloc in them to see if additional memory allocation was needed.
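   Example (a minimal sketch, not from the PETSc distribution; comm and mbs, the
   number of locally owned block rows, are assumed to be supplied by the caller):
   preallocate with block size 2, at most 3 blocks per block row in the diagonal
   part and 1 in the off-diagonal part:
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,2*mbs,2*mbs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,1,NULL);CHKERRQ(ierr);
.ve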
3157 3158 Level: intermediate 3159 3160 .keywords: matrix, block, aij, compressed row, sparse, parallel 3161 3162 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3163 @*/ 3164 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3165 { 3166 PetscErrorCode ierr; 3167 3168 PetscFunctionBegin; 3169 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3170 PetscValidType(B,1); 3171 PetscValidLogicalCollectiveInt(B,bs,2); 3172 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatCreateBAIJ" 3178 /*@C 3179 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3180 (block compressed row). For good matrix assembly performance 3181 the user should preallocate the matrix storage by setting the parameters 3182 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3183 performance can be increased by more than a factor of 50. 3184 3185 Collective on MPI_Comm 3186 3187 Input Parameters: 3188 + comm - MPI communicator 3189 . bs - size of block 3190 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3191 This value should be the same as the local size used in creating the 3192 y vector for the matrix-vector product y = Ax. 3193 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3194 This value should be the same as the local size used in creating the 3195 x vector for the matrix-vector product y = Ax. 3196 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3197 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3198 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3199 submatrix (same for all local rows) 3200 . d_nnz - array containing the number of nonzero blocks in the various block rows 3201 of the diagonal portion of the local submatrix (possibly different for each block 3202 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3203 and set it even if it is zero. 3204 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3205 submatrix (same for all local rows). 3206 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3207 off-diagonal portion of the local submatrix (possibly different for 3208 each block row) or NULL. 3209 3210 Output Parameter: 3211 . A - the matrix 3212 3213 Options Database Keys: 3214 + -mat_block_size - size of the blocks to use 3215 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix, with fact as its size factor 3216 3217 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3218 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3219 [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()] 3220 3221 Notes: 3222 If the *_nnz parameter is given then the *_nz parameter is ignored. 3223 3224 A nonzero block is any block that has 1 or more nonzeros in it 3225 3226 The user MUST specify either the local or global matrix dimensions 3227 (possibly both). 3228 3229 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3230 then it must be used on all processors that share the object for that argument.
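   A minimal usage sketch (illustrative only; comm and mbs, the number of locally
   owned block rows, are assumed to be supplied by the caller) that creates and
   preallocates the matrix in one call:
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,2,2*mbs,2*mbs,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);CHKERRQ(ierr);
.ve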
3231 3232 Storage Information: 3233 For a square global matrix we define each processor's diagonal portion 3234 to be its local rows and the corresponding columns (a square submatrix); 3235 each processor's off-diagonal portion encompasses the remainder of the 3236 local matrix (a rectangular submatrix). 3237 3238 The user can specify preallocated storage for the diagonal part of 3239 the local submatrix with either d_nz or d_nnz (not both). Set 3240 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3241 memory allocation. Likewise, specify preallocated storage for the 3242 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3243 3244 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3245 the figure below we depict these three local rows and all columns (0-11). 3246 3247 .vb 3248 0 1 2 3 4 5 6 7 8 9 10 11 3249 -------------------------- 3250 row 3 |o o o d d d o o o o o o 3251 row 4 |o o o d d d o o o o o o 3252 row 5 |o o o d d d o o o o o o 3253 -------------------------- 3254 .ve 3255 3256 Thus, any entries in the d locations are stored in the d (diagonal) 3257 submatrix, and any entries in the o locations are stored in the 3258 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3259 stored simply in the MATSEQBAIJ format for compressed row storage. 3260 3261 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3262 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3263 In general, for PDE problems in which most nonzeros are near the diagonal, 3264 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3265 or you will get TERRIBLE performance; see the users' manual chapter on 3266 matrices. 3267 3268 Level: intermediate 3269 3270 .keywords: matrix, block, aij, compressed row, sparse, parallel 3271 3272 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3273 @*/ 3274 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3275 { 3276 PetscErrorCode ierr; 3277 PetscMPIInt size; 3278 3279 PetscFunctionBegin; 3280 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3281 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3282 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3283 if (size > 1) { 3284 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3285 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3286 } else { 3287 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3288 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3289 } 3290 PetscFunctionReturn(0); 3291 } 3292 3293 #undef __FUNCT__ 3294 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3295 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3296 { 3297 Mat mat; 3298 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3299 PetscErrorCode ierr; 3300 PetscInt len=0; 3301 3302 PetscFunctionBegin; 3303 *newmat = 0; 3304 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3305 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3306 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3307 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3308 3309 mat->factortype = matin->factortype; 3310 mat->preallocated = 
PETSC_TRUE; 3311 mat->assembled = PETSC_TRUE; 3312 mat->insertmode = NOT_SET_VALUES; 3313 3314 a = (Mat_MPIBAIJ*)mat->data; 3315 mat->rmap->bs = matin->rmap->bs; 3316 a->bs2 = oldmat->bs2; 3317 a->mbs = oldmat->mbs; 3318 a->nbs = oldmat->nbs; 3319 a->Mbs = oldmat->Mbs; 3320 a->Nbs = oldmat->Nbs; 3321 3322 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3323 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3324 3325 a->size = oldmat->size; 3326 a->rank = oldmat->rank; 3327 a->donotstash = oldmat->donotstash; 3328 a->roworiented = oldmat->roworiented; 3329 a->rowindices = 0; 3330 a->rowvalues = 0; 3331 a->getrowactive = PETSC_FALSE; 3332 a->barray = 0; 3333 a->rstartbs = oldmat->rstartbs; 3334 a->rendbs = oldmat->rendbs; 3335 a->cstartbs = oldmat->cstartbs; 3336 a->cendbs = oldmat->cendbs; 3337 3338 /* hash table stuff */ 3339 a->ht = 0; 3340 a->hd = 0; 3341 a->ht_size = 0; 3342 a->ht_flag = oldmat->ht_flag; 3343 a->ht_fact = oldmat->ht_fact; 3344 a->ht_total_ct = 0; 3345 a->ht_insert_ct = 0; 3346 3347 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3348 if (oldmat->colmap) { 3349 #if defined(PETSC_USE_CTABLE) 3350 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3351 #else 3352 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3353 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3354 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3355 #endif 3356 } else a->colmap = 0; 3357 3358 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3359 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3360 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3361 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3362 } else a->garray = 0; 3363 3364 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3365 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3366 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3367 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3368 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3369 3370 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3371 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3372 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3373 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3374 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3375 *newmat = mat; 3376 PetscFunctionReturn(0); 3377 } 3378 3379 #undef __FUNCT__ 3380 #define __FUNCT__ "MatLoad_MPIBAIJ" 3381 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3382 { 3383 PetscErrorCode ierr; 3384 int fd; 3385 PetscInt i,nz,j,rstart,rend; 3386 PetscScalar *vals,*buf; 3387 MPI_Comm comm; 3388 MPI_Status status; 3389 PetscMPIInt rank,size,maxnz; 3390 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3391 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3392 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3393 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3394 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3395 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3396 
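  /*
     Overview of the load algorithm below (an added summary, not normative):
     process 0 reads the binary header and all row lengths, pads the global size
     so it is divisible by the block size, computes the number of nonzeros owned
     by each process, and ships row lengths, column indices, and values to the
     other ranks; every rank then counts diagonal versus off-diagonal blocks per
     block row so the matrix can be preallocated before the values are inserted.
  */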
3397 PetscFunctionBegin; 3398 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3399 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3400 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3401 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3402 3403 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3404 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3405 if (!rank) { 3406 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3407 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3408 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object"); 3409 } 3410 3411 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3412 3413 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3414 M = header[1]; N = header[2]; 3415 3416 /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */ 3417 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3418 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3419 3420 /* If global sizes are set, check that they are consistent with those given in the file */ 3421 if (sizesset) { 3422 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3423 } 3424 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows); 3425 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols); 3426 3427 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3428 3429 /* 3430 This code adds extra rows to make sure the number of rows is 3431 divisible by the blocksize 3432 */ 3433 Mbs = M/bs; 3434 extra_rows = bs - M + bs*Mbs; 3435 if (extra_rows == bs) extra_rows = 0; 3436 else Mbs++; 3437 if (extra_rows && !rank) { 3438 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3439 } 3440 3441 /* determine ownership of all rows */ 3442 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3443 mbs = Mbs/size + ((Mbs % size) > rank); 3444 m = mbs*bs; 3445 } else { /* User set */ 3446 m = newmat->rmap->n; 3447 mbs = m/bs; 3448 } 3449 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3450 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3451 3452 /* process 0 needs enough room for the process with the most rows */ 3453 if (!rank) { 3454 mmax = rowners[1]; 3455 for (i=2; i<=size; i++) { 3456 mmax = PetscMax(mmax,rowners[i]); 3457 } 3458 mmax*=bs; 3459 } else mmax = -1; /* unused, but compiler warns anyway */ 3460 3461 rowners[0] = 0; 3462 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3463 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3464 rstart = rowners[rank]; 3465 rend = rowners[rank+1]; 3466 3467 /* distribute row lengths to all processors */ 3468 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3469 if (!rank) { 3470 mend = m; 3471 if (size == 1) mend = mend - extra_rows; 3472 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3473 for (j=mend; j<m; j++) locrowlens[j] = 1; 3474 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3475 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3476 for (j=0; j<m;
j++) { 3477 procsnz[0] += locrowlens[j]; 3478 } 3479 for (i=1; i<size; i++) { 3480 mend = browners[i+1] - browners[i]; 3481 if (i == size-1) mend = mend - extra_rows; 3482 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3483 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3484 /* calculate the number of nonzeros on each processor */ 3485 for (j=0; j<browners[i+1]-browners[i]; j++) { 3486 procsnz[i] += rowlengths[j]; 3487 } 3488 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3489 } 3490 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3491 } else { 3492 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3493 } 3494 3495 if (!rank) { 3496 /* determine max buffer needed and allocate it */ 3497 maxnz = procsnz[0]; 3498 for (i=1; i<size; i++) { 3499 maxnz = PetscMax(maxnz,procsnz[i]); 3500 } 3501 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3502 3503 /* read in my part of the matrix column indices */ 3504 nz = procsnz[0]; 3505 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3506 mycols = ibuf; 3507 if (size == 1) nz -= extra_rows; 3508 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3509 if (size == 1) { 3510 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3511 } 3512 3513 /* read in each of the other processes' parts (except the last) and ship them off */ 3514 for (i=1; i<size-1; i++) { 3515 nz = procsnz[i]; 3516 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3517 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3518 } 3519 /* read in the column indices for the last proc */ 3520 if (size != 1) { 3521 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3522 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3523 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3524 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3525 } 3526 ierr = PetscFree(cols);CHKERRQ(ierr); 3527 } else { 3528 /* determine buffer space needed for message */ 3529 nz = 0; 3530 for (i=0; i<m; i++) { 3531 nz += locrowlens[i]; 3532 } 3533 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3534 mycols = ibuf; 3535 /* receive message of column indices */ 3536 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3537 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3538 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"received message length does not match the expected number of column indices"); 3539 } 3540 3541 /* loop over local rows, determining number of off-diagonal entries */ 3542 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3543 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3544 rowcount = 0; nzcount = 0; 3545 for (i=0; i<mbs; i++) { 3546 dcount = 0; 3547 odcount = 0; 3548 for (j=0; j<bs; j++) { 3549 kmax = locrowlens[rowcount]; 3550 for (k=0; k<kmax; k++) { 3551 tmp = mycols[nzcount++]/bs; 3552 if (!mask[tmp]) { 3553 mask[tmp] = 1; 3554 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3555 else masked1[dcount++] = tmp; 3556 } 3557 } 3558 rowcount++; 3559 } 3560 3561 dlens[i] = dcount; 3562 odlens[i] = odcount; 3563 3564 /* zero out the mask elements we set */ 3565 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3566 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3567 } 3568 3569 3570 if (!sizesset) { 3571 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3572 } 3573 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3574 3575 if (!rank) { 3576 ierr =
PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3577 /* read in my part of the matrix numerical values */ 3578 nz = procsnz[0]; 3579 vals = buf; 3580 mycols = ibuf; 3581 if (size == 1) nz -= extra_rows; 3582 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3583 if (size == 1) { 3584 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3585 } 3586 3587 /* insert into matrix */ 3588 jj = rstart*bs; 3589 for (i=0; i<m; i++) { 3590 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3591 mycols += locrowlens[i]; 3592 vals += locrowlens[i]; 3593 jj++; 3594 } 3595 /* read in the other processes' values (except the last one) and ship them out */ 3596 for (i=1; i<size-1; i++) { 3597 nz = procsnz[i]; 3598 vals = buf; 3599 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3600 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3601 } 3602 /* the last proc */ 3603 if (size != 1) { 3604 nz = procsnz[i] - extra_rows; 3605 vals = buf; 3606 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3607 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3608 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3609 } 3610 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3611 } else { 3612 /* receive numeric values */ 3613 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3614 3615 /* receive message of values */ 3616 vals = buf; 3617 mycols = ibuf; 3618 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3619 3620 /* insert into matrix */ 3621 jj = rstart*bs; 3622 for (i=0; i<m; i++) { 3623 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3624 mycols += locrowlens[i]; 3625 vals += locrowlens[i]; 3626 jj++; 3627 } 3628 } 3629 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3630 ierr = PetscFree(buf);CHKERRQ(ierr); 3631 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3632 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3633 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3634 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3635 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3637 PetscFunctionReturn(0); 3638 } 3639 3640 #undef __FUNCT__ 3641 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3642 /*@ 3643 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table. 3644 3645 Input Parameters: 3646 + mat - the matrix 3647 -
fact - hash table size factor 3648 3649 Not Collective, each process can use a different factor 3650 3651 Level: advanced 3652 3653 Notes: 3654 This can also be set by the command line option: -mat_use_hash_table <fact> 3655 3656 .keywords: matrix, hashtable, factor, HT 3657 3658 .seealso: MatSetOption() 3659 @*/ 3660 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3661 { 3662 PetscErrorCode ierr; 3663 3664 PetscFunctionBegin; 3665 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3666 PetscFunctionReturn(0); 3667 } 3668 3669 #undef __FUNCT__ 3670 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3671 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3672 { 3673 Mat_MPIBAIJ *baij; 3674 3675 PetscFunctionBegin; 3676 baij = (Mat_MPIBAIJ*)mat->data; 3677 baij->ht_fact = fact; 3678 PetscFunctionReturn(0); 3679 } 3680 3681 #undef __FUNCT__ 3682 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3683 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3684 { 3685 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3686 3687 PetscFunctionBegin; 3688 *Ad = a->A; 3689 *Ao = a->B; 3690 *colmap = a->garray; 3691 PetscFunctionReturn(0); 3692 } 3693 3694 /* 3695 Special version for direct calls from Fortran (to eliminate two function call overheads) 3696 */ 3697 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3698 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3699 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3700 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3701 #endif 3702 3703 #undef __FUNCT__ 3704 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3705 /*@C 3706 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3707 3708 Collective on Mat 3709 3710 Input Parameters: 3711 + mat - the matrix 3712 . min - number of input rows 3713 . im - input rows 3714 . nin - number of input columns 3715 . in - input columns 3716 . v - numerical values input 3717 - addvin - INSERT_VALUES or ADD_VALUES 3718 3719 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
3720 3721 Level: advanced 3722 3723 .seealso: MatSetValuesBlocked() 3724 @*/ 3725 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3726 { 3727 /* convert input arguments to C version */ 3728 Mat mat = *matin; 3729 PetscInt m = *min, n = *nin; 3730 InsertMode addv = *addvin; 3731 3732 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3733 const MatScalar *value; 3734 MatScalar *barray = baij->barray; 3735 PetscBool roworiented = baij->roworiented; 3736 PetscErrorCode ierr; 3737 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3738 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3739 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3740 3741 PetscFunctionBegin; 3742 /* tasks normally handled by MatSetValuesBlocked() */ 3743 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3744 #if defined(PETSC_USE_DEBUG) 3745 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3746 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3747 #endif 3748 if (mat->assembled) { 3749 mat->was_assembled = PETSC_TRUE; 3750 mat->assembled = PETSC_FALSE; 3751 } 3752 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3753 3754 3755 if (!barray) { 3756 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3757 baij->barray = barray; 3758 } 3759 3760 if (roworiented) stepval = (n-1)*bs; 3761 else stepval = (m-1)*bs; 3762 3763 for (i=0; i<m; i++) { 3764 if (im[i] < 0) continue; 3765 #if defined(PETSC_USE_DEBUG) 3766 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3767 #endif 3768 if (im[i] >= rstart && im[i] < rend) { 3769 row = im[i] - rstart; 3770 for (j=0; j<n; j++) { 3771 /* If NumCol = 1 then a copy is not required */ 3772 if ((roworiented) && (n == 1)) { 3773 barray = (MatScalar*)v + i*bs2; 3774 } else if ((!roworiented) && (m == 1)) { 3775 barray = (MatScalar*)v + j*bs2; 3776 } else { /* Here a copy is required */ 3777 if (roworiented) { 3778 value = v + i*(stepval+bs)*bs + j*bs; 3779 } else { 3780 value = v + j*(stepval+bs)*bs + i*bs; 3781 } 3782 for (ii=0; ii<bs; ii++,value+=stepval) { 3783 for (jj=0; jj<bs; jj++) { 3784 *barray++ = *value++; 3785 } 3786 } 3787 barray -=bs2; 3788 } 3789 3790 if (in[j] >= cstart && in[j] < cend) { 3791 col = in[j] - cstart; 3792 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3793 } else if (in[j] < 0) continue; 3794 #if defined(PETSC_USE_DEBUG) 3795 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3796 #endif 3797 else { 3798 if (mat->was_assembled) { 3799 if (!baij->colmap) { 3800 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3801 } 3802 3803 #if defined(PETSC_USE_DEBUG) 3804 #if defined(PETSC_USE_CTABLE) 3805 { PetscInt data; 3806 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3807 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3808 } 3809 #else 3810 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3811 #endif 3812 #endif 3813 #if defined(PETSC_USE_CTABLE) 3814 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3815 col = (col - 1)/bs; 3816 #else 3817 col = (baij->colmap[in[j]] - 1)/bs; 3818 
#endif 3819 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3820 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3821 col = in[j]; 3822 } 3823 } else col = in[j]; 3824 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3825 } 3826 } 3827 } else { 3828 if (!baij->donotstash) { 3829 if (roworiented) { 3830 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3831 } else { 3832 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3833 } 3834 } 3835 } 3836 } 3837 3838 /* task normally handled by MatSetValuesBlocked() */ 3839 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842 3843 #undef __FUNCT__ 3844 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3845 /*@ 3846 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 3847 in standard CSR format. 3848 3849 Collective on MPI_Comm 3850 3851 Input Parameters: 3852 + comm - MPI communicator 3853 . bs - the block size, only a block size of 1 is supported 3854 . m - number of local rows (Cannot be PETSC_DECIDE) 3855 . n - This value should be the same as the local size used in creating the 3856 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3857 calculated if N is given) For square matrices n is almost always m. 3858 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3859 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3860 . i - row offsets (standard CSR row pointer array) 3861 . j - column indices 3862 - a - matrix values 3863 3864 Output Parameter: 3865 . mat - the matrix 3866 3867 Level: intermediate 3868 3869 Notes: 3870 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3871 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3872 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3873 3874 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3875 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3876 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3877 with column-major ordering within blocks. 3878 3879 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
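   A minimal sketch for a single-process run that owns two block rows with bs = 1
   (the arrays are illustrative only); i holds the CSR row offsets (block row 0
   has two blocks, block row 1 has one), j the block column indices, and a one
   value per block:
.vb
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {0,1,1};
   PetscScalar a[] = {1.0,2.0,3.0};
   Mat         A;
   ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve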
3880 3881 .keywords: matrix, block, aij, compressed row, sparse, parallel 3882 3883 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(), 3884 MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays() 3885 @*/ 3886 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3887 { 3888 PetscErrorCode ierr; 3889 3890 PetscFunctionBegin; 3891 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3892 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3893 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3894 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3895 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3896 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3897 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3898 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3899 PetscFunctionReturn(0); 3900 } 3901