1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 #include <petscsf.h> 5 6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat); 7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat); 8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []); 9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]); 13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec); 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 18 { 19 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt i,*idxb = 0; 22 PetscScalar *va,*vb; 23 Vec vtmp; 24 25 PetscFunctionBegin; 26 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 27 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 28 if (idx) { 29 for (i=0; i<A->rmap->n; i++) { 30 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 31 } 32 } 33 34 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 35 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 36 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 37 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 38 39 for (i=0; i<A->rmap->n; i++) { 40 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 41 va[i] = vb[i]; 42 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 43 } 44 } 45 46 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 47 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 48 ierr = PetscFree(idxb);CHKERRQ(ierr); 49 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatStoreValues_MPIBAIJ" 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 62 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 68 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 69 { 70 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 75 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 /* 80 Local utility routine that creates a mapping from the global column 81 number to the local number in the off-diagonal part of the local 82 storage of the matrix. This is done in a non scalable way since the 83 length of colmap equals the global matrix length. 84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 if (v) PetscValidScalarPointer(v,6); 216 for (i=0; i<m; i++) { 217 if (im[i] < 0) continue; 218 #if defined(PETSC_USE_DEBUG) 219 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 220 #endif 221 if (im[i] >= rstart_orig && im[i] < rend_orig) { 222 row = im[i] - rstart_orig; 223 for (j=0; j<n; j++) { 224 if (in[j] >= cstart_orig && in[j] < cend_orig) { 225 col = in[j] - cstart_orig; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!baij->colmap) { 237 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined(PETSC_USE_CTABLE) 240 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 241 col = col - 1; 242 #else 243 col = baij->colmap[in[j]/bs] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 246 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 249 B = baij->B; 250 b = (Mat_SeqBAIJ*)(B)->data; 251 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 252 ba =b->a; 253 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 254 else col += in[j]%bs; 255 } else col = in[j]; 256 if (roworiented) value = v[i*n+j]; 257 else value = v[i+j*m]; 258 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 259 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 260 } 261 } 262 } else { 263 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 264 if (!baij->donotstash) { 265 mat->assembled = PETSC_FALSE; 266 if (roworiented) { 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 268 } else { 269 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 270 } 271 } 272 } 273 } 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 280 { 281 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 282 const PetscScalar *value; 283 MatScalar *barray = baij->barray; 284 PetscBool roworiented = baij->roworiented; 285 PetscErrorCode ierr; 286 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 287 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 288 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 289 290 PetscFunctionBegin; 291 if (!barray) { 292 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 293 baij->barray = barray; 294 } 295 296 if (roworiented) stepval = (n-1)*bs; 297 else stepval = (m-1)*bs; 298 299 for (i=0; i<m; i++) { 300 if (im[i] < 0) continue; 301 #if defined(PETSC_USE_DEBUG) 302 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 303 #endif 304 if (im[i] >= rstart && im[i] < rend) { 305 row = im[i] - rstart; 306 for (j=0; j<n; j++) { 307 /* If NumCol = 1 then a copy is not required */ 308 if ((roworiented) && (n == 1)) { 309 barray = (MatScalar*)v + i*bs2; 310 } else if ((!roworiented) && (m == 1)) { 311 barray = (MatScalar*)v + j*bs2; 312 } else { /* Here a copy is required */ 313 if (roworiented) { 314 value = v + (i*(stepval+bs) + j)*bs; 315 } else { 316 value = v + (j*(stepval+bs) + i)*bs; 317 } 318 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 319 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 320 barray += bs; 321 } 322 barray -= bs2; 323 } 324 325 if (in[j] >= cstart && in[j] < cend) { 326 col = in[j] - cstart; 327 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 328 } else if (in[j] < 0) continue; 329 #if defined(PETSC_USE_DEBUG) 330 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 331 #endif 332 else { 333 if (mat->was_assembled) { 334 if (!baij->colmap) { 335 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 336 } 337 338 #if defined(PETSC_USE_DEBUG) 339 #if defined(PETSC_USE_CTABLE) 340 { PetscInt data; 341 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 342 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 343 } 344 #else 345 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 346 #endif 347 #endif 348 #if defined(PETSC_USE_CTABLE) 349 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 350 col = (col - 1)/bs; 351 #else 352 col = (baij->colmap[in[j]] - 1)/bs; 353 #endif 354 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 355 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 356 col = in[j]; 357 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 358 } else col = in[j]; 359 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 360 } 361 } 362 } else { 363 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 364 if (!baij->donotstash) { 365 if (roworiented) { 366 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 367 } else { 368 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 369 } 370 } 371 } 372 } 373 PetscFunctionReturn(0); 374 } 375 376 #define HASH_KEY 0.6180339887 377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 383 { 384 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 385 PetscBool roworiented = baij->roworiented; 386 PetscErrorCode ierr; 387 PetscInt i,j,row,col; 388 PetscInt rstart_orig=mat->rmap->rstart; 389 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 390 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 391 PetscReal tmp; 392 MatScalar **HD = baij->hd,value; 393 #if defined(PETSC_USE_DEBUG) 394 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 395 #endif 396 397 PetscFunctionBegin; 398 if (v) PetscValidScalarPointer(v,6); 399 for (i=0; i<m; i++) { 400 #if defined(PETSC_USE_DEBUG) 401 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 402 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 403 #endif 404 row = im[i]; 405 if (row >= rstart_orig && row < rend_orig) { 406 for (j=0; j<n; j++) { 407 col = in[j]; 408 if (roworiented) value = v[i*n+j]; 409 else value = v[i+j*m]; 410 /* Look up PetscInto the Hash Table */ 411 key = (row/bs)*Nbs+(col/bs)+1; 412 h1 = HASH(size,key,tmp); 413 414 415 idx = h1; 416 #if defined(PETSC_USE_DEBUG) 417 insert_ct++; 418 total_ct++; 419 if (HT[idx] != key) { 420 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 421 if (idx == size) { 422 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 423 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 424 } 425 } 426 #else 427 if (HT[idx] != key) { 428 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 429 if (idx == size) { 430 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 431 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 432 } 433 } 434 #endif 435 /* A HASH table entry is found, so insert the values at the correct address */ 436 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 437 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 438 } 439 } else if (!baij->donotstash) { 440 if (roworiented) { 441 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 442 } else { 443 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 444 } 445 } 446 } 447 #if defined(PETSC_USE_DEBUG) 448 baij->ht_total_ct = total_ct; 449 baij->ht_insert_ct = insert_ct; 450 #endif 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 457 { 458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 459 PetscBool roworiented = baij->roworiented; 460 PetscErrorCode ierr; 461 PetscInt i,j,ii,jj,row,col; 462 PetscInt rstart=baij->rstartbs; 463 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 464 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 465 PetscReal tmp; 466 MatScalar **HD = baij->hd,*baij_a; 467 const PetscScalar *v_t,*value; 468 #if defined(PETSC_USE_DEBUG) 469 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 470 #endif 471 472 PetscFunctionBegin; 473 if (roworiented) stepval = (n-1)*bs; 474 else stepval = (m-1)*bs; 475 476 for (i=0; i<m; i++) { 477 #if defined(PETSC_USE_DEBUG) 478 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 479 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 480 #endif 481 row = im[i]; 482 v_t = v + i*nbs2; 483 if (row >= rstart && row < rend) { 484 for (j=0; j<n; j++) { 485 col = in[j]; 486 487 /* Look up into the Hash Table */ 488 key = row*Nbs+col+1; 489 h1 = HASH(size,key,tmp); 490 491 idx = h1; 492 #if defined(PETSC_USE_DEBUG) 493 total_ct++; 494 insert_ct++; 495 if (HT[idx] != key) { 496 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == size) { 498 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 499 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 500 } 501 } 502 #else 503 if (HT[idx] != key) { 504 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 505 if (idx == size) { 506 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 507 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 508 } 509 } 510 #endif 511 baij_a = HD[idx]; 512 if (roworiented) { 513 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 514 /* value = v + (i*(stepval+bs)+j)*bs; */ 515 value = v_t; 516 v_t += bs; 517 if (addv == ADD_VALUES) { 518 for (ii=0; ii<bs; ii++,value+=stepval) { 519 for (jj=ii; jj<bs2; jj+=bs) { 520 baij_a[jj] += *value++; 521 } 522 } 523 } else { 524 for (ii=0; ii<bs; ii++,value+=stepval) { 525 for (jj=ii; jj<bs2; jj+=bs) { 526 baij_a[jj] = *value++; 527 } 528 } 529 } 530 } else { 531 value = v + j*(stepval+bs)*bs + i*bs; 532 if (addv == ADD_VALUES) { 533 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 534 for (jj=0; jj<bs; jj++) { 535 baij_a[jj] += *value++; 536 } 537 } 538 } else { 539 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 540 for (jj=0; jj<bs; jj++) { 541 baij_a[jj] = *value++; 542 } 543 } 544 } 545 } 546 } 547 } else { 548 if (!baij->donotstash) { 549 if (roworiented) { 550 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 #if defined(PETSC_USE_DEBUG) 558 baij->ht_total_ct = total_ct; 559 baij->ht_insert_ct = insert_ct; 560 #endif 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatGetValues_MPIBAIJ" 566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 567 { 568 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 571 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 572 573 PetscFunctionBegin; 574 for (i=0; i<m; i++) { 575 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 576 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 577 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 578 row = idxm[i] - bsrstart; 579 for (j=0; j<n; j++) { 580 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 581 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 582 if (idxn[j] >= bscstart && idxn[j] < bscend) { 583 col = idxn[j] - bscstart; 584 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 585 } else { 586 if (!baij->colmap) { 587 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 591 data--; 592 #else 593 data = baij->colmap[idxn[j]/bs]-1; 594 #endif 595 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 596 else { 597 col = data + idxn[j]%bs; 598 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 599 } 600 } 601 } 602 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatNorm_MPIBAIJ" 609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 610 { 611 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 612 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 613 PetscErrorCode ierr; 614 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 615 PetscReal sum = 0.0; 616 MatScalar *v; 617 618 PetscFunctionBegin; 619 if (baij->size == 1) { 620 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 621 } else { 622 if (type == NORM_FROBENIUS) { 623 v = amat->a; 624 nz = amat->nz*bs2; 625 for (i=0; i<nz; i++) { 626 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 627 } 628 v = bmat->a; 629 nz = bmat->nz*bs2; 630 for (i=0; i<nz; i++) { 631 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 632 } 633 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 634 *nrm = PetscSqrtReal(*nrm); 635 } else if (type == NORM_1) { /* max column sum */ 636 PetscReal *tmp,*tmp2; 637 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 638 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 639 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 640 v = amat->a; jj = amat->j; 641 for (i=0; i<amat->nz; i++) { 642 for (j=0; j<bs; j++) { 643 col = bs*(cstart + *jj) + j; /* column index */ 644 for (row=0; row<bs; row++) { 645 tmp[col] += PetscAbsScalar(*v); v++; 646 } 647 } 648 jj++; 649 } 650 v = bmat->a; jj = bmat->j; 651 for (i=0; i<bmat->nz; i++) { 652 for (j=0; j<bs; j++) { 653 col = bs*garray[*jj] + j; 654 for (row=0; row<bs; row++) { 655 tmp[col] += PetscAbsScalar(*v); v++; 656 } 657 } 658 jj++; 659 } 660 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 *nrm = 0.0; 662 for (j=0; j<mat->cmap->N; j++) { 663 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 664 } 665 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 666 } else if (type == NORM_INFINITY) { /* max row sum */ 667 PetscReal *sums; 668 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 669 sum = 0.0; 670 for (j=0; j<amat->mbs; j++) { 671 for (row=0; row<bs; row++) sums[row] = 0.0; 672 v = amat->a + bs2*amat->i[j]; 673 nz = amat->i[j+1]-amat->i[j]; 674 for (i=0; i<nz; i++) { 675 for (col=0; col<bs; col++) { 676 for (row=0; row<bs; row++) { 677 sums[row] += PetscAbsScalar(*v); v++; 678 } 679 } 680 } 681 v = bmat->a + bs2*bmat->i[j]; 682 nz = bmat->i[j+1]-bmat->i[j]; 683 for (i=0; i<nz; i++) { 684 for (col=0; col<bs; col++) { 685 for (row=0; row<bs; row++) { 686 sums[row] += PetscAbsScalar(*v); v++; 687 } 688 } 689 } 690 for (row=0; row<bs; row++) { 691 if (sums[row] > sum) sum = sums[row]; 692 } 693 } 694 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 695 ierr = PetscFree(sums);CHKERRQ(ierr); 696 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /* 702 Creates the hash table, and sets the table 703 This table is created only once. 704 If new entried need to be added to the matrix 705 then the hash table has to be destroyed and 706 recreated. 707 */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 711 { 712 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 713 Mat A = baij->A,B=baij->B; 714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 715 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 716 PetscErrorCode ierr; 717 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 718 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 719 PetscInt *HT,key; 720 MatScalar **HD; 721 PetscReal tmp; 722 #if defined(PETSC_USE_INFO) 723 PetscInt ct=0,max=0; 724 #endif 725 726 PetscFunctionBegin; 727 if (baij->ht) PetscFunctionReturn(0); 728 729 baij->ht_size = (PetscInt)(factor*nz); 730 ht_size = baij->ht_size; 731 732 /* Allocate Memory for Hash Table */ 733 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 734 HD = baij->hd; 735 HT = baij->ht; 736 737 /* Loop Over A */ 738 for (i=0; i<a->mbs; i++) { 739 for (j=ai[i]; j<ai[i+1]; j++) { 740 row = i+rstart; 741 col = aj[j]+cstart; 742 743 key = row*Nbs + col + 1; 744 h1 = HASH(ht_size,key,tmp); 745 for (k=0; k<ht_size; k++) { 746 if (!HT[(h1+k)%ht_size]) { 747 HT[(h1+k)%ht_size] = key; 748 HD[(h1+k)%ht_size] = a->a + j*bs2; 749 break; 750 #if defined(PETSC_USE_INFO) 751 } else { 752 ct++; 753 #endif 754 } 755 } 756 #if defined(PETSC_USE_INFO) 757 if (k> max) max = k; 758 #endif 759 } 760 } 761 /* Loop Over B */ 762 for (i=0; i<b->mbs; i++) { 763 for (j=bi[i]; j<bi[i+1]; j++) { 764 row = i+rstart; 765 col = garray[bj[j]]; 766 key = row*Nbs + col + 1; 767 h1 = HASH(ht_size,key,tmp); 768 for (k=0; k<ht_size; k++) { 769 if (!HT[(h1+k)%ht_size]) { 770 HT[(h1+k)%ht_size] = key; 771 HD[(h1+k)%ht_size] = b->a + j*bs2; 772 break; 773 #if defined(PETSC_USE_INFO) 774 } else { 775 ct++; 776 #endif 777 } 778 } 779 #if defined(PETSC_USE_INFO) 780 if (k> max) max = k; 781 #endif 782 } 783 } 784 785 /* Print Summary */ 786 #if defined(PETSC_USE_INFO) 787 for (i=0,j=0; i<ht_size; i++) { 788 if (HT[i]) j++; 789 } 790 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 791 #endif 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 InsertMode addv; 803 804 PetscFunctionBegin; 805 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 806 807 /* make sure all processors are either in INSERTMODE or ADDMODE */ 808 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 809 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 810 mat->insertmode = addv; /* in case this processor had no cache */ 811 812 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 813 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 814 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 815 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 816 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 817 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNCT__ 822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 824 { 825 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 826 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 827 PetscErrorCode ierr; 828 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 829 PetscInt *row,*col; 830 PetscBool r1,r2,r3,other_disassembled; 831 MatScalar *val; 832 InsertMode addv = mat->insertmode; 833 PetscMPIInt n; 834 835 PetscFunctionBegin; 836 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 837 if (!baij->donotstash && !mat->nooffprocentries) { 838 while (1) { 839 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 840 if (!flg) break; 841 842 for (i=0; i<n;) { 843 /* Now identify the consecutive vals belonging to the same row */ 844 for (j=i,rstart=row[j]; j<n; j++) { 845 if (row[j] != rstart) break; 846 } 847 if (j < n) ncols = j-i; 848 else ncols = n-i; 849 /* Now assemble all these values with a single function call */ 850 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 851 i = j; 852 } 853 } 854 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 855 /* Now process the block-stash. Since the values are stashed column-oriented, 856 set the roworiented flag to column oriented, and after MatSetValues() 857 restore the original flags */ 858 r1 = baij->roworiented; 859 r2 = a->roworiented; 860 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 861 862 baij->roworiented = PETSC_FALSE; 863 a->roworiented = PETSC_FALSE; 864 865 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 866 while (1) { 867 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 868 if (!flg) break; 869 870 for (i=0; i<n;) { 871 /* Now identify the consecutive vals belonging to the same row */ 872 for (j=i,rstart=row[j]; j<n; j++) { 873 if (row[j] != rstart) break; 874 } 875 if (j < n) ncols = j-i; 876 else ncols = n-i; 877 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 878 i = j; 879 } 880 } 881 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 882 883 baij->roworiented = r1; 884 a->roworiented = r2; 885 886 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 887 } 888 889 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 891 892 /* determine if any processor has disassembled, if so we must 893 also disassemble ourselfs, in order that we may reassemble. */ 894 /* 895 if nonzero structure of submatrix B cannot change then we know that 896 no processor disassembled thus we can skip this stuff 897 */ 898 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 899 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 900 if (mat->was_assembled && !other_disassembled) { 901 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 902 } 903 } 904 905 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 906 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 907 } 908 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 909 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 910 911 #if defined(PETSC_USE_INFO) 912 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 913 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 914 915 baij->ht_total_ct = 0; 916 baij->ht_insert_ct = 0; 917 } 918 #endif 919 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 920 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 921 922 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 923 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 924 } 925 926 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 927 928 baij->rowvalues = 0; 929 930 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 931 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 932 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 933 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 934 } 935 PetscFunctionReturn(0); 936 } 937 938 #include <petscdraw.h> 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 941 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 942 { 943 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 944 PetscErrorCode ierr; 945 PetscMPIInt size = baij->size,rank = baij->rank; 946 PetscInt bs = mat->rmap->bs; 947 PetscBool iascii,isdraw; 948 PetscViewer sviewer; 949 PetscViewerFormat format; 950 951 PetscFunctionBegin; 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 953 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 954 if (iascii) { 955 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 956 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 957 MatInfo info; 958 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 959 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 961 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 962 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 963 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 964 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 965 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 966 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 967 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 968 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 969 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 970 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } else if (format == PETSC_VIEWER_ASCII_INFO) { 973 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 976 PetscFunctionReturn(0); 977 } 978 } 979 980 if (isdraw) { 981 PetscDraw draw; 982 PetscBool isnull; 983 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 984 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 985 } 986 987 if (size == 1) { 988 ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 989 ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 990 } else { 991 /* assemble the entire matrix onto first processor. */ 992 Mat A; 993 Mat_SeqBAIJ *Aloc; 994 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 995 MatScalar *a; 996 997 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 998 /* Perhaps this should be the type of mat? */ 999 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1000 if (!rank) { 1001 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1002 } else { 1003 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1004 } 1005 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1006 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1007 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1008 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1009 1010 /* copy over the A part */ 1011 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1012 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1013 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1014 1015 for (i=0; i<mbs; i++) { 1016 rvals[0] = bs*(baij->rstartbs + i); 1017 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1018 for (j=ai[i]; j<ai[i+1]; j++) { 1019 col = (baij->cstartbs+aj[j])*bs; 1020 for (k=0; k<bs; k++) { 1021 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1022 col++; a += bs; 1023 } 1024 } 1025 } 1026 /* copy over the B part */ 1027 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1028 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1029 for (i=0; i<mbs; i++) { 1030 rvals[0] = bs*(baij->rstartbs + i); 1031 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1032 for (j=ai[i]; j<ai[i+1]; j++) { 1033 col = baij->garray[aj[j]]*bs; 1034 for (k=0; k<bs; k++) { 1035 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1036 col++; a += bs; 1037 } 1038 } 1039 } 1040 ierr = PetscFree(rvals);CHKERRQ(ierr); 1041 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 /* 1044 Everyone has to call to draw the matrix since the graphics waits are 1045 synchronized across all processors that share the PetscDraw object 1046 */ 1047 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1048 if (!rank) { 1049 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1050 /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/ 1051 PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ); 1052 ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1053 } 1054 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1055 ierr = MatDestroy(&A);CHKERRQ(ierr); 1056 } 1057 PetscFunctionReturn(0); 1058 } 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1062 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1063 { 1064 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1065 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1066 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1069 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1070 int fd; 1071 PetscScalar *column_values; 1072 FILE *file; 1073 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1074 PetscInt message_count,flowcontrolcount; 1075 1076 PetscFunctionBegin; 1077 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1078 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1079 nz = bs2*(A->nz + B->nz); 1080 rlen = mat->rmap->n; 1081 if (!rank) { 1082 header[0] = MAT_FILE_CLASSID; 1083 header[1] = mat->rmap->N; 1084 header[2] = mat->cmap->N; 1085 1086 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1087 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1088 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1089 /* get largest number of rows any processor has */ 1090 range = mat->rmap->range; 1091 for (i=1; i<size; i++) { 1092 rlen = PetscMax(rlen,range[i+1] - range[i]); 1093 } 1094 } else { 1095 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1096 } 1097 1098 ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1099 /* compute lengths of each row */ 1100 for (i=0; i<a->mbs; i++) { 1101 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1102 } 1103 /* store the row lengths to the file */ 1104 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1105 if (!rank) { 1106 MPI_Status status; 1107 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1108 rlen = (range[1] - range[0])/bs; 1109 for (i=0; i<rlen; i++) { 1110 for (j=0; j<bs; j++) { 1111 row_lens[i*bs+j] = bs*crow_lens[i]; 1112 } 1113 } 1114 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1115 for (i=1; i<size; i++) { 1116 rlen = (range[i+1] - range[i])/bs; 1117 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1118 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1119 for (k=0; k<rlen; k++) { 1120 for (j=0; j<bs; j++) { 1121 row_lens[k*bs+j] = bs*crow_lens[k]; 1122 } 1123 } 1124 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1125 } 1126 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1127 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1128 } else { 1129 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1130 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1131 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1132 } 1133 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1134 1135 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1136 information needed to make it for each row from a block row. This does require more communication but still not more than 1137 the communication needed for the nonzero values */ 1138 nzmax = nz; /* space a largest processor needs */ 1139 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1140 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1141 cnt = 0; 1142 for (i=0; i<a->mbs; i++) { 1143 pcnt = cnt; 1144 for (j=B->i[i]; j<B->i[i+1]; j++) { 1145 if ((col = garray[B->j[j]]) > cstart) break; 1146 for (l=0; l<bs; l++) { 1147 column_indices[cnt++] = bs*col+l; 1148 } 1149 } 1150 for (k=A->i[i]; k<A->i[i+1]; k++) { 1151 for (l=0; l<bs; l++) { 1152 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1153 } 1154 } 1155 for (; j<B->i[i+1]; j++) { 1156 for (l=0; l<bs; l++) { 1157 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1158 } 1159 } 1160 len = cnt - pcnt; 1161 for (k=1; k<bs; k++) { 1162 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1163 cnt += len; 1164 } 1165 } 1166 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1167 1168 /* store the columns to the file */ 1169 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1170 if (!rank) { 1171 MPI_Status status; 1172 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1173 for (i=1; i<size; i++) { 1174 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1175 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1176 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1177 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1178 } 1179 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1180 } else { 1181 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1182 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1183 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1184 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1185 } 1186 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1187 1188 /* load up the numerical values */ 1189 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<a->mbs; i++) { 1192 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1193 for (j=B->i[i]; j<B->i[i+1]; j++) { 1194 if (garray[B->j[j]] > cstart) break; 1195 for (l=0; l<bs; l++) { 1196 for (ll=0; ll<bs; ll++) { 1197 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1198 } 1199 } 1200 cnt += bs; 1201 } 1202 for (k=A->i[i]; k<A->i[i+1]; k++) { 1203 for (l=0; l<bs; l++) { 1204 for (ll=0; ll<bs; ll++) { 1205 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1206 } 1207 } 1208 cnt += bs; 1209 } 1210 for (; j<B->i[i+1]; j++) { 1211 for (l=0; l<bs; l++) { 1212 for (ll=0; ll<bs; ll++) { 1213 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1214 } 1215 } 1216 cnt += bs; 1217 } 1218 cnt += (bs-1)*rlen; 1219 } 1220 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1221 1222 /* store the column values to the file */ 1223 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1224 if (!rank) { 1225 MPI_Status status; 1226 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1227 for (i=1; i<size; i++) { 1228 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1229 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1230 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1231 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1232 } 1233 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1234 } else { 1235 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1236 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1237 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1238 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1239 } 1240 ierr = PetscFree(column_values);CHKERRQ(ierr); 1241 1242 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1243 if (file) { 1244 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatView_MPIBAIJ" 1251 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1252 { 1253 PetscErrorCode ierr; 1254 PetscBool iascii,isdraw,issocket,isbinary; 1255 1256 PetscFunctionBegin; 1257 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1258 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1259 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1260 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1261 if (iascii || isdraw || issocket) { 1262 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1263 } else if (isbinary) { 1264 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1265 } 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1271 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1272 { 1273 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1274 PetscErrorCode ierr; 1275 1276 PetscFunctionBegin; 1277 #if defined(PETSC_USE_LOG) 1278 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1279 #endif 1280 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1281 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1282 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1283 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1284 #if defined(PETSC_USE_CTABLE) 1285 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1286 #else 1287 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1288 #endif 1289 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1290 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1291 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1292 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1293 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1294 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1295 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1296 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1297 1298 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatMult_MPIBAIJ" 1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1314 { 1315 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 PetscInt nt; 1318 1319 PetscFunctionBegin; 1320 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1321 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1322 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1323 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1324 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1326 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1334 { 1335 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1340 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1341 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1342 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1348 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1349 { 1350 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1351 PetscErrorCode ierr; 1352 PetscBool merged; 1353 1354 PetscFunctionBegin; 1355 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1356 /* do nondiagonal part */ 1357 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1358 if (!merged) { 1359 /* send it on its way */ 1360 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1361 /* do local part */ 1362 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1363 /* receive remote parts: note this assumes the values are not actually */ 1364 /* inserted in yy until the next line */ 1365 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1366 } else { 1367 /* do local part */ 1368 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1369 /* send it on its way */ 1370 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1371 /* values actually were received in the Begin() but we need to call this nop */ 1372 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1380 { 1381 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1382 PetscErrorCode ierr; 1383 1384 PetscFunctionBegin; 1385 /* do nondiagonal part */ 1386 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1387 /* send it on its way */ 1388 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1389 /* do local part */ 1390 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1391 /* receive remote parts: note this assumes the values are not actually */ 1392 /* inserted in yy until the next line, which is true for my implementation*/ 1393 /* but is not perhaps always true. */ 1394 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 /* 1399 This only works correctly for square matrices where the subblock A->A is the 1400 diagonal block 1401 */ 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1411 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatScale_MPIBAIJ" 1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1418 { 1419 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1420 PetscErrorCode ierr; 1421 1422 PetscFunctionBegin; 1423 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1424 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1431 { 1432 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1433 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1434 PetscErrorCode ierr; 1435 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1436 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1437 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1438 1439 PetscFunctionBegin; 1440 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1441 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1442 mat->getrowactive = PETSC_TRUE; 1443 1444 if (!mat->rowvalues && (idx || v)) { 1445 /* 1446 allocate enough space to hold information from the longest row. 1447 */ 1448 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1449 PetscInt max = 1,mbs = mat->mbs,tmp; 1450 for (i=0; i<mbs; i++) { 1451 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1452 if (max < tmp) max = tmp; 1453 } 1454 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1455 } 1456 lrow = row - brstart; 1457 1458 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1459 if (!v) {pvA = 0; pvB = 0;} 1460 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1461 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1462 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1463 nztot = nzA + nzB; 1464 1465 cmap = mat->garray; 1466 if (v || idx) { 1467 if (nztot) { 1468 /* Sort by increasing column numbers, assuming A and B already sorted */ 1469 PetscInt imark = -1; 1470 if (v) { 1471 *v = v_p = mat->rowvalues; 1472 for (i=0; i<nzB; i++) { 1473 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1474 else break; 1475 } 1476 imark = i; 1477 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1478 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1479 } 1480 if (idx) { 1481 *idx = idx_p = mat->rowindices; 1482 if (imark > -1) { 1483 for (i=0; i<imark; i++) { 1484 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1485 } 1486 } else { 1487 for (i=0; i<nzB; i++) { 1488 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1489 else break; 1490 } 1491 imark = i; 1492 } 1493 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1494 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1495 } 1496 } else { 1497 if (idx) *idx = 0; 1498 if (v) *v = 0; 1499 } 1500 } 1501 *nz = nztot; 1502 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1503 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1512 1513 PetscFunctionBegin; 1514 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1515 baij->getrowactive = PETSC_FALSE; 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1522 { 1523 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1528 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1535 { 1536 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1537 Mat A = a->A,B = a->B; 1538 PetscErrorCode ierr; 1539 PetscReal isend[5],irecv[5]; 1540 1541 PetscFunctionBegin; 1542 info->block_size = (PetscReal)matin->rmap->bs; 1543 1544 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1545 1546 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1547 isend[3] = info->memory; isend[4] = info->mallocs; 1548 1549 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1550 1551 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1552 isend[3] += info->memory; isend[4] += info->mallocs; 1553 1554 if (flag == MAT_LOCAL) { 1555 info->nz_used = isend[0]; 1556 info->nz_allocated = isend[1]; 1557 info->nz_unneeded = isend[2]; 1558 info->memory = isend[3]; 1559 info->mallocs = isend[4]; 1560 } else if (flag == MAT_GLOBAL_MAX) { 1561 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1562 1563 info->nz_used = irecv[0]; 1564 info->nz_allocated = irecv[1]; 1565 info->nz_unneeded = irecv[2]; 1566 info->memory = irecv[3]; 1567 info->mallocs = irecv[4]; 1568 } else if (flag == MAT_GLOBAL_SUM) { 1569 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1570 1571 info->nz_used = irecv[0]; 1572 info->nz_allocated = irecv[1]; 1573 info->nz_unneeded = irecv[2]; 1574 info->memory = irecv[3]; 1575 info->mallocs = irecv[4]; 1576 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1577 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1578 info->fill_ratio_needed = 0; 1579 info->factor_mallocs = 0; 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #undef __FUNCT__ 1584 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1586 { 1587 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1588 PetscErrorCode ierr; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_NEW_NONZERO_LOCATIONS: 1593 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1594 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 case MAT_NEW_NONZERO_LOCATION_ERR: 1597 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1598 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1599 break; 1600 case MAT_ROW_ORIENTED: 1601 a->roworiented = flg; 1602 1603 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1604 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1605 break; 1606 case MAT_NEW_DIAGONALS: 1607 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1608 break; 1609 case MAT_IGNORE_OFF_PROC_ENTRIES: 1610 a->donotstash = flg; 1611 break; 1612 case MAT_USE_HASH_TABLE: 1613 a->ht_flag = flg; 1614 break; 1615 case MAT_SYMMETRIC: 1616 case MAT_STRUCTURALLY_SYMMETRIC: 1617 case MAT_HERMITIAN: 1618 case MAT_SYMMETRY_ETERNAL: 1619 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1620 break; 1621 default: 1622 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1630 { 1631 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1632 Mat_SeqBAIJ *Aloc; 1633 Mat B; 1634 PetscErrorCode ierr; 1635 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1636 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1637 MatScalar *a; 1638 1639 PetscFunctionBegin; 1640 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1641 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1642 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1643 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1644 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1645 /* Do not know preallocation information, but must set block size */ 1646 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1647 } else { 1648 B = *matout; 1649 } 1650 1651 /* copy over the A part */ 1652 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1653 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1654 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1655 1656 for (i=0; i<mbs; i++) { 1657 rvals[0] = bs*(baij->rstartbs + i); 1658 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1659 for (j=ai[i]; j<ai[i+1]; j++) { 1660 col = (baij->cstartbs+aj[j])*bs; 1661 for (k=0; k<bs; k++) { 1662 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1663 1664 col++; a += bs; 1665 } 1666 } 1667 } 1668 /* copy over the B part */ 1669 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1670 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1671 for (i=0; i<mbs; i++) { 1672 rvals[0] = bs*(baij->rstartbs + i); 1673 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1674 for (j=ai[i]; j<ai[i+1]; j++) { 1675 col = baij->garray[aj[j]]*bs; 1676 for (k=0; k<bs; k++) { 1677 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1678 col++; 1679 a += bs; 1680 } 1681 } 1682 } 1683 ierr = PetscFree(rvals);CHKERRQ(ierr); 1684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 1687 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1688 else { 1689 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1697 { 1698 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1699 Mat a = baij->A,b = baij->B; 1700 PetscErrorCode ierr; 1701 PetscInt s1,s2,s3; 1702 1703 PetscFunctionBegin; 1704 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1705 if (rr) { 1706 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1707 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1708 /* Overlap communication with computation. */ 1709 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 } 1711 if (ll) { 1712 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1713 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1714 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1715 } 1716 /* scale the diagonal block */ 1717 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1718 1719 if (rr) { 1720 /* Do a scatter end and then right scale the off-diagonal block */ 1721 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1730 { 1731 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1732 PetscInt *owners = A->rmap->range; 1733 PetscInt n = A->rmap->n; 1734 PetscMPIInt size = l->size; 1735 PetscSF sf; 1736 PetscInt *lrows; 1737 PetscSFNode *rrows; 1738 PetscInt lastidx = -1, r, p = 0, len = 0; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 /* Create SF where leaves are input rows and roots are owned rows */ 1743 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1744 for (r = 0; r < n; ++r) lrows[r] = -1; 1745 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1746 for (r = 0; r < N; ++r) { 1747 const PetscInt idx = rows[r]; 1748 PetscBool found = PETSC_FALSE; 1749 /* Trick for efficient searching for sorted rows */ 1750 if (lastidx > idx) p = 0; 1751 lastidx = idx; 1752 for (; p < size; ++p) { 1753 if (idx >= owners[p] && idx < owners[p+1]) { 1754 rrows[r].rank = p; 1755 rrows[r].index = rows[r] - owners[p]; 1756 found = PETSC_TRUE; 1757 break; 1758 } 1759 } 1760 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1761 } 1762 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1763 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1764 /* Collect flags for rows to be zeroed */ 1765 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1766 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1767 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1768 /* Compress and put in row numbers */ 1769 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1770 /* fix right hand side if needed */ 1771 if (x && b) { 1772 const PetscScalar *xx; 1773 PetscScalar *bb; 1774 1775 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1776 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1777 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1778 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1779 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1780 } 1781 1782 /* actually zap the local rows */ 1783 /* 1784 Zero the required rows. If the "diagonal block" of the matrix 1785 is square and the user wishes to set the diagonal we use separate 1786 code so that MatSetValues() is not called for each diagonal allocating 1787 new memory, thus calling lots of mallocs and slowing things down. 1788 1789 */ 1790 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1791 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1792 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1793 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1794 } else if (diag != 0.0) { 1795 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1796 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1797 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1798 for (r = 0; r < len; ++r) { 1799 const PetscInt row = lrows[r] + A->rmap->rstart; 1800 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1801 } 1802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1804 } else { 1805 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1806 } 1807 ierr = PetscFree(lrows);CHKERRQ(ierr); 1808 1809 /* only change matrix nonzero state if pattern was allowed to be changed */ 1810 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1811 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1812 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1813 } 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1819 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1820 { 1821 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1822 PetscErrorCode ierr; 1823 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 1824 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1825 PetscInt *lrows,*owners = A->rmap->range; 1826 PetscSFNode *rrows; 1827 PetscSF sf; 1828 const PetscScalar *xx; 1829 PetscScalar *bb,*mask; 1830 Vec xmask,lmask; 1831 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1832 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1833 PetscScalar *aa; 1834 #if defined(PETSC_DEBUG) 1835 PetscBool found = PETSC_FALSE; 1836 #endif 1837 1838 PetscFunctionBegin; 1839 /* Create SF where leaves are input rows and roots are owned rows */ 1840 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1841 for (r = 0; r < n; ++r) lrows[r] = -1; 1842 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1843 for (r = 0; r < N; ++r) { 1844 const PetscInt idx = rows[r]; 1845 PetscBool found = PETSC_FALSE; 1846 /* Trick for efficient searching for sorted rows */ 1847 if (lastidx > idx) p = 0; 1848 lastidx = idx; 1849 for (; p < size; ++p) { 1850 if (idx >= owners[p] && idx < owners[p+1]) { 1851 rrows[r].rank = p; 1852 rrows[r].index = rows[r] - owners[p]; 1853 found = PETSC_TRUE; 1854 break; 1855 } 1856 } 1857 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1858 } 1859 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1860 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1861 /* Collect flags for rows to be zeroed */ 1862 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1863 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1864 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1865 /* Compress and put in row numbers */ 1866 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1867 /* zero diagonal part of matrix */ 1868 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1869 /* handle off diagonal part of matrix */ 1870 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1871 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1872 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1873 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1874 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1875 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1876 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1877 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1878 if (x) { 1879 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1880 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1881 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1882 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1883 } 1884 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1885 /* remove zeroed rows of off diagonal matrix */ 1886 for (i = 0; i < len; ++i) { 1887 row = lrows[i]; 1888 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1889 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1890 for (k = 0; k < count; ++k) { 1891 aa[0] = 0.0; 1892 aa += bs; 1893 } 1894 } 1895 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1896 for (i = 0; i < l->B->rmap->N; ++i) { 1897 row = i/bs; 1898 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1899 for (k = 0; k < bs; ++k) { 1900 col = bs*baij->j[j] + k; 1901 if (PetscAbsScalar(mask[col])) { 1902 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1903 if (b) bb[i] -= aa[0]*xx[col]; 1904 aa[0] = 0.0; 1905 } 1906 } 1907 } 1908 } 1909 if (x) { 1910 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1911 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1912 } 1913 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1914 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1915 ierr = PetscFree(lrows);CHKERRQ(ierr); 1916 1917 /* only change matrix nonzero state if pattern was allowed to be changed */ 1918 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1919 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1920 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1921 } 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #undef __FUNCT__ 1926 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1927 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1928 { 1929 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1938 1939 #undef __FUNCT__ 1940 #define __FUNCT__ "MatEqual_MPIBAIJ" 1941 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1942 { 1943 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1944 Mat a,b,c,d; 1945 PetscBool flg; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 a = matA->A; b = matA->B; 1950 c = matB->A; d = matB->B; 1951 1952 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1953 if (flg) { 1954 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1955 } 1956 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatCopy_MPIBAIJ" 1962 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1963 { 1964 PetscErrorCode ierr; 1965 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1966 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1967 1968 PetscFunctionBegin; 1969 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1970 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1971 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1972 } else { 1973 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1974 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1975 } 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #undef __FUNCT__ 1980 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1981 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1982 { 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1987 PetscFunctionReturn(0); 1988 } 1989 1990 #undef __FUNCT__ 1991 #define __FUNCT__ "MatAXPY_MPIBAIJ" 1992 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1993 { 1994 PetscErrorCode ierr; 1995 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1996 PetscBLASInt bnz,one=1; 1997 Mat_SeqBAIJ *x,*y; 1998 1999 PetscFunctionBegin; 2000 if (str == SAME_NONZERO_PATTERN) { 2001 PetscScalar alpha = a; 2002 x = (Mat_SeqBAIJ*)xx->A->data; 2003 y = (Mat_SeqBAIJ*)yy->A->data; 2004 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2005 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2006 x = (Mat_SeqBAIJ*)xx->B->data; 2007 y = (Mat_SeqBAIJ*)yy->B->data; 2008 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2009 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2010 } else { 2011 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 #undef __FUNCT__ 2017 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2018 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2019 { 2020 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2025 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2031 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2032 { 2033 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2038 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2045 { 2046 PetscErrorCode ierr; 2047 IS iscol_local; 2048 PetscInt csize; 2049 2050 PetscFunctionBegin; 2051 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2052 if (call == MAT_REUSE_MATRIX) { 2053 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2054 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2055 } else { 2056 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2057 } 2058 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2059 if (call == MAT_INITIAL_MATRIX) { 2060 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2061 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2068 /* 2069 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2070 in local and then by concatenating the local matrices the end result. 2071 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2072 */ 2073 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2074 { 2075 PetscErrorCode ierr; 2076 PetscMPIInt rank,size; 2077 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2078 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2079 Mat M,Mreuse; 2080 MatScalar *vwork,*aa; 2081 MPI_Comm comm; 2082 IS isrow_new, iscol_new; 2083 PetscBool idflag,allrows, allcols; 2084 Mat_SeqBAIJ *aij; 2085 2086 PetscFunctionBegin; 2087 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2088 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2089 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2090 /* The compression and expansion should be avoided. Doesn't point 2091 out errors, might change the indices, hence buggey */ 2092 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2093 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2094 2095 /* Check for special case: each processor gets entire matrix columns */ 2096 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2097 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2098 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2099 else allcols = PETSC_FALSE; 2100 2101 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2102 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2103 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2104 else allrows = PETSC_FALSE; 2105 2106 if (call == MAT_REUSE_MATRIX) { 2107 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2108 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2109 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2110 } else { 2111 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2112 } 2113 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2114 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2115 /* 2116 m - number of local rows 2117 n - number of columns (same on all processors) 2118 rstart - first row in new global matrix generated 2119 */ 2120 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2121 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2122 m = m/bs; 2123 n = n/bs; 2124 2125 if (call == MAT_INITIAL_MATRIX) { 2126 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2127 ii = aij->i; 2128 jj = aij->j; 2129 2130 /* 2131 Determine the number of non-zeros in the diagonal and off-diagonal 2132 portions of the matrix in order to do correct preallocation 2133 */ 2134 2135 /* first get start and end of "diagonal" columns */ 2136 if (csize == PETSC_DECIDE) { 2137 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2138 if (mglobal == n*bs) { /* square matrix */ 2139 nlocal = m; 2140 } else { 2141 nlocal = n/size + ((n % size) > rank); 2142 } 2143 } else { 2144 nlocal = csize/bs; 2145 } 2146 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2147 rstart = rend - nlocal; 2148 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2149 2150 /* next, compute all the lengths */ 2151 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2152 for (i=0; i<m; i++) { 2153 jend = ii[i+1] - ii[i]; 2154 olen = 0; 2155 dlen = 0; 2156 for (j=0; j<jend; j++) { 2157 if (*jj < rstart || *jj >= rend) olen++; 2158 else dlen++; 2159 jj++; 2160 } 2161 olens[i] = olen; 2162 dlens[i] = dlen; 2163 } 2164 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2165 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2166 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2167 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2168 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2169 } else { 2170 PetscInt ml,nl; 2171 2172 M = *newmat; 2173 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2174 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2175 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2176 /* 2177 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2178 rather than the slower MatSetValues(). 2179 */ 2180 M->was_assembled = PETSC_TRUE; 2181 M->assembled = PETSC_FALSE; 2182 } 2183 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2184 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2185 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2186 ii = aij->i; 2187 jj = aij->j; 2188 aa = aij->a; 2189 for (i=0; i<m; i++) { 2190 row = rstart/bs + i; 2191 nz = ii[i+1] - ii[i]; 2192 cwork = jj; jj += nz; 2193 vwork = aa; aa += nz*bs*bs; 2194 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2195 } 2196 2197 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2199 *newmat = M; 2200 2201 /* save submatrix used in processor for next request */ 2202 if (call == MAT_INITIAL_MATRIX) { 2203 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2204 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2205 } 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatPermute_MPIBAIJ" 2211 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2212 { 2213 MPI_Comm comm,pcomm; 2214 PetscInt clocal_size,nrows; 2215 const PetscInt *rows; 2216 PetscMPIInt size; 2217 IS crowp,lcolp; 2218 PetscErrorCode ierr; 2219 2220 PetscFunctionBegin; 2221 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2222 /* make a collective version of 'rowp' */ 2223 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2224 if (pcomm==comm) { 2225 crowp = rowp; 2226 } else { 2227 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2228 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2229 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2230 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2231 } 2232 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2233 /* make a local version of 'colp' */ 2234 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2235 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2236 if (size==1) { 2237 lcolp = colp; 2238 } else { 2239 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2240 } 2241 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2242 /* now we just get the submatrix */ 2243 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2244 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2245 /* clean up */ 2246 if (pcomm!=comm) { 2247 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2248 } 2249 if (size>1) { 2250 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2251 } 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2257 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2258 { 2259 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2260 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2261 2262 PetscFunctionBegin; 2263 if (nghosts) *nghosts = B->nbs; 2264 if (ghosts) *ghosts = baij->garray; 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2270 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2271 { 2272 Mat B; 2273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2274 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2275 Mat_SeqAIJ *b; 2276 PetscErrorCode ierr; 2277 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2278 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2279 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2280 2281 PetscFunctionBegin; 2282 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2283 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2284 2285 /* ---------------------------------------------------------------- 2286 Tell every processor the number of nonzeros per row 2287 */ 2288 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2289 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2290 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2291 } 2292 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2293 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2294 displs = recvcounts + size; 2295 for (i=0; i<size; i++) { 2296 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2297 displs[i] = A->rmap->range[i]/bs; 2298 } 2299 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2300 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2301 #else 2302 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2303 #endif 2304 /* --------------------------------------------------------------- 2305 Create the sequential matrix of the same type as the local block diagonal 2306 */ 2307 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2308 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2309 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2310 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2311 b = (Mat_SeqAIJ*)B->data; 2312 2313 /*-------------------------------------------------------------------- 2314 Copy my part of matrix column indices over 2315 */ 2316 sendcount = ad->nz + bd->nz; 2317 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2318 a_jsendbuf = ad->j; 2319 b_jsendbuf = bd->j; 2320 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2321 cnt = 0; 2322 for (i=0; i<n; i++) { 2323 2324 /* put in lower diagonal portion */ 2325 m = bd->i[i+1] - bd->i[i]; 2326 while (m > 0) { 2327 /* is it above diagonal (in bd (compressed) numbering) */ 2328 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2329 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2330 m--; 2331 } 2332 2333 /* put in diagonal portion */ 2334 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2335 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2336 } 2337 2338 /* put in upper diagonal portion */ 2339 while (m-- > 0) { 2340 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2341 } 2342 } 2343 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2344 2345 /*-------------------------------------------------------------------- 2346 Gather all column indices to all processors 2347 */ 2348 for (i=0; i<size; i++) { 2349 recvcounts[i] = 0; 2350 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2351 recvcounts[i] += lens[j]; 2352 } 2353 } 2354 displs[0] = 0; 2355 for (i=1; i<size; i++) { 2356 displs[i] = displs[i-1] + recvcounts[i-1]; 2357 } 2358 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2359 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2360 #else 2361 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2362 #endif 2363 /*-------------------------------------------------------------------- 2364 Assemble the matrix into useable form (note numerical values not yet set) 2365 */ 2366 /* set the b->ilen (length of each row) values */ 2367 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2368 /* set the b->i indices */ 2369 b->i[0] = 0; 2370 for (i=1; i<=A->rmap->N/bs; i++) { 2371 b->i[i] = b->i[i-1] + lens[i-1]; 2372 } 2373 ierr = PetscFree(lens);CHKERRQ(ierr); 2374 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2376 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2377 2378 if (A->symmetric) { 2379 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2380 } else if (A->hermitian) { 2381 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2382 } else if (A->structurally_symmetric) { 2383 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2384 } 2385 *newmat = B; 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatSOR_MPIBAIJ" 2391 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2392 { 2393 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2394 PetscErrorCode ierr; 2395 Vec bb1 = 0; 2396 2397 PetscFunctionBegin; 2398 if (flag == SOR_APPLY_UPPER) { 2399 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2400 PetscFunctionReturn(0); 2401 } 2402 2403 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2404 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2405 } 2406 2407 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2408 if (flag & SOR_ZERO_INITIAL_GUESS) { 2409 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2410 its--; 2411 } 2412 2413 while (its--) { 2414 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2415 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2416 2417 /* update rhs: bb1 = bb - B*x */ 2418 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2419 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2420 2421 /* local sweep */ 2422 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2423 } 2424 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2425 if (flag & SOR_ZERO_INITIAL_GUESS) { 2426 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2427 its--; 2428 } 2429 while (its--) { 2430 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2431 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2432 2433 /* update rhs: bb1 = bb - B*x */ 2434 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2435 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2436 2437 /* local sweep */ 2438 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2439 } 2440 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2441 if (flag & SOR_ZERO_INITIAL_GUESS) { 2442 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2443 its--; 2444 } 2445 while (its--) { 2446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2448 2449 /* update rhs: bb1 = bb - B*x */ 2450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2452 2453 /* local sweep */ 2454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2455 } 2456 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2457 2458 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2464 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2465 { 2466 PetscErrorCode ierr; 2467 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2468 PetscInt N,i,*garray = aij->garray; 2469 PetscInt ib,jb,bs = A->rmap->bs; 2470 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2471 MatScalar *a_val = a_aij->a; 2472 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2473 MatScalar *b_val = b_aij->a; 2474 PetscReal *work; 2475 2476 PetscFunctionBegin; 2477 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2478 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2479 if (type == NORM_2) { 2480 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2481 for (jb=0; jb<bs; jb++) { 2482 for (ib=0; ib<bs; ib++) { 2483 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2484 a_val++; 2485 } 2486 } 2487 } 2488 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2489 for (jb=0; jb<bs; jb++) { 2490 for (ib=0; ib<bs; ib++) { 2491 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2492 b_val++; 2493 } 2494 } 2495 } 2496 } else if (type == NORM_1) { 2497 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2498 for (jb=0; jb<bs; jb++) { 2499 for (ib=0; ib<bs; ib++) { 2500 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2501 a_val++; 2502 } 2503 } 2504 } 2505 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2506 for (jb=0; jb<bs; jb++) { 2507 for (ib=0; ib<bs; ib++) { 2508 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2509 b_val++; 2510 } 2511 } 2512 } 2513 } else if (type == NORM_INFINITY) { 2514 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2515 for (jb=0; jb<bs; jb++) { 2516 for (ib=0; ib<bs; ib++) { 2517 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2518 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2519 a_val++; 2520 } 2521 } 2522 } 2523 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2524 for (jb=0; jb<bs; jb++) { 2525 for (ib=0; ib<bs; ib++) { 2526 int col = garray[b_aij->j[i]] * bs + jb; 2527 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2528 b_val++; 2529 } 2530 } 2531 } 2532 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2533 if (type == NORM_INFINITY) { 2534 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2535 } else { 2536 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2537 } 2538 ierr = PetscFree(work);CHKERRQ(ierr); 2539 if (type == NORM_2) { 2540 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2541 } 2542 PetscFunctionReturn(0); 2543 } 2544 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2547 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2548 { 2549 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 2558 /* -------------------------------------------------------------------*/ 2559 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2560 MatGetRow_MPIBAIJ, 2561 MatRestoreRow_MPIBAIJ, 2562 MatMult_MPIBAIJ, 2563 /* 4*/ MatMultAdd_MPIBAIJ, 2564 MatMultTranspose_MPIBAIJ, 2565 MatMultTransposeAdd_MPIBAIJ, 2566 0, 2567 0, 2568 0, 2569 /*10*/ 0, 2570 0, 2571 0, 2572 MatSOR_MPIBAIJ, 2573 MatTranspose_MPIBAIJ, 2574 /*15*/ MatGetInfo_MPIBAIJ, 2575 MatEqual_MPIBAIJ, 2576 MatGetDiagonal_MPIBAIJ, 2577 MatDiagonalScale_MPIBAIJ, 2578 MatNorm_MPIBAIJ, 2579 /*20*/ MatAssemblyBegin_MPIBAIJ, 2580 MatAssemblyEnd_MPIBAIJ, 2581 MatSetOption_MPIBAIJ, 2582 MatZeroEntries_MPIBAIJ, 2583 /*24*/ MatZeroRows_MPIBAIJ, 2584 0, 2585 0, 2586 0, 2587 0, 2588 /*29*/ MatSetUp_MPIBAIJ, 2589 0, 2590 0, 2591 0, 2592 0, 2593 /*34*/ MatDuplicate_MPIBAIJ, 2594 0, 2595 0, 2596 0, 2597 0, 2598 /*39*/ MatAXPY_MPIBAIJ, 2599 MatGetSubMatrices_MPIBAIJ, 2600 MatIncreaseOverlap_MPIBAIJ, 2601 MatGetValues_MPIBAIJ, 2602 MatCopy_MPIBAIJ, 2603 /*44*/ 0, 2604 MatScale_MPIBAIJ, 2605 0, 2606 0, 2607 MatZeroRowsColumns_MPIBAIJ, 2608 /*49*/ 0, 2609 0, 2610 0, 2611 0, 2612 0, 2613 /*54*/ MatFDColoringCreate_MPIXAIJ, 2614 0, 2615 MatSetUnfactored_MPIBAIJ, 2616 MatPermute_MPIBAIJ, 2617 MatSetValuesBlocked_MPIBAIJ, 2618 /*59*/ MatGetSubMatrix_MPIBAIJ, 2619 MatDestroy_MPIBAIJ, 2620 MatView_MPIBAIJ, 2621 0, 2622 0, 2623 /*64*/ 0, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*74*/ 0, 2634 MatFDColoringApply_BAIJ, 2635 0, 2636 0, 2637 0, 2638 /*79*/ 0, 2639 0, 2640 0, 2641 0, 2642 MatLoad_MPIBAIJ, 2643 /*84*/ 0, 2644 0, 2645 0, 2646 0, 2647 0, 2648 /*89*/ 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*94*/ 0, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*99*/ 0, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*104*/0, 2664 MatRealPart_MPIBAIJ, 2665 MatImaginaryPart_MPIBAIJ, 2666 0, 2667 0, 2668 /*109*/0, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2674 0, 2675 MatGetGhosts_MPIBAIJ, 2676 0, 2677 0, 2678 /*119*/0, 2679 0, 2680 0, 2681 0, 2682 MatGetMultiProcBlock_MPIBAIJ, 2683 /*124*/0, 2684 MatGetColumnNorms_MPIBAIJ, 2685 MatInvertBlockDiagonal_MPIBAIJ, 2686 0, 2687 0, 2688 /*129*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /*134*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /*139*/ 0, 2699 0, 2700 0, 2701 MatFDColoringSetUp_MPIXAIJ 2702 }; 2703 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2706 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2707 { 2708 PetscFunctionBegin; 2709 *a = ((Mat_MPIBAIJ*)A->data)->A; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2717 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2718 { 2719 PetscInt m,rstart,cstart,cend; 2720 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2721 const PetscInt *JJ =0; 2722 PetscScalar *values=0; 2723 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2728 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2729 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2730 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2731 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2732 m = B->rmap->n/bs; 2733 rstart = B->rmap->rstart/bs; 2734 cstart = B->cmap->rstart/bs; 2735 cend = B->cmap->rend/bs; 2736 2737 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2738 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2739 for (i=0; i<m; i++) { 2740 nz = ii[i+1] - ii[i]; 2741 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2742 nz_max = PetscMax(nz_max,nz); 2743 JJ = jj + ii[i]; 2744 for (j=0; j<nz; j++) { 2745 if (*JJ >= cstart) break; 2746 JJ++; 2747 } 2748 d = 0; 2749 for (; j<nz; j++) { 2750 if (*JJ++ >= cend) break; 2751 d++; 2752 } 2753 d_nnz[i] = d; 2754 o_nnz[i] = nz - d; 2755 } 2756 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2757 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2758 2759 values = (PetscScalar*)V; 2760 if (!values) { 2761 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2762 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2763 } 2764 for (i=0; i<m; i++) { 2765 PetscInt row = i + rstart; 2766 PetscInt ncols = ii[i+1] - ii[i]; 2767 const PetscInt *icols = jj + ii[i]; 2768 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2769 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2770 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2771 } else { /* block ordering does not match so we can only insert one block at a time. */ 2772 PetscInt j; 2773 for (j=0; j<ncols; j++) { 2774 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2775 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2776 } 2777 } 2778 } 2779 2780 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2781 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2789 /*@C 2790 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2791 (the default parallel PETSc format). 2792 2793 Collective on MPI_Comm 2794 2795 Input Parameters: 2796 + A - the matrix 2797 . bs - the block size 2798 . i - the indices into j for the start of each local row (starts with zero) 2799 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2800 - v - optional values in the matrix 2801 2802 Level: developer 2803 2804 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2805 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2806 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2807 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2808 block column and the second index is over columns within a block. 2809 2810 .keywords: matrix, aij, compressed row, sparse, parallel 2811 2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2813 @*/ 2814 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2815 { 2816 PetscErrorCode ierr; 2817 2818 PetscFunctionBegin; 2819 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2820 PetscValidType(B,1); 2821 PetscValidLogicalCollectiveInt(B,bs,2); 2822 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2823 PetscFunctionReturn(0); 2824 } 2825 2826 #undef __FUNCT__ 2827 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2828 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2829 { 2830 Mat_MPIBAIJ *b; 2831 PetscErrorCode ierr; 2832 PetscInt i; 2833 2834 PetscFunctionBegin; 2835 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2836 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2837 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2838 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2839 2840 if (d_nnz) { 2841 for (i=0; i<B->rmap->n/bs; i++) { 2842 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2843 } 2844 } 2845 if (o_nnz) { 2846 for (i=0; i<B->rmap->n/bs; i++) { 2847 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2848 } 2849 } 2850 2851 b = (Mat_MPIBAIJ*)B->data; 2852 b->bs2 = bs*bs; 2853 b->mbs = B->rmap->n/bs; 2854 b->nbs = B->cmap->n/bs; 2855 b->Mbs = B->rmap->N/bs; 2856 b->Nbs = B->cmap->N/bs; 2857 2858 for (i=0; i<=b->size; i++) { 2859 b->rangebs[i] = B->rmap->range[i]/bs; 2860 } 2861 b->rstartbs = B->rmap->rstart/bs; 2862 b->rendbs = B->rmap->rend/bs; 2863 b->cstartbs = B->cmap->rstart/bs; 2864 b->cendbs = B->cmap->rend/bs; 2865 2866 if (!B->preallocated) { 2867 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2868 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2869 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2870 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2871 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2872 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2873 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2874 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2875 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2876 } 2877 2878 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2879 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2880 B->preallocated = PETSC_TRUE; 2881 PetscFunctionReturn(0); 2882 } 2883 2884 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2885 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2886 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2889 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2890 { 2891 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2892 PetscErrorCode ierr; 2893 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2894 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2895 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2896 2897 PetscFunctionBegin; 2898 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2899 ii[0] = 0; 2900 for (i=0; i<M; i++) { 2901 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2902 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2903 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2904 /* remove one from count of matrix has diagonal */ 2905 for (j=id[i]; j<id[i+1]; j++) { 2906 if (jd[j] == i) {ii[i+1]--;break;} 2907 } 2908 } 2909 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2910 cnt = 0; 2911 for (i=0; i<M; i++) { 2912 for (j=io[i]; j<io[i+1]; j++) { 2913 if (garray[jo[j]] > rstart) break; 2914 jj[cnt++] = garray[jo[j]]; 2915 } 2916 for (k=id[i]; k<id[i+1]; k++) { 2917 if (jd[k] != i) { 2918 jj[cnt++] = rstart + jd[k]; 2919 } 2920 } 2921 for (; j<io[i+1]; j++) { 2922 jj[cnt++] = garray[jo[j]]; 2923 } 2924 } 2925 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2926 PetscFunctionReturn(0); 2927 } 2928 2929 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2930 2931 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2932 2933 #undef __FUNCT__ 2934 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2935 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2936 { 2937 PetscErrorCode ierr; 2938 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2939 Mat B; 2940 Mat_MPIAIJ *b; 2941 2942 PetscFunctionBegin; 2943 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2944 2945 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2946 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2947 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2948 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2949 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2950 b = (Mat_MPIAIJ*) B->data; 2951 2952 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2953 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2954 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2955 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2956 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2957 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 if (reuse == MAT_REUSE_MATRIX) { 2962 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2963 } else { 2964 *newmat = B; 2965 } 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #if defined(PETSC_HAVE_MUMPS) 2970 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2971 #endif 2972 2973 /*MC 2974 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2975 2976 Options Database Keys: 2977 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2978 . -mat_block_size <bs> - set the blocksize used to store the matrix 2979 - -mat_use_hash_table <fact> 2980 2981 Level: beginner 2982 2983 .seealso: MatCreateMPIBAIJ 2984 M*/ 2985 2986 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2987 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatCreate_MPIBAIJ" 2990 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2991 { 2992 Mat_MPIBAIJ *b; 2993 PetscErrorCode ierr; 2994 PetscBool flg; 2995 2996 PetscFunctionBegin; 2997 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2998 B->data = (void*)b; 2999 3000 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3001 B->assembled = PETSC_FALSE; 3002 3003 B->insertmode = NOT_SET_VALUES; 3004 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3006 3007 /* build local table of row and column ownerships */ 3008 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 3009 3010 /* build cache for off array entries formed */ 3011 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3012 3013 b->donotstash = PETSC_FALSE; 3014 b->colmap = NULL; 3015 b->garray = NULL; 3016 b->roworiented = PETSC_TRUE; 3017 3018 /* stuff used in block assembly */ 3019 b->barray = 0; 3020 3021 /* stuff used for matrix vector multiply */ 3022 b->lvec = 0; 3023 b->Mvctx = 0; 3024 3025 /* stuff for MatGetRow() */ 3026 b->rowindices = 0; 3027 b->rowvalues = 0; 3028 b->getrowactive = PETSC_FALSE; 3029 3030 /* hash table stuff */ 3031 b->ht = 0; 3032 b->hd = 0; 3033 b->ht_size = 0; 3034 b->ht_flag = PETSC_FALSE; 3035 b->ht_fact = 0; 3036 b->ht_total_ct = 0; 3037 b->ht_insert_ct = 0; 3038 3039 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3040 b->ijonly = PETSC_FALSE; 3041 3042 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3043 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3044 if (flg) { 3045 PetscReal fact = 1.39; 3046 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3047 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3048 if (fact <= 1.0) fact = 1.39; 3049 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3050 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3051 } 3052 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3053 3054 #if defined(PETSC_HAVE_MUMPS) 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3056 #endif 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 3068 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 3072 /*MC 3073 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3074 3075 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3076 and MATMPIBAIJ otherwise. 3077 3078 Options Database Keys: 3079 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3080 3081 Level: beginner 3082 3083 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3084 M*/ 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3088 /*@C 3089 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3090 (block compressed row). For good matrix assembly performance 3091 the user should preallocate the matrix storage by setting the parameters 3092 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3093 performance can be increased by more than a factor of 50. 3094 3095 Collective on Mat 3096 3097 Input Parameters: 3098 + A - the matrix 3099 . bs - size of block 3100 . d_nz - number of block nonzeros per block row in diagonal portion of local 3101 submatrix (same for all local rows) 3102 . d_nnz - array containing the number of block nonzeros in the various block rows 3103 of the in diagonal portion of the local (possibly different for each block 3104 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3105 set it even if it is zero. 3106 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3107 submatrix (same for all local rows). 3108 - o_nnz - array containing the number of nonzeros in the various block rows of the 3109 off-diagonal portion of the local submatrix (possibly different for 3110 each block row) or NULL. 3111 3112 If the *_nnz parameter is given then the *_nz parameter is ignored 3113 3114 Options Database Keys: 3115 + -mat_block_size - size of the blocks to use 3116 - -mat_use_hash_table <fact> 3117 3118 Notes: 3119 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3120 than it must be used on all processors that share the object for that argument. 3121 3122 Storage Information: 3123 For a square global matrix we define each processor's diagonal portion 3124 to be its local rows and the corresponding columns (a square submatrix); 3125 each processor's off-diagonal portion encompasses the remainder of the 3126 local matrix (a rectangular submatrix). 3127 3128 The user can specify preallocated storage for the diagonal part of 3129 the local submatrix with either d_nz or d_nnz (not both). Set 3130 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3131 memory allocation. Likewise, specify preallocated storage for the 3132 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3133 3134 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3135 the figure below we depict these three local rows and all columns (0-11). 3136 3137 .vb 3138 0 1 2 3 4 5 6 7 8 9 10 11 3139 -------------------------- 3140 row 3 |o o o d d d o o o o o o 3141 row 4 |o o o d d d o o o o o o 3142 row 5 |o o o d d d o o o o o o 3143 -------------------------- 3144 .ve 3145 3146 Thus, any entries in the d locations are stored in the d (diagonal) 3147 submatrix, and any entries in the o locations are stored in the 3148 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3149 stored simply in the MATSEQBAIJ format for compressed row storage. 3150 3151 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3152 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3153 In general, for PDE problems in which most nonzeros are near the diagonal, 3154 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3155 or you will get TERRIBLE performance; see the users' manual chapter on 3156 matrices. 3157 3158 You can call MatGetInfo() to get information on how effective the preallocation was; 3159 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3160 You can also run with the option -info and look for messages with the string 3161 malloc in them to see if additional memory allocation was needed. 3162 3163 Level: intermediate 3164 3165 .keywords: matrix, block, aij, compressed row, sparse, parallel 3166 3167 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3168 @*/ 3169 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3170 { 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3175 PetscValidType(B,1); 3176 PetscValidLogicalCollectiveInt(B,bs,2); 3177 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3178 PetscFunctionReturn(0); 3179 } 3180 3181 #undef __FUNCT__ 3182 #define __FUNCT__ "MatCreateBAIJ" 3183 /*@C 3184 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3185 (block compressed row). For good matrix assembly performance 3186 the user should preallocate the matrix storage by setting the parameters 3187 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3188 performance can be increased by more than a factor of 50. 3189 3190 Collective on MPI_Comm 3191 3192 Input Parameters: 3193 + comm - MPI communicator 3194 . bs - size of blockk 3195 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3196 This value should be the same as the local size used in creating the 3197 y vector for the matrix-vector product y = Ax. 3198 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3199 This value should be the same as the local size used in creating the 3200 x vector for the matrix-vector product y = Ax. 3201 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3202 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3203 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3204 submatrix (same for all local rows) 3205 . d_nnz - array containing the number of nonzero blocks in the various block rows 3206 of the in diagonal portion of the local (possibly different for each block 3207 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3208 and set it even if it is zero. 3209 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3210 submatrix (same for all local rows). 3211 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3212 off-diagonal portion of the local submatrix (possibly different for 3213 each block row) or NULL. 3214 3215 Output Parameter: 3216 . A - the matrix 3217 3218 Options Database Keys: 3219 + -mat_block_size - size of the blocks to use 3220 - -mat_use_hash_table <fact> 3221 3222 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3223 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3224 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3225 3226 Notes: 3227 If the *_nnz parameter is given then the *_nz parameter is ignored 3228 3229 A nonzero block is any block that as 1 or more nonzeros in it 3230 3231 The user MUST specify either the local or global matrix dimensions 3232 (possibly both). 3233 3234 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3235 than it must be used on all processors that share the object for that argument. 3236 3237 Storage Information: 3238 For a square global matrix we define each processor's diagonal portion 3239 to be its local rows and the corresponding columns (a square submatrix); 3240 each processor's off-diagonal portion encompasses the remainder of the 3241 local matrix (a rectangular submatrix). 3242 3243 The user can specify preallocated storage for the diagonal part of 3244 the local submatrix with either d_nz or d_nnz (not both). Set 3245 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3246 memory allocation. Likewise, specify preallocated storage for the 3247 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3248 3249 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3250 the figure below we depict these three local rows and all columns (0-11). 3251 3252 .vb 3253 0 1 2 3 4 5 6 7 8 9 10 11 3254 -------------------------- 3255 row 3 |o o o d d d o o o o o o 3256 row 4 |o o o d d d o o o o o o 3257 row 5 |o o o d d d o o o o o o 3258 -------------------------- 3259 .ve 3260 3261 Thus, any entries in the d locations are stored in the d (diagonal) 3262 submatrix, and any entries in the o locations are stored in the 3263 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3264 stored simply in the MATSEQBAIJ format for compressed row storage. 3265 3266 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3267 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3268 In general, for PDE problems in which most nonzeros are near the diagonal, 3269 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3270 or you will get TERRIBLE performance; see the users' manual chapter on 3271 matrices. 3272 3273 Level: intermediate 3274 3275 .keywords: matrix, block, aij, compressed row, sparse, parallel 3276 3277 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3278 @*/ 3279 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3280 { 3281 PetscErrorCode ierr; 3282 PetscMPIInt size; 3283 3284 PetscFunctionBegin; 3285 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3286 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3287 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3288 if (size > 1) { 3289 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3290 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3291 } else { 3292 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3293 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3294 } 3295 PetscFunctionReturn(0); 3296 } 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3300 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3301 { 3302 Mat mat; 3303 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3304 PetscErrorCode ierr; 3305 PetscInt len=0; 3306 3307 PetscFunctionBegin; 3308 *newmat = 0; 3309 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3310 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3311 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3312 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3313 3314 mat->factortype = matin->factortype; 3315 mat->preallocated = PETSC_TRUE; 3316 mat->assembled = PETSC_TRUE; 3317 mat->insertmode = NOT_SET_VALUES; 3318 3319 a = (Mat_MPIBAIJ*)mat->data; 3320 mat->rmap->bs = matin->rmap->bs; 3321 a->bs2 = oldmat->bs2; 3322 a->mbs = oldmat->mbs; 3323 a->nbs = oldmat->nbs; 3324 a->Mbs = oldmat->Mbs; 3325 a->Nbs = oldmat->Nbs; 3326 3327 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3328 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3329 3330 a->size = oldmat->size; 3331 a->rank = oldmat->rank; 3332 a->donotstash = oldmat->donotstash; 3333 a->roworiented = oldmat->roworiented; 3334 a->rowindices = 0; 3335 a->rowvalues = 0; 3336 a->getrowactive = PETSC_FALSE; 3337 a->barray = 0; 3338 a->rstartbs = oldmat->rstartbs; 3339 a->rendbs = oldmat->rendbs; 3340 a->cstartbs = oldmat->cstartbs; 3341 a->cendbs = oldmat->cendbs; 3342 3343 /* hash table stuff */ 3344 a->ht = 0; 3345 a->hd = 0; 3346 a->ht_size = 0; 3347 a->ht_flag = oldmat->ht_flag; 3348 a->ht_fact = oldmat->ht_fact; 3349 a->ht_total_ct = 0; 3350 a->ht_insert_ct = 0; 3351 3352 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3353 if (oldmat->colmap) { 3354 #if defined(PETSC_USE_CTABLE) 3355 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3356 #else 3357 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3358 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3359 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3360 #endif 3361 } else a->colmap = 0; 3362 3363 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3364 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3365 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3366 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3367 } else a->garray = 0; 3368 3369 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3370 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3371 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3372 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3373 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3374 3375 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3376 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3377 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3378 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3379 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3380 *newmat = mat; 3381 PetscFunctionReturn(0); 3382 } 3383 3384 #undef __FUNCT__ 3385 #define __FUNCT__ "MatLoad_MPIBAIJ" 3386 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3387 { 3388 PetscErrorCode ierr; 3389 int fd; 3390 PetscInt i,nz,j,rstart,rend; 3391 PetscScalar *vals,*buf; 3392 MPI_Comm comm; 3393 MPI_Status status; 3394 PetscMPIInt rank,size,maxnz; 3395 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3396 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3397 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3398 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3399 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3400 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3401 3402 PetscFunctionBegin; 3403 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3404 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3405 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3406 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3407 3408 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3409 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3410 if (!rank) { 3411 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3412 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3413 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3414 } 3415 3416 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3417 3418 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3419 M = header[1]; N = header[2]; 3420 3421 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3422 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3423 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3424 3425 /* If global sizes are set, check if they are consistent with that given in the file */ 3426 if (sizesset) { 3427 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3428 } 3429 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3430 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3431 3432 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3433 3434 /* 3435 This code adds extra rows to make sure the number of rows is 3436 divisible by the blocksize 3437 */ 3438 Mbs = M/bs; 3439 extra_rows = bs - M + bs*Mbs; 3440 if (extra_rows == bs) extra_rows = 0; 3441 else Mbs++; 3442 if (extra_rows && !rank) { 3443 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3444 } 3445 3446 /* determine ownership of all rows */ 3447 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3448 mbs = Mbs/size + ((Mbs % size) > rank); 3449 m = mbs*bs; 3450 } else { /* User set */ 3451 m = newmat->rmap->n; 3452 mbs = m/bs; 3453 } 3454 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3455 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3456 3457 /* process 0 needs enough room for process with most rows */ 3458 if (!rank) { 3459 mmax = rowners[1]; 3460 for (i=2; i<=size; i++) { 3461 mmax = PetscMax(mmax,rowners[i]); 3462 } 3463 mmax*=bs; 3464 } else mmax = -1; /* unused, but compiler warns anyway */ 3465 3466 rowners[0] = 0; 3467 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3468 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3469 rstart = rowners[rank]; 3470 rend = rowners[rank+1]; 3471 3472 /* distribute row lengths to all processors */ 3473 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3474 if (!rank) { 3475 mend = m; 3476 if (size == 1) mend = mend - extra_rows; 3477 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3478 for (j=mend; j<m; j++) locrowlens[j] = 1; 3479 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3480 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3481 for (j=0; j<m; j++) { 3482 procsnz[0] += locrowlens[j]; 3483 } 3484 for (i=1; i<size; i++) { 3485 mend = browners[i+1] - browners[i]; 3486 if (i == size-1) mend = mend - extra_rows; 3487 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3488 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3489 /* calculate the number of nonzeros on each processor */ 3490 for (j=0; j<browners[i+1]-browners[i]; j++) { 3491 procsnz[i] += rowlengths[j]; 3492 } 3493 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3494 } 3495 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3496 } else { 3497 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3498 } 3499 3500 if (!rank) { 3501 /* determine max buffer needed and allocate it */ 3502 maxnz = procsnz[0]; 3503 for (i=1; i<size; i++) { 3504 maxnz = PetscMax(maxnz,procsnz[i]); 3505 } 3506 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3507 3508 /* read in my part of the matrix column indices */ 3509 nz = procsnz[0]; 3510 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3511 mycols = ibuf; 3512 if (size == 1) nz -= extra_rows; 3513 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3514 if (size == 1) { 3515 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3516 } 3517 3518 /* read in every ones (except the last) and ship off */ 3519 for (i=1; i<size-1; i++) { 3520 nz = procsnz[i]; 3521 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3522 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3523 } 3524 /* read in the stuff for the last proc */ 3525 if (size != 1) { 3526 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3527 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3528 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3529 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3530 } 3531 ierr = PetscFree(cols);CHKERRQ(ierr); 3532 } else { 3533 /* determine buffer space needed for message */ 3534 nz = 0; 3535 for (i=0; i<m; i++) { 3536 nz += locrowlens[i]; 3537 } 3538 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3539 mycols = ibuf; 3540 /* receive message of column indices*/ 3541 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3542 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3543 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3544 } 3545 3546 /* loop over local rows, determining number of off diagonal entries */ 3547 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3548 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3549 rowcount = 0; nzcount = 0; 3550 for (i=0; i<mbs; i++) { 3551 dcount = 0; 3552 odcount = 0; 3553 for (j=0; j<bs; j++) { 3554 kmax = locrowlens[rowcount]; 3555 for (k=0; k<kmax; k++) { 3556 tmp = mycols[nzcount++]/bs; 3557 if (!mask[tmp]) { 3558 mask[tmp] = 1; 3559 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3560 else masked1[dcount++] = tmp; 3561 } 3562 } 3563 rowcount++; 3564 } 3565 3566 dlens[i] = dcount; 3567 odlens[i] = odcount; 3568 3569 /* zero out the mask elements we set */ 3570 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3571 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3572 } 3573 3574 3575 if (!sizesset) { 3576 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3577 } 3578 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3579 3580 if (!rank) { 3581 ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3582 /* read in my part of the matrix numerical values */ 3583 nz = procsnz[0]; 3584 vals = buf; 3585 mycols = ibuf; 3586 if (size == 1) nz -= extra_rows; 3587 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3588 if (size == 1) { 3589 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3590 } 3591 3592 /* insert into matrix */ 3593 jj = rstart*bs; 3594 for (i=0; i<m; i++) { 3595 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3596 mycols += locrowlens[i]; 3597 vals += locrowlens[i]; 3598 jj++; 3599 } 3600 /* read in other processors (except the last one) and ship out */ 3601 for (i=1; i<size-1; i++) { 3602 nz = procsnz[i]; 3603 vals = buf; 3604 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3605 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3606 } 3607 /* the last proc */ 3608 if (size != 1) { 3609 nz = procsnz[i] - extra_rows; 3610 vals = buf; 3611 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3612 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3613 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3614 } 3615 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3616 } else { 3617 /* receive numeric values */ 3618 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3619 3620 /* receive message of values*/ 3621 vals = buf; 3622 mycols = ibuf; 3623 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3624 3625 /* insert into matrix */ 3626 jj = rstart*bs; 3627 for (i=0; i<m; i++) { 3628 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3629 mycols += locrowlens[i]; 3630 vals += locrowlens[i]; 3631 jj++; 3632 } 3633 } 3634 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3635 ierr = PetscFree(buf);CHKERRQ(ierr); 3636 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3637 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3638 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3639 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3640 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3647 /*@ 3648 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3649 3650 Input Parameters: 3651 . mat - the matrix 3652 . fact - factor 3653 3654 Not Collective, each process can use a different factor 3655 3656 Level: advanced 3657 3658 Notes: 3659 This can also be set by the command line option: -mat_use_hash_table <fact> 3660 3661 .keywords: matrix, hashtable, factor, HT 3662 3663 .seealso: MatSetOption() 3664 @*/ 3665 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3666 { 3667 PetscErrorCode ierr; 3668 3669 PetscFunctionBegin; 3670 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3671 PetscFunctionReturn(0); 3672 } 3673 3674 #undef __FUNCT__ 3675 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3676 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3677 { 3678 Mat_MPIBAIJ *baij; 3679 3680 PetscFunctionBegin; 3681 baij = (Mat_MPIBAIJ*)mat->data; 3682 baij->ht_fact = fact; 3683 PetscFunctionReturn(0); 3684 } 3685 3686 #undef __FUNCT__ 3687 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3688 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3689 { 3690 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3691 3692 PetscFunctionBegin; 3693 *Ad = a->A; 3694 *Ao = a->B; 3695 *colmap = a->garray; 3696 PetscFunctionReturn(0); 3697 } 3698 3699 /* 3700 Special version for direct calls from Fortran (to eliminate two function call overheads 3701 */ 3702 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3703 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3705 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3706 #endif 3707 3708 #undef __FUNCT__ 3709 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3710 /*@C 3711 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3712 3713 Collective on Mat 3714 3715 Input Parameters: 3716 + mat - the matrix 3717 . min - number of input rows 3718 . im - input rows 3719 . nin - number of input columns 3720 . in - input columns 3721 . v - numerical values input 3722 - addvin - INSERT_VALUES or ADD_VALUES 3723 3724 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3725 3726 Level: advanced 3727 3728 .seealso: MatSetValuesBlocked() 3729 @*/ 3730 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3731 { 3732 /* convert input arguments to C version */ 3733 Mat mat = *matin; 3734 PetscInt m = *min, n = *nin; 3735 InsertMode addv = *addvin; 3736 3737 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3738 const MatScalar *value; 3739 MatScalar *barray = baij->barray; 3740 PetscBool roworiented = baij->roworiented; 3741 PetscErrorCode ierr; 3742 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3743 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3744 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3745 3746 PetscFunctionBegin; 3747 /* tasks normally handled by MatSetValuesBlocked() */ 3748 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3749 #if defined(PETSC_USE_DEBUG) 3750 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3751 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3752 #endif 3753 if (mat->assembled) { 3754 mat->was_assembled = PETSC_TRUE; 3755 mat->assembled = PETSC_FALSE; 3756 } 3757 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3758 3759 3760 if (!barray) { 3761 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3762 baij->barray = barray; 3763 } 3764 3765 if (roworiented) stepval = (n-1)*bs; 3766 else stepval = (m-1)*bs; 3767 3768 for (i=0; i<m; i++) { 3769 if (im[i] < 0) continue; 3770 #if defined(PETSC_USE_DEBUG) 3771 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3772 #endif 3773 if (im[i] >= rstart && im[i] < rend) { 3774 row = im[i] - rstart; 3775 for (j=0; j<n; j++) { 3776 /* If NumCol = 1 then a copy is not required */ 3777 if ((roworiented) && (n == 1)) { 3778 barray = (MatScalar*)v + i*bs2; 3779 } else if ((!roworiented) && (m == 1)) { 3780 barray = (MatScalar*)v + j*bs2; 3781 } else { /* Here a copy is required */ 3782 if (roworiented) { 3783 value = v + i*(stepval+bs)*bs + j*bs; 3784 } else { 3785 value = v + j*(stepval+bs)*bs + i*bs; 3786 } 3787 for (ii=0; ii<bs; ii++,value+=stepval) { 3788 for (jj=0; jj<bs; jj++) { 3789 *barray++ = *value++; 3790 } 3791 } 3792 barray -=bs2; 3793 } 3794 3795 if (in[j] >= cstart && in[j] < cend) { 3796 col = in[j] - cstart; 3797 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3798 } else if (in[j] < 0) continue; 3799 #if defined(PETSC_USE_DEBUG) 3800 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3801 #endif 3802 else { 3803 if (mat->was_assembled) { 3804 if (!baij->colmap) { 3805 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3806 } 3807 3808 #if defined(PETSC_USE_DEBUG) 3809 #if defined(PETSC_USE_CTABLE) 3810 { PetscInt data; 3811 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3812 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3813 } 3814 #else 3815 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3816 #endif 3817 #endif 3818 #if defined(PETSC_USE_CTABLE) 3819 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3820 col = (col - 1)/bs; 3821 #else 3822 col = (baij->colmap[in[j]] - 1)/bs; 3823 #endif 3824 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3825 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3826 col = in[j]; 3827 } 3828 } else col = in[j]; 3829 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3830 } 3831 } 3832 } else { 3833 if (!baij->donotstash) { 3834 if (roworiented) { 3835 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3836 } else { 3837 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3838 } 3839 } 3840 } 3841 } 3842 3843 /* task normally handled by MatSetValuesBlocked() */ 3844 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3845 PetscFunctionReturn(0); 3846 } 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3850 /*@ 3851 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3852 CSR format the local rows. 3853 3854 Collective on MPI_Comm 3855 3856 Input Parameters: 3857 + comm - MPI communicator 3858 . bs - the block size, only a block size of 1 is supported 3859 . m - number of local rows (Cannot be PETSC_DECIDE) 3860 . n - This value should be the same as the local size used in creating the 3861 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3862 calculated if N is given) For square matrices n is almost always m. 3863 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3864 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3865 . i - row indices 3866 . j - column indices 3867 - a - matrix values 3868 3869 Output Parameter: 3870 . mat - the matrix 3871 3872 Level: intermediate 3873 3874 Notes: 3875 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3876 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3877 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3878 3879 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3880 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3881 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3882 with column-major ordering within blocks. 3883 3884 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3885 3886 .keywords: matrix, aij, compressed row, sparse, parallel 3887 3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3889 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3890 @*/ 3891 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3892 { 3893 PetscErrorCode ierr; 3894 3895 PetscFunctionBegin; 3896 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3897 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3898 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3899 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3900 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3901 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3902 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3903 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3904 PetscFunctionReturn(0); 3905 } 3906