#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
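
     A hedged illustration of the resulting map (the numbers are made up and
   refer to the plain-array variant used when PETSC_USE_CTABLE is not defined):
   with bs = 2 and garray = {3,7,9} the loop below stores

       colmap[3] = 0*bs+1 = 1,   colmap[7] = 1*bs+1 = 3,   colmap[9] = 2*bs+1 = 5,

   so a global point column c that belongs to the off-diagonal part B maps to
   the local point column colmap[c/bs] - 1 + c%bs, while colmap[cb] == 0 marks
   a block column cb that does not occur in B on this process.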
84 */ 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 88 { 89 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 90 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 91 PetscErrorCode ierr; 92 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 93 94 PetscFunctionBegin; 95 #if defined(PETSC_USE_CTABLE) 96 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 97 for (i=0; i<nbs; i++) { 98 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 99 } 100 #else 101 ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 102 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 103 ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 104 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 105 #endif 106 PetscFunctionReturn(0); 107 } 108 109 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 110 { \ 111 \ 112 brow = row/bs; \ 113 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 114 rmax = aimax[brow]; nrow = ailen[brow]; \ 115 bcol = col/bs; \ 116 ridx = row % bs; cidx = col % bs; \ 117 low = 0; high = nrow; \ 118 while (high-low > 3) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > bcol) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > bcol) break; \ 125 if (rp[_i] == bcol) { \ 126 bap = ap + bs2*_i + bs*cidx + ridx; \ 127 if (addv == ADD_VALUES) *bap += value; \ 128 else *bap = value; \ 129 goto a_noinsert; \ 130 } \ 131 } \ 132 if (a->nonew == 1) goto a_noinsert; \ 133 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 134 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 135 N = nrow++ - 1; \ 136 /* shift up all the later entries in this row */ \ 137 for (ii=N; ii>=_i; ii--) { \ 138 rp[ii+1] = rp[ii]; \ 139 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 140 } \ 141 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 142 rp[_i] = bcol; \ 143 ap[bs2*_i + bs*cidx + ridx] = value; \ 144 a_noinsert:; \ 145 ailen[brow] = nrow; \ 146 } 147 148 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 149 { \ 150 brow = row/bs; \ 151 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 152 rmax = bimax[brow]; nrow = bilen[brow]; \ 153 bcol = col/bs; \ 154 ridx = row % bs; cidx = col % bs; \ 155 low = 0; high = nrow; \ 156 while (high-low > 3) { \ 157 t = (low+high)/2; \ 158 if (rp[t] > bcol) high = t; \ 159 else low = t; \ 160 } \ 161 for (_i=low; _i<high; _i++) { \ 162 if (rp[_i] > bcol) break; \ 163 if (rp[_i] == bcol) { \ 164 bap = ap + bs2*_i + bs*cidx + ridx; \ 165 if (addv == ADD_VALUES) *bap += value; \ 166 else *bap = value; \ 167 goto b_noinsert; \ 168 } \ 169 } \ 170 if (b->nonew == 1) goto b_noinsert; \ 171 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 172 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 173 N = nrow++ - 1; \ 174 /* shift up all the later entries in this row */ \ 175 for (ii=N; ii>=_i; ii--) { \ 176 rp[ii+1] = rp[ii]; \ 177 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 178 } \ 179 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 180 
rp[_i] = bcol; \ 181 ap[bs2*_i + bs*cidx + ridx] = value; \ 182 b_noinsert:; \ 183 bilen[brow] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIBAIJ" 188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 191 MatScalar value; 192 PetscBool roworiented = baij->roworiented; 193 PetscErrorCode ierr; 194 PetscInt i,j,row,col; 195 PetscInt rstart_orig=mat->rmap->rstart; 196 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 197 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 198 199 /* Some Variables required in the macro */ 200 Mat A = baij->A; 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 202 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 203 MatScalar *aa =a->a; 204 205 Mat B = baij->B; 206 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 207 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 208 MatScalar *ba =b->a; 209 210 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 211 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 212 MatScalar *ap,*bap; 213 214 PetscFunctionBegin; 215 for (i=0; i<m; i++) { 216 if (im[i] < 0) continue; 217 #if defined(PETSC_USE_DEBUG) 218 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 219 #endif 220 if (im[i] >= rstart_orig && im[i] < rend_orig) { 221 row = im[i] - rstart_orig; 222 for (j=0; j<n; j++) { 223 if (in[j] >= cstart_orig && in[j] < cend_orig) { 224 col = in[j] - cstart_orig; 225 if (roworiented) value = v[i*n+j]; 226 else value = v[i+j*m]; 227 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 228 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!baij->colmap) { 236 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined(PETSC_USE_CTABLE) 239 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 240 col = col - 1; 241 #else 242 col = baij->colmap[in[j]/bs] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 245 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 248 B = baij->B; 249 b = (Mat_SeqBAIJ*)(B)->data; 250 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 251 ba =b->a; 252 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 253 else col += in[j]%bs; 254 } else col = in[j]; 255 if (roworiented) value = v[i*n+j]; 256 else value = v[i+j*m]; 257 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 258 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 259 } 260 } 261 } else { 262 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 263 if (!baij->donotstash) { 264 mat->assembled = PETSC_FALSE; 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 267 } else { 268 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 279 { 280 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 281 const PetscScalar *value; 282 MatScalar *barray = baij->barray; 283 PetscBool roworiented = baij->roworiented; 284 PetscErrorCode ierr; 285 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 286 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 287 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 288 289 PetscFunctionBegin; 290 if (!barray) { 291 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 292 baij->barray = barray; 293 } 294 295 if (roworiented) stepval = (n-1)*bs; 296 else stepval = (m-1)*bs; 297 298 for (i=0; i<m; i++) { 299 if (im[i] < 0) continue; 300 #if defined(PETSC_USE_DEBUG) 301 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 302 #endif 303 if (im[i] >= rstart && im[i] < rend) { 304 row = im[i] - rstart; 305 for (j=0; j<n; j++) { 306 /* If NumCol = 1 then a copy is not required */ 307 if ((roworiented) && (n == 1)) { 308 barray = (MatScalar*)v + i*bs2; 309 } else if ((!roworiented) && (m == 1)) { 310 barray = (MatScalar*)v + j*bs2; 311 } else { /* Here a copy is required */ 312 if (roworiented) { 313 value = v + (i*(stepval+bs) + j)*bs; 314 } else { 315 value = v + (j*(stepval+bs) + i)*bs; 316 } 317 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 318 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 319 barray += bs; 320 } 321 barray -= bs2; 322 } 323 324 if (in[j] >= cstart && in[j] < cend) { 325 col = in[j] - cstart; 326 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 327 } else if (in[j] < 0) continue; 328 #if defined(PETSC_USE_DEBUG) 329 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 330 #endif 331 else { 332 if (mat->was_assembled) { 333 if (!baij->colmap) { 334 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 335 } 336 337 #if defined(PETSC_USE_DEBUG) 338 #if defined(PETSC_USE_CTABLE) 339 { PetscInt data; 340 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 341 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 342 } 343 #else 344 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 345 #endif 346 #endif 347 #if defined(PETSC_USE_CTABLE) 348 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 349 col = (col - 1)/bs; 350 #else 351 col = (baij->colmap[in[j]] - 1)/bs; 352 #endif 353 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 354 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 355 col = in[j]; 356 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 357 } else col = in[j]; 358 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 359 } 360 } 361 } else { 362 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 363 if (!baij->donotstash) { 364 if (roworiented) { 365 ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig = mat->rmap->rstart;
  PetscInt       rend_orig   = mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented =
baij->roworiented; 458 PetscErrorCode ierr; 459 PetscInt i,j,ii,jj,row,col; 460 PetscInt rstart=baij->rstartbs; 461 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 462 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 463 PetscReal tmp; 464 MatScalar **HD = baij->hd,*baij_a; 465 const PetscScalar *v_t,*value; 466 #if defined(PETSC_USE_DEBUG) 467 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 468 #endif 469 470 PetscFunctionBegin; 471 if (roworiented) stepval = (n-1)*bs; 472 else stepval = (m-1)*bs; 473 474 for (i=0; i<m; i++) { 475 #if defined(PETSC_USE_DEBUG) 476 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 477 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 478 #endif 479 row = im[i]; 480 v_t = v + i*nbs2; 481 if (row >= rstart && row < rend) { 482 for (j=0; j<n; j++) { 483 col = in[j]; 484 485 /* Look up into the Hash Table */ 486 key = row*Nbs+col+1; 487 h1 = HASH(size,key,tmp); 488 489 idx = h1; 490 #if defined(PETSC_USE_DEBUG) 491 total_ct++; 492 insert_ct++; 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #else 501 if (HT[idx] != key) { 502 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 503 if (idx == size) { 504 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 505 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 506 } 507 } 508 #endif 509 baij_a = HD[idx]; 510 if (roworiented) { 511 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 512 /* value = v + (i*(stepval+bs)+j)*bs; */ 513 value = v_t; 514 v_t += bs; 515 if (addv == ADD_VALUES) { 516 for (ii=0; ii<bs; ii++,value+=stepval) { 517 for (jj=ii; jj<bs2; jj+=bs) { 518 baij_a[jj] += *value++; 519 } 520 } 521 } else { 522 for (ii=0; ii<bs; ii++,value+=stepval) { 523 for (jj=ii; jj<bs2; jj+=bs) { 524 baij_a[jj] = *value++; 525 } 526 } 527 } 528 } else { 529 value = v + j*(stepval+bs)*bs + i*bs; 530 if (addv == ADD_VALUES) { 531 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 532 for (jj=0; jj<bs; jj++) { 533 baij_a[jj] += *value++; 534 } 535 } 536 } else { 537 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 538 for (jj=0; jj<bs; jj++) { 539 baij_a[jj] = *value++; 540 } 541 } 542 } 543 } 544 } 545 } else { 546 if (!baij->donotstash) { 547 if (roworiented) { 548 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549 } else { 550 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551 } 552 } 553 } 554 } 555 #if defined(PETSC_USE_DEBUG) 556 baij->ht_total_ct = total_ct; 557 baij->ht_insert_ct = insert_ct; 558 #endif 559 PetscFunctionReturn(0); 560 } 561 562 #undef __FUNCT__ 563 #define __FUNCT__ "MatGetValues_MPIBAIJ" 564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 565 { 566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567 PetscErrorCode ierr; 568 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 569 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 570 571 PetscFunctionBegin; 572 for (i=0; 
i<m; i++) { 573 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 574 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 575 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 576 row = idxm[i] - bsrstart; 577 for (j=0; j<n; j++) { 578 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 579 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 580 if (idxn[j] >= bscstart && idxn[j] < bscend) { 581 col = idxn[j] - bscstart; 582 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 583 } else { 584 if (!baij->colmap) { 585 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 586 } 587 #if defined(PETSC_USE_CTABLE) 588 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 589 data--; 590 #else 591 data = baij->colmap[idxn[j]/bs]-1; 592 #endif 593 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 594 else { 595 col = data + idxn[j]%bs; 596 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 597 } 598 } 599 } 600 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 601 } 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatNorm_MPIBAIJ" 607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 608 { 609 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 610 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 611 PetscErrorCode ierr; 612 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 613 PetscReal sum = 0.0; 614 MatScalar *v; 615 616 PetscFunctionBegin; 617 if (baij->size == 1) { 618 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 619 } else { 620 if (type == NORM_FROBENIUS) { 621 v = amat->a; 622 nz = amat->nz*bs2; 623 for (i=0; i<nz; i++) { 624 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 625 } 626 v = bmat->a; 627 nz = bmat->nz*bs2; 628 for (i=0; i<nz; i++) { 629 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 630 } 631 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 632 *nrm = PetscSqrtReal(*nrm); 633 } else if (type == NORM_1) { /* max column sum */ 634 PetscReal *tmp,*tmp2; 635 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 636 ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 637 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 638 v = amat->a; jj = amat->j; 639 for (i=0; i<amat->nz; i++) { 640 for (j=0; j<bs; j++) { 641 col = bs*(cstart + *jj) + j; /* column index */ 642 for (row=0; row<bs; row++) { 643 tmp[col] += PetscAbsScalar(*v); v++; 644 } 645 } 646 jj++; 647 } 648 v = bmat->a; jj = bmat->j; 649 for (i=0; i<bmat->nz; i++) { 650 for (j=0; j<bs; j++) { 651 col = bs*garray[*jj] + j; 652 for (row=0; row<bs; row++) { 653 tmp[col] += PetscAbsScalar(*v); v++; 654 } 655 } 656 jj++; 657 } 658 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 659 *nrm = 0.0; 660 for (j=0; j<mat->cmap->N; j++) { 661 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 662 } 663 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 664 } else if (type == NORM_INFINITY) { /* max row sum */ 665 PetscReal *sums; 666 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 667 sum = 0.0; 668 for 
(j=0; j<amat->mbs; j++) { 669 for (row=0; row<bs; row++) sums[row] = 0.0; 670 v = amat->a + bs2*amat->i[j]; 671 nz = amat->i[j+1]-amat->i[j]; 672 for (i=0; i<nz; i++) { 673 for (col=0; col<bs; col++) { 674 for (row=0; row<bs; row++) { 675 sums[row] += PetscAbsScalar(*v); v++; 676 } 677 } 678 } 679 v = bmat->a + bs2*bmat->i[j]; 680 nz = bmat->i[j+1]-bmat->i[j]; 681 for (i=0; i<nz; i++) { 682 for (col=0; col<bs; col++) { 683 for (row=0; row<bs; row++) { 684 sums[row] += PetscAbsScalar(*v); v++; 685 } 686 } 687 } 688 for (row=0; row<bs; row++) { 689 if (sums[row] > sum) sum = sums[row]; 690 } 691 } 692 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 693 ierr = PetscFree(sums);CHKERRQ(ierr); 694 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 695 } 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 Creates the hash table, and sets the table 701 This table is created only once. 702 If new entried need to be added to the matrix 703 then the hash table has to be destroyed and 704 recreated. 705 */ 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 709 { 710 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 711 Mat A = baij->A,B=baij->B; 712 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 713 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 714 PetscErrorCode ierr; 715 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 716 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 717 PetscInt *HT,key; 718 MatScalar **HD; 719 PetscReal tmp; 720 #if defined(PETSC_USE_INFO) 721 PetscInt ct=0,max=0; 722 #endif 723 724 PetscFunctionBegin; 725 if (baij->ht) PetscFunctionReturn(0); 726 727 baij->ht_size = (PetscInt)(factor*nz); 728 ht_size = baij->ht_size; 729 730 /* Allocate Memory for Hash Table */ 731 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 732 HD = baij->hd; 733 HT = baij->ht; 734 735 /* Loop Over A */ 736 for (i=0; i<a->mbs; i++) { 737 for (j=ai[i]; j<ai[i+1]; j++) { 738 row = i+rstart; 739 col = aj[j]+cstart; 740 741 key = row*Nbs + col + 1; 742 h1 = HASH(ht_size,key,tmp); 743 for (k=0; k<ht_size; k++) { 744 if (!HT[(h1+k)%ht_size]) { 745 HT[(h1+k)%ht_size] = key; 746 HD[(h1+k)%ht_size] = a->a + j*bs2; 747 break; 748 #if defined(PETSC_USE_INFO) 749 } else { 750 ct++; 751 #endif 752 } 753 } 754 #if defined(PETSC_USE_INFO) 755 if (k> max) max = k; 756 #endif 757 } 758 } 759 /* Loop Over B */ 760 for (i=0; i<b->mbs; i++) { 761 for (j=bi[i]; j<bi[i+1]; j++) { 762 row = i+rstart; 763 col = garray[bj[j]]; 764 key = row*Nbs + col + 1; 765 h1 = HASH(ht_size,key,tmp); 766 for (k=0; k<ht_size; k++) { 767 if (!HT[(h1+k)%ht_size]) { 768 HT[(h1+k)%ht_size] = key; 769 HD[(h1+k)%ht_size] = b->a + j*bs2; 770 break; 771 #if defined(PETSC_USE_INFO) 772 } else { 773 ct++; 774 #endif 775 } 776 } 777 #if defined(PETSC_USE_INFO) 778 if (k> max) max = k; 779 #endif 780 } 781 } 782 783 /* Print Summary */ 784 #if defined(PETSC_USE_INFO) 785 for (i=0,j=0; i<ht_size; i++) { 786 if (HT[i]) j++; 787 } 788 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash.
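       (A hedged illustration with made-up entries: a stashed 2x2 block
            [a11 a12]
            [a21 a22]
        arrives from the block-stash as the column-major sequence
        a11, a21, a12, a22.)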
       Since the values are stashed column-oriented, set the roworiented flag
       to column-oriented, and after MatSetValues() restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled and thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank
= baij->rank; 945 PetscInt bs = mat->rmap->bs; 946 PetscBool iascii,isdraw; 947 PetscViewer sviewer; 948 PetscViewerFormat format; 949 950 PetscFunctionBegin; 951 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 953 if (iascii) { 954 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 955 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 956 MatInfo info; 957 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 958 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 959 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 960 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 961 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 962 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 963 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 964 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 965 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 966 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 967 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 968 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 969 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } else if (format == PETSC_VIEWER_ASCII_INFO) { 972 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 975 PetscFunctionReturn(0); 976 } 977 } 978 979 if (isdraw) { 980 PetscDraw draw; 981 PetscBool isnull; 982 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 983 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 984 } 985 986 { 987 /* assemble the entire matrix onto first processor. */ 988 Mat A; 989 Mat_SeqBAIJ *Aloc; 990 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 991 MatScalar *a; 992 993 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 994 /* Perhaps this should be the type of mat? 
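       (Hedged usage sketch, names as in standard PETSc: a call such as
            ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
        on a multi-process communicator reaches this block; every rank feeds its
        local blocks into the temporary matrix A via MatSetValues_MPIBAIJ(), and
        after assembly only rank 0 prints its part through MatView_SeqBAIJ().)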
*/ 995 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 998 } else { 999 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1000 } 1001 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1002 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1003 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1004 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1005 1006 /* copy over the A part */ 1007 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1008 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1009 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1010 1011 for (i=0; i<mbs; i++) { 1012 rvals[0] = bs*(baij->rstartbs + i); 1013 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1014 for (j=ai[i]; j<ai[i+1]; j++) { 1015 col = (baij->cstartbs+aj[j])*bs; 1016 for (k=0; k<bs; k++) { 1017 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1018 col++; a += bs; 1019 } 1020 } 1021 } 1022 /* copy over the B part */ 1023 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1024 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1025 for (i=0; i<mbs; i++) { 1026 rvals[0] = bs*(baij->rstartbs + i); 1027 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1028 for (j=ai[i]; j<ai[i+1]; j++) { 1029 col = baij->garray[aj[j]]*bs; 1030 for (k=0; k<bs; k++) { 1031 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1032 col++; a += bs; 1033 } 1034 } 1035 } 1036 ierr = PetscFree(rvals);CHKERRQ(ierr); 1037 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039 /* 1040 Everyone has to call to draw the matrix since the graphics waits are 1041 synchronized across all processors that share the PetscDraw object 1042 */ 1043 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1044 if (!rank) { 1045 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1046 } 1047 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A);CHKERRQ(ierr); 1049 } 1050 PetscFunctionReturn(0); 1051 } 1052 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1056 { 1057 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1058 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1059 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)a->B->data; 1060 PetscErrorCode ierr; 1061 PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1062 PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1063 int fd; 1064 PetscScalar *column_values; 1065 FILE *file; 1066 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1067 PetscInt message_count,flowcontrolcount; 1068 1069 PetscFunctionBegin; 1070 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1071 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1072 nz = bs2*(A->nz + B->nz); 1073 rlen = mat->rmap->n; 1074 if (!rank) { 1075 header[0] = MAT_FILE_CLASSID; 1076 header[1] = mat->rmap->N; 1077 header[2] = mat->cmap->N; 1078 1079 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1080 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1081 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1082 /* get largest number of rows any 
processor has */ 1083 range = mat->rmap->range; 1084 for (i=1; i<size; i++) { 1085 rlen = PetscMax(rlen,range[i+1] - range[i]); 1086 } 1087 } else { 1088 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1089 } 1090 1091 ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1092 /* compute lengths of each row */ 1093 for (i=0; i<a->mbs; i++) { 1094 crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1095 } 1096 /* store the row lengths to the file */ 1097 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1098 if (!rank) { 1099 MPI_Status status; 1100 ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1101 rlen = (range[1] - range[0])/bs; 1102 for (i=0; i<rlen; i++) { 1103 for (j=0; j<bs; j++) { 1104 row_lens[i*bs+j] = bs*crow_lens[i]; 1105 } 1106 } 1107 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1108 for (i=1; i<size; i++) { 1109 rlen = (range[i+1] - range[i])/bs; 1110 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1111 ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1112 for (k=0; k<rlen; k++) { 1113 for (j=0; j<bs; j++) { 1114 row_lens[k*bs+j] = bs*crow_lens[k]; 1115 } 1116 } 1117 ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1118 } 1119 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1120 ierr = PetscFree(row_lens);CHKERRQ(ierr); 1121 } else { 1122 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1123 ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1124 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1127 1128 /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1129 information needed to make it for each row from a block row. 
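      (Illustration with made-up numbers: for bs = 2, a block row whose block
      columns are {4,9} is written as the point columns {8,9,18,19}, and that
      list is then repeated once more for the second point row of the block.)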
This does require more communication but still not more than 1130 the communication needed for the nonzero values */ 1131 nzmax = nz; /* space a largest processor needs */ 1132 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1133 ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1134 cnt = 0; 1135 for (i=0; i<a->mbs; i++) { 1136 pcnt = cnt; 1137 for (j=B->i[i]; j<B->i[i+1]; j++) { 1138 if ((col = garray[B->j[j]]) > cstart) break; 1139 for (l=0; l<bs; l++) { 1140 column_indices[cnt++] = bs*col+l; 1141 } 1142 } 1143 for (k=A->i[i]; k<A->i[i+1]; k++) { 1144 for (l=0; l<bs; l++) { 1145 column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1146 } 1147 } 1148 for (; j<B->i[i+1]; j++) { 1149 for (l=0; l<bs; l++) { 1150 column_indices[cnt++] = bs*garray[B->j[j]]+l; 1151 } 1152 } 1153 len = cnt - pcnt; 1154 for (k=1; k<bs; k++) { 1155 ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1156 cnt += len; 1157 } 1158 } 1159 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1160 1161 /* store the columns to the file */ 1162 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1163 if (!rank) { 1164 MPI_Status status; 1165 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166 for (i=1; i<size; i++) { 1167 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1168 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1169 ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1170 ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1171 } 1172 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1173 } else { 1174 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1175 ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1176 ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1177 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1178 } 1179 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1180 1181 /* load up the numerical values */ 1182 ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1183 cnt = 0; 1184 for (i=0; i<a->mbs; i++) { 1185 rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1186 for (j=B->i[i]; j<B->i[i+1]; j++) { 1187 if (garray[B->j[j]] > cstart) break; 1188 for (l=0; l<bs; l++) { 1189 for (ll=0; ll<bs; ll++) { 1190 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1191 } 1192 } 1193 cnt += bs; 1194 } 1195 for (k=A->i[i]; k<A->i[i+1]; k++) { 1196 for (l=0; l<bs; l++) { 1197 for (ll=0; ll<bs; ll++) { 1198 column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1199 } 1200 } 1201 cnt += bs; 1202 } 1203 for (; j<B->i[i+1]; j++) { 1204 for (l=0; l<bs; l++) { 1205 for (ll=0; ll<bs; ll++) { 1206 column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1207 } 1208 } 1209 cnt += bs; 1210 } 1211 cnt += (bs-1)*rlen; 1212 } 1213 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1214 1215 /* store the column values to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1225 } 1226 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1227 } else { 1228 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1229 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1232 } 1233 ierr = PetscFree(column_values);CHKERRQ(ierr); 1234 1235 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1236 if (file) { 1237 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatView_MPIBAIJ" 1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1245 { 1246 PetscErrorCode ierr; 1247 PetscBool iascii,isdraw,issocket,isbinary; 1248 1249 PetscFunctionBegin; 1250 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1251 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1252 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1253 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1254 if (iascii || isdraw || issocket) { 1255 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1256 } else if (isbinary) { 1257 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1258 } 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatDestroy_MPIBAIJ" 1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1265 { 1266 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 #if defined(PETSC_USE_LOG) 1271 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1272 #endif 1273 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1274 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1275 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1276 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1277 #if defined(PETSC_USE_CTABLE) 1278 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1279 #else 1280 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1283 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1284 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1285 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1286 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1287 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1288 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1289 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1290 1291 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1293 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatMult_MPIBAIJ" 1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1307 { 1308 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1309 PetscErrorCode ierr; 1310 PetscInt nt; 1311 1312 PetscFunctionBegin; 1313 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1314 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1315 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1316 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1317 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1319 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1320 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1327 { 1328 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1333 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1342 { 1343 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1344 PetscErrorCode ierr; 1345 PetscBool merged; 1346 1347 PetscFunctionBegin; 1348 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1349 /* do nondiagonal part */ 1350 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1351 if (!merged) { 1352 /* send it on its way */ 1353 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354 /* do local part */ 1355 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1356 /* receive remote parts: note this assumes the values are not actually */ 1357 /* inserted in yy until the next line */ 1358 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1359 } else { 1360 /* do local part */ 1361 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1362 /* send it on its way */ 1363 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1364 /* values actually were received in the Begin() 
but we need to call this nop */ 1365 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1373 { 1374 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1375 PetscErrorCode ierr; 1376 1377 PetscFunctionBegin; 1378 /* do nondiagonal part */ 1379 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1380 /* send it on its way */ 1381 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1382 /* do local part */ 1383 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1384 /* receive remote parts: note this assumes the values are not actually */ 1385 /* inserted in yy until the next line, which is true for my implementation*/ 1386 /* but is not perhaps always true. */ 1387 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 /* 1392 This only works correctly for square matrices where the subblock A->A is the 1393 diagonal block 1394 */ 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1398 { 1399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1404 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatScale_MPIBAIJ" 1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1417 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetRow_MPIBAIJ" 1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1424 { 1425 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1426 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1427 PetscErrorCode ierr; 1428 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1429 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1430 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1431 1432 PetscFunctionBegin; 1433 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1434 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1435 mat->getrowactive = PETSC_TRUE; 1436 1437 if (!mat->rowvalues && (idx || v)) { 1438 /* 1439 allocate enough space to hold information from the longest row. 
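         A hedged example with made-up sizes: with bs = 3, if the widest block row
         on this process has 5 blocks in A and 2 in B, then max = 7 and the
         PetscMalloc2() below reserves 7*bs2 = 63 scalars and 63 indices, enough
         for any point row handed back by MatGetRow().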
1440 */ 1441 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1442 PetscInt max = 1,mbs = mat->mbs,tmp; 1443 for (i=0; i<mbs; i++) { 1444 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1445 if (max < tmp) max = tmp; 1446 } 1447 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1448 } 1449 lrow = row - brstart; 1450 1451 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1452 if (!v) {pvA = 0; pvB = 0;} 1453 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1454 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1455 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1456 nztot = nzA + nzB; 1457 1458 cmap = mat->garray; 1459 if (v || idx) { 1460 if (nztot) { 1461 /* Sort by increasing column numbers, assuming A and B already sorted */ 1462 PetscInt imark = -1; 1463 if (v) { 1464 *v = v_p = mat->rowvalues; 1465 for (i=0; i<nzB; i++) { 1466 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1467 else break; 1468 } 1469 imark = i; 1470 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1471 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1472 } 1473 if (idx) { 1474 *idx = idx_p = mat->rowindices; 1475 if (imark > -1) { 1476 for (i=0; i<imark; i++) { 1477 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1478 } 1479 } else { 1480 for (i=0; i<nzB; i++) { 1481 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1482 else break; 1483 } 1484 imark = i; 1485 } 1486 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1487 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1488 } 1489 } else { 1490 if (idx) *idx = 0; 1491 if (v) *v = 0; 1492 } 1493 } 1494 *nz = nztot; 1495 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1503 { 1504 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1505 1506 PetscFunctionBegin; 1507 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1508 baij->getrowactive = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1515 { 1516 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1521 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1522 PetscFunctionReturn(0); 1523 } 1524 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1528 { 1529 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1530 Mat A = a->A,B = a->B; 1531 PetscErrorCode ierr; 1532 PetscReal isend[5],irecv[5]; 1533 1534 PetscFunctionBegin; 1535 info->block_size = (PetscReal)matin->rmap->bs; 1536 1537 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1538 1539 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1540 isend[3] = info->memory; isend[4] = info->mallocs; 1541 1542 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1543 1544 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1545 isend[3] += info->memory; 
isend[4] += info->mallocs; 1546 1547 if (flag == MAT_LOCAL) { 1548 info->nz_used = isend[0]; 1549 info->nz_allocated = isend[1]; 1550 info->nz_unneeded = isend[2]; 1551 info->memory = isend[3]; 1552 info->mallocs = isend[4]; 1553 } else if (flag == MAT_GLOBAL_MAX) { 1554 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1555 1556 info->nz_used = irecv[0]; 1557 info->nz_allocated = irecv[1]; 1558 info->nz_unneeded = irecv[2]; 1559 info->memory = irecv[3]; 1560 info->mallocs = irecv[4]; 1561 } else if (flag == MAT_GLOBAL_SUM) { 1562 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1563 1564 info->nz_used = irecv[0]; 1565 info->nz_allocated = irecv[1]; 1566 info->nz_unneeded = irecv[2]; 1567 info->memory = irecv[3]; 1568 info->mallocs = irecv[4]; 1569 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1570 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1571 info->fill_ratio_needed = 0; 1572 info->factor_mallocs = 0; 1573 PetscFunctionReturn(0); 1574 } 1575 1576 #undef __FUNCT__ 1577 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1579 { 1580 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1581 PetscErrorCode ierr; 1582 1583 PetscFunctionBegin; 1584 switch (op) { 1585 case MAT_NEW_NONZERO_LOCATIONS: 1586 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1587 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1588 case MAT_KEEP_NONZERO_PATTERN: 1589 case MAT_NEW_NONZERO_LOCATION_ERR: 1590 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1591 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1592 break; 1593 case MAT_ROW_ORIENTED: 1594 a->roworiented = flg; 1595 1596 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1597 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1598 break; 1599 case MAT_NEW_DIAGONALS: 1600 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1601 break; 1602 case MAT_IGNORE_OFF_PROC_ENTRIES: 1603 a->donotstash = flg; 1604 break; 1605 case MAT_USE_HASH_TABLE: 1606 a->ht_flag = flg; 1607 break; 1608 case MAT_SYMMETRIC: 1609 case MAT_STRUCTURALLY_SYMMETRIC: 1610 case MAT_HERMITIAN: 1611 case MAT_SYMMETRY_ETERNAL: 1612 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1613 break; 1614 default: 1615 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1623 { 1624 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1625 Mat_SeqBAIJ *Aloc; 1626 Mat B; 1627 PetscErrorCode ierr; 1628 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1629 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1630 MatScalar *a; 1631 1632 PetscFunctionBegin; 1633 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1634 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1635 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1636 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1637 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1638 /* Do not know preallocation information, but must set block size */ 1639 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1640 } else { 
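    /* MAT_REUSE_MATRIX with a separate output matrix: reuse the previously created transpose */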
1641 B = *matout; 1642 } 1643 1644 /* copy over the A part */ 1645 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1646 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1647 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1648 1649 for (i=0; i<mbs; i++) { 1650 rvals[0] = bs*(baij->rstartbs + i); 1651 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1652 for (j=ai[i]; j<ai[i+1]; j++) { 1653 col = (baij->cstartbs+aj[j])*bs; 1654 for (k=0; k<bs; k++) { 1655 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1656 1657 col++; a += bs; 1658 } 1659 } 1660 } 1661 /* copy over the B part */ 1662 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1663 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1664 for (i=0; i<mbs; i++) { 1665 rvals[0] = bs*(baij->rstartbs + i); 1666 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1667 for (j=ai[i]; j<ai[i+1]; j++) { 1668 col = baij->garray[aj[j]]*bs; 1669 for (k=0; k<bs; k++) { 1670 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1671 col++; 1672 a += bs; 1673 } 1674 } 1675 } 1676 ierr = PetscFree(rvals);CHKERRQ(ierr); 1677 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1678 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1679 1680 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1681 else { 1682 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1690 { 1691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1692 Mat a = baij->A,b = baij->B; 1693 PetscErrorCode ierr; 1694 PetscInt s1,s2,s3; 1695 1696 PetscFunctionBegin; 1697 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1698 if (rr) { 1699 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1700 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1701 /* Overlap communication with computation. 
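       The scatter of rr into the local work vector is started here and completed only
       after the diagonal block has been scaled, so communication overlaps that work.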
*/ 1702 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1703 } 1704 if (ll) { 1705 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1706 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1707 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1708 } 1709 /* scale the diagonal block */ 1710 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1711 1712 if (rr) { 1713 /* Do a scatter end and then right scale the off-diagonal block */ 1714 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1715 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ" 1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1723 { 1724 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1725 PetscInt *owners = A->rmap->range; 1726 PetscInt n = A->rmap->n; 1727 PetscMPIInt size = l->size; 1728 PetscSF sf; 1729 PetscInt *lrows; 1730 PetscSFNode *rrows; 1731 PetscInt lastidx = -1, r, p = 0, len = 0; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 /* Create SF where leaves are input rows and roots are owned rows */ 1736 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1737 for (r = 0; r < n; ++r) lrows[r] = -1; 1738 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1739 for (r = 0; r < N; ++r) { 1740 const PetscInt idx = rows[r]; 1741 PetscBool found = PETSC_FALSE; 1742 /* Trick for efficient searching for sorted rows */ 1743 if (lastidx > idx) p = 0; 1744 lastidx = idx; 1745 for (; p < size; ++p) { 1746 if (idx >= owners[p] && idx < owners[p+1]) { 1747 rrows[r].rank = p; 1748 rrows[r].index = rows[r] - owners[p]; 1749 found = PETSC_TRUE; 1750 break; 1751 } 1752 } 1753 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1754 } 1755 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1756 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1757 /* Collect flags for rows to be zeroed */ 1758 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1759 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1760 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1761 /* Compress and put in row numbers */ 1762 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1763 /* fix right hand side if needed */ 1764 if (x && b) { 1765 const PetscScalar *xx; 1766 PetscScalar *bb; 1767 1768 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1769 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1770 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1771 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1772 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1773 } 1774 1775 /* actually zap the local rows */ 1776 /* 1777 Zero the required rows. If the "diagonal block" of the matrix 1778 is square and the user wishes to set the diagonal we use separate 1779 code so that MatSetValues() is not called for each diagonal allocating 1780 new memory, thus calling lots of mallocs and slowing things down. 
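     For the rectangular or new-nonzero case handled below, the diagonal entries are
     instead inserted one at a time with MatSetValues() and the matrix is reassembled.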
1781 1782 */ 1783 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1784 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr); 1785 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1786 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr); 1787 } else if (diag != 0.0) { 1788 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1789 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1790 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1791 for (r = 0; r < len; ++r) { 1792 const PetscInt row = lrows[r] + A->rmap->rstart; 1793 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1794 } 1795 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1797 } else { 1798 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1799 } 1800 ierr = PetscFree(lrows);CHKERRQ(ierr); 1801 1802 /* only change matrix nonzero state if pattern was allowed to be changed */ 1803 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1804 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1805 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1812 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1813 { 1814 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1815 PetscErrorCode ierr; 1816 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 1817 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1818 PetscInt *lrows,*owners = A->rmap->range; 1819 PetscSFNode *rrows; 1820 PetscSF sf; 1821 const PetscScalar *xx; 1822 PetscScalar *bb,*mask; 1823 Vec xmask,lmask; 1824 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1825 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1826 PetscScalar *aa; 1827 #if defined(PETSC_DEBUG) 1828 PetscBool found = PETSC_FALSE; 1829 #endif 1830 1831 PetscFunctionBegin; 1832 /* Create SF where leaves are input rows and roots are owned rows */ 1833 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1834 for (r = 0; r < n; ++r) lrows[r] = -1; 1835 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1836 for (r = 0; r < N; ++r) { 1837 const PetscInt idx = rows[r]; 1838 PetscBool found = PETSC_FALSE; 1839 /* Trick for efficient searching for sorted rows */ 1840 if (lastidx > idx) p = 0; 1841 lastidx = idx; 1842 for (; p < size; ++p) { 1843 if (idx >= owners[p] && idx < owners[p+1]) { 1844 rrows[r].rank = p; 1845 rrows[r].index = rows[r] - owners[p]; 1846 found = PETSC_TRUE; 1847 break; 1848 } 1849 } 1850 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 1851 } 1852 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1853 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1854 /* Collect flags for rows to be zeroed */ 1855 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1856 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1857 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1858 /* Compress and put in row numbers */ 1859 for (r = 0; r < n; 
++r) if (lrows[r] >= 0) lrows[len++] = r; 1860 /* zero diagonal part of matrix */ 1861 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1862 /* handle off diagonal part of matrix */ 1863 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1864 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1865 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1866 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1867 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1868 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1869 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1870 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1871 if (x) { 1872 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1875 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1876 } 1877 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1878 /* remove zeroed rows of off diagonal matrix */ 1879 for (i = 0; i < len; ++i) { 1880 row = lrows[i]; 1881 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1882 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1883 for (k = 0; k < count; ++k) { 1884 aa[0] = 0.0; 1885 aa += bs; 1886 } 1887 } 1888 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1889 for (i = 0; i < l->B->rmap->N; ++i) { 1890 row = i/bs; 1891 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1892 for (k = 0; k < bs; ++k) { 1893 col = bs*baij->j[j] + k; 1894 if (PetscAbsScalar(mask[col])) { 1895 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1896 if (b) bb[i] -= aa[0]*xx[col]; 1897 aa[0] = 0.0; 1898 } 1899 } 1900 } 1901 } 1902 if (x) { 1903 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1904 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1905 } 1906 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1907 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1908 ierr = PetscFree(lrows);CHKERRQ(ierr); 1909 1910 /* only change matrix nonzero state if pattern was allowed to be changed */ 1911 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1912 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1913 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1920 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1921 { 1922 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1923 PetscErrorCode ierr; 1924 1925 PetscFunctionBegin; 1926 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatEqual_MPIBAIJ" 1934 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1935 { 1936 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1937 Mat a,b,c,d; 1938 PetscBool flg; 1939 PetscErrorCode ierr; 1940 1941 PetscFunctionBegin; 1942 a = matA->A; b = matA->B; 1943 c = matB->A; d = matB->B; 1944 1945 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1946 if (flg) { 1947 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1948 } 1949 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ 
"MatCopy_MPIBAIJ" 1955 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1956 { 1957 PetscErrorCode ierr; 1958 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1959 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1960 1961 PetscFunctionBegin; 1962 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1963 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1964 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1967 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1968 } 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatSetUp_MPIBAIJ" 1974 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1975 { 1976 PetscErrorCode ierr; 1977 1978 PetscFunctionBegin; 1979 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1980 PetscFunctionReturn(0); 1981 } 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 1985 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 1986 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1987 { 1988 PetscInt i,bs=Y->rmap->bs,m=Y->rmap->N/bs; 1989 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1990 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1991 const PetscInt *xi = x->i,*yi = y->i; 1992 1993 PetscFunctionBegin; 1994 /* Set the number of nonzeros in the new matrix */ 1995 for (i=0; i<m; i++) { 1996 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 1997 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 1998 nnz[i] = 0; 1999 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2000 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2001 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2002 nnz[i]++; 2003 } 2004 for (; k<nzy; k++) nnz[i]++; 2005 } 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatAXPY_MPIBAIJ" 2011 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2012 { 2013 PetscErrorCode ierr; 2014 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 2015 PetscBLASInt bnz,one=1; 2016 Mat_SeqBAIJ *x,*y; 2017 2018 PetscFunctionBegin; 2019 if (str == SAME_NONZERO_PATTERN) { 2020 PetscScalar alpha = a; 2021 x = (Mat_SeqBAIJ*)xx->A->data; 2022 y = (Mat_SeqBAIJ*)yy->A->data; 2023 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2024 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2025 x = (Mat_SeqBAIJ*)xx->B->data; 2026 y = (Mat_SeqBAIJ*)yy->B->data; 2027 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2028 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2029 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2030 } else { 2031 Mat B; 2032 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2033 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2034 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2035 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2036 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2037 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2038 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2039 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2040 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2041 ierr = 
MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2042 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2043 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 2044 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2045 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2046 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2047 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2048 } 2049 PetscFunctionReturn(0); 2050 } 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatRealPart_MPIBAIJ" 2054 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 2055 { 2056 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2061 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 2067 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 2068 { 2069 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2074 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 2080 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 2081 { 2082 PetscErrorCode ierr; 2083 IS iscol_local; 2084 PetscInt csize; 2085 2086 PetscFunctionBegin; 2087 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2088 if (call == MAT_REUSE_MATRIX) { 2089 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2090 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2091 } else { 2092 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2093 } 2094 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2095 if (call == MAT_INITIAL_MATRIX) { 2096 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 2097 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2098 } 2099 PetscFunctionReturn(0); 2100 } 2101 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 2102 #undef __FUNCT__ 2103 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 2104 /* 2105 Not great since it makes two copies of the submatrix, first an SeqBAIJ 2106 in local and then by concatenating the local matrices the end result. 2107 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 2108 */ 2109 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2110 { 2111 PetscErrorCode ierr; 2112 PetscMPIInt rank,size; 2113 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 2114 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 2115 Mat M,Mreuse; 2116 MatScalar *vwork,*aa; 2117 MPI_Comm comm; 2118 IS isrow_new, iscol_new; 2119 PetscBool idflag,allrows, allcols; 2120 Mat_SeqBAIJ *aij; 2121 2122 PetscFunctionBegin; 2123 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2124 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2125 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2126 /* The compression and expansion should be avoided. 
Doesn't point 2127 out errors, might change the indices, hence buggey */ 2128 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 2129 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 2130 2131 /* Check for special case: each processor gets entire matrix columns */ 2132 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 2133 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 2134 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 2135 else allcols = PETSC_FALSE; 2136 2137 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 2138 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 2139 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 2140 else allrows = PETSC_FALSE; 2141 2142 if (call == MAT_REUSE_MATRIX) { 2143 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2144 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2145 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2146 } else { 2147 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 2148 } 2149 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 2150 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 2151 /* 2152 m - number of local rows 2153 n - number of columns (same on all processors) 2154 rstart - first row in new global matrix generated 2155 */ 2156 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2157 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2158 m = m/bs; 2159 n = n/bs; 2160 2161 if (call == MAT_INITIAL_MATRIX) { 2162 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2163 ii = aij->i; 2164 jj = aij->j; 2165 2166 /* 2167 Determine the number of non-zeros in the diagonal and off-diagonal 2168 portions of the matrix in order to do correct preallocation 2169 */ 2170 2171 /* first get start and end of "diagonal" columns */ 2172 if (csize == PETSC_DECIDE) { 2173 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2174 if (mglobal == n*bs) { /* square matrix */ 2175 nlocal = m; 2176 } else { 2177 nlocal = n/size + ((n % size) > rank); 2178 } 2179 } else { 2180 nlocal = csize/bs; 2181 } 2182 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2183 rstart = rend - nlocal; 2184 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2185 2186 /* next, compute all the lengths */ 2187 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2188 for (i=0; i<m; i++) { 2189 jend = ii[i+1] - ii[i]; 2190 olen = 0; 2191 dlen = 0; 2192 for (j=0; j<jend; j++) { 2193 if (*jj < rstart || *jj >= rend) olen++; 2194 else dlen++; 2195 jj++; 2196 } 2197 olens[i] = olen; 2198 dlens[i] = dlen; 2199 } 2200 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2201 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2202 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2203 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2204 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2205 } else { 2206 PetscInt ml,nl; 2207 2208 M = *newmat; 2209 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2210 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2211 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2212 
/* 2213 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2214 rather than the slower MatSetValues(). 2215 */ 2216 M->was_assembled = PETSC_TRUE; 2217 M->assembled = PETSC_FALSE; 2218 } 2219 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2220 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2221 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2222 ii = aij->i; 2223 jj = aij->j; 2224 aa = aij->a; 2225 for (i=0; i<m; i++) { 2226 row = rstart/bs + i; 2227 nz = ii[i+1] - ii[i]; 2228 cwork = jj; jj += nz; 2229 vwork = aa; aa += nz*bs*bs; 2230 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2231 } 2232 2233 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 *newmat = M; 2236 2237 /* save submatrix used in processor for next request */ 2238 if (call == MAT_INITIAL_MATRIX) { 2239 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2240 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2241 } 2242 PetscFunctionReturn(0); 2243 } 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatPermute_MPIBAIJ" 2247 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2248 { 2249 MPI_Comm comm,pcomm; 2250 PetscInt clocal_size,nrows; 2251 const PetscInt *rows; 2252 PetscMPIInt size; 2253 IS crowp,lcolp; 2254 PetscErrorCode ierr; 2255 2256 PetscFunctionBegin; 2257 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2258 /* make a collective version of 'rowp' */ 2259 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2260 if (pcomm==comm) { 2261 crowp = rowp; 2262 } else { 2263 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2264 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2265 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2266 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2267 } 2268 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2269 /* make a local version of 'colp' */ 2270 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2271 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2272 if (size==1) { 2273 lcolp = colp; 2274 } else { 2275 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2276 } 2277 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2278 /* now we just get the submatrix */ 2279 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2280 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2281 /* clean up */ 2282 if (pcomm!=comm) { 2283 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2284 } 2285 if (size>1) { 2286 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2293 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2294 { 2295 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2296 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2297 2298 PetscFunctionBegin; 2299 if (nghosts) *nghosts = B->nbs; 2300 if (ghosts) *ghosts = baij->garray; 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2306 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2307 { 2308 Mat B; 2309 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2310 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2311 Mat_SeqAIJ *b; 2312 PetscErrorCode ierr; 2313 
PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2314 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2315 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2316 2317 PetscFunctionBegin; 2318 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2319 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2320 2321 /* ---------------------------------------------------------------- 2322 Tell every processor the number of nonzeros per row 2323 */ 2324 ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2325 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2326 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2327 } 2328 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2329 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2330 displs = recvcounts + size; 2331 for (i=0; i<size; i++) { 2332 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2333 displs[i] = A->rmap->range[i]/bs; 2334 } 2335 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2336 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2337 #else 2338 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2339 #endif 2340 /* --------------------------------------------------------------- 2341 Create the sequential matrix of the same type as the local block diagonal 2342 */ 2343 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2344 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2345 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2346 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2347 b = (Mat_SeqAIJ*)B->data; 2348 2349 /*-------------------------------------------------------------------- 2350 Copy my part of matrix column indices over 2351 */ 2352 sendcount = ad->nz + bd->nz; 2353 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2354 a_jsendbuf = ad->j; 2355 b_jsendbuf = bd->j; 2356 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2357 cnt = 0; 2358 for (i=0; i<n; i++) { 2359 2360 /* put in lower diagonal portion */ 2361 m = bd->i[i+1] - bd->i[i]; 2362 while (m > 0) { 2363 /* is it above diagonal (in bd (compressed) numbering) */ 2364 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2365 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2366 m--; 2367 } 2368 2369 /* put in diagonal portion */ 2370 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2371 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2372 } 2373 2374 /* put in upper diagonal portion */ 2375 while (m-- > 0) { 2376 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2377 } 2378 } 2379 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2380 2381 /*-------------------------------------------------------------------- 2382 Gather all column indices to all processors 2383 */ 2384 for (i=0; i<size; i++) { 2385 recvcounts[i] = 0; 2386 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2387 recvcounts[i] += lens[j]; 2388 } 2389 } 2390 displs[0] = 0; 2391 for (i=1; i<size; i++) { 2392 displs[i] = displs[i-1] + recvcounts[i-1]; 2393 } 2394 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2395 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2396 #else 2397 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2398 #endif 2399 /*-------------------------------------------------------------------- 2400 Assemble the matrix into useable form (note numerical values not yet set) 2401 */ 2402 /* set the b->ilen (length of each row) values */ 2403 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2404 /* set the b->i indices */ 2405 b->i[0] = 0; 2406 for (i=1; i<=A->rmap->N/bs; i++) { 2407 b->i[i] = b->i[i-1] + lens[i-1]; 2408 } 2409 ierr = PetscFree(lens);CHKERRQ(ierr); 2410 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2412 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2413 2414 if (A->symmetric) { 2415 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2416 } else if (A->hermitian) { 2417 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2418 } else if (A->structurally_symmetric) { 2419 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2420 } 2421 *newmat = B; 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatSOR_MPIBAIJ" 2427 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2428 { 2429 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2430 PetscErrorCode ierr; 2431 Vec bb1 = 0; 2432 2433 PetscFunctionBegin; 2434 if (flag == SOR_APPLY_UPPER) { 2435 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2440 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2441 } 2442 2443 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2444 if (flag & SOR_ZERO_INITIAL_GUESS) { 2445 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2446 its--; 2447 } 2448 2449 while (its--) { 2450 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2451 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2452 2453 /* update rhs: bb1 = bb - B*x */ 2454 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2455 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2456 2457 /* local sweep */ 2458 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2459 } 2460 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2461 if (flag & SOR_ZERO_INITIAL_GUESS) { 2462 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2463 its--; 2464 } 2465 while (its--) { 2466 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2468 2469 /* update rhs: bb1 = bb - B*x */ 2470 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2471 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2472 2473 /* local sweep */ 2474 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2475 } 2476 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2477 if (flag & SOR_ZERO_INITIAL_GUESS) { 2478 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2479 its--; 2480 } 2481 while (its--) { 2482 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2483 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2484 2485 /* update rhs: bb1 = bb - B*x */ 2486 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2487 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2488 2489 /* local sweep */ 2490 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2491 } 2492 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2493 2494 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2500 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2501 { 2502 PetscErrorCode ierr; 2503 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2504 PetscInt N,i,*garray = aij->garray; 2505 PetscInt ib,jb,bs = A->rmap->bs; 2506 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2507 MatScalar *a_val = a_aij->a; 2508 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2509 MatScalar *b_val = b_aij->a; 2510 PetscReal *work; 2511 2512 PetscFunctionBegin; 2513 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2514 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2515 if (type == NORM_2) { 2516 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2517 for (jb=0; jb<bs; jb++) { 2518 for (ib=0; ib<bs; ib++) { 2519 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2520 a_val++; 2521 } 2522 } 2523 } 2524 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2525 for (jb=0; jb<bs; jb++) { 2526 for (ib=0; ib<bs; ib++) { 2527 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2528 b_val++; 2529 } 2530 } 2531 } 2532 } else if (type == NORM_1) { 2533 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2534 for (jb=0; jb<bs; jb++) { 2535 for (ib=0; ib<bs; ib++) { 2536 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2537 a_val++; 2538 } 2539 } 2540 } 2541 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2542 for (jb=0; jb<bs; jb++) { 2543 for (ib=0; ib<bs; ib++) { 2544 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2545 b_val++; 2546 } 2547 } 2548 } 2549 } else if (type == NORM_INFINITY) { 2550 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2551 for (jb=0; jb<bs; jb++) { 2552 for (ib=0; ib<bs; ib++) { 2553 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2554 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2555 a_val++; 2556 } 2557 } 2558 } 2559 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2560 for (jb=0; jb<bs; jb++) { 2561 for (ib=0; ib<bs; ib++) { 2562 int col = garray[b_aij->j[i]] * bs + jb; 2563 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2564 b_val++; 2565 } 2566 } 2567 } 2568 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2569 if (type == NORM_INFINITY) { 2570 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2571 } else { 2572 ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2573 } 2574 ierr = PetscFree(work);CHKERRQ(ierr); 2575 if (type == NORM_2) { 2576 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 #undef __FUNCT__ 2582 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2583 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2584 { 2585 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2586 PetscErrorCode ierr; 2587 2588 PetscFunctionBegin; 2589 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 2594 /* -------------------------------------------------------------------*/ 2595 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2596 MatGetRow_MPIBAIJ, 2597 MatRestoreRow_MPIBAIJ, 2598 MatMult_MPIBAIJ, 2599 /* 4*/ MatMultAdd_MPIBAIJ, 2600 MatMultTranspose_MPIBAIJ, 2601 MatMultTransposeAdd_MPIBAIJ, 2602 0, 2603 0, 2604 0, 2605 /*10*/ 0, 2606 0, 2607 0, 2608 MatSOR_MPIBAIJ, 2609 MatTranspose_MPIBAIJ, 2610 /*15*/ MatGetInfo_MPIBAIJ, 2611 MatEqual_MPIBAIJ, 2612 MatGetDiagonal_MPIBAIJ, 2613 MatDiagonalScale_MPIBAIJ, 2614 MatNorm_MPIBAIJ, 2615 /*20*/ MatAssemblyBegin_MPIBAIJ, 2616 MatAssemblyEnd_MPIBAIJ, 2617 MatSetOption_MPIBAIJ, 2618 MatZeroEntries_MPIBAIJ, 2619 /*24*/ MatZeroRows_MPIBAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*29*/ MatSetUp_MPIBAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*34*/ MatDuplicate_MPIBAIJ, 2630 0, 2631 0, 2632 0, 2633 0, 2634 /*39*/ MatAXPY_MPIBAIJ, 2635 MatGetSubMatrices_MPIBAIJ, 2636 MatIncreaseOverlap_MPIBAIJ, 2637 MatGetValues_MPIBAIJ, 2638 MatCopy_MPIBAIJ, 2639 /*44*/ 0, 2640 MatScale_MPIBAIJ, 2641 0, 2642 0, 2643 MatZeroRowsColumns_MPIBAIJ, 2644 /*49*/ 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*54*/ MatFDColoringCreate_MPIXAIJ, 2650 0, 2651 MatSetUnfactored_MPIBAIJ, 2652 MatPermute_MPIBAIJ, 2653 MatSetValuesBlocked_MPIBAIJ, 2654 /*59*/ MatGetSubMatrix_MPIBAIJ, 2655 MatDestroy_MPIBAIJ, 2656 MatView_MPIBAIJ, 2657 0, 2658 0, 2659 /*64*/ 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 /*74*/ 0, 2670 MatFDColoringApply_BAIJ, 2671 0, 2672 0, 2673 0, 2674 /*79*/ 0, 2675 0, 2676 0, 2677 0, 2678 MatLoad_MPIBAIJ, 2679 /*84*/ 0, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*89*/ 0, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /*94*/ 0, 2690 0, 2691 0, 2692 0, 2693 0, 2694 /*99*/ 0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*104*/0, 2700 MatRealPart_MPIBAIJ, 2701 MatImaginaryPart_MPIBAIJ, 2702 0, 2703 0, 2704 /*109*/0, 2705 0, 2706 0, 2707 0, 2708 0, 2709 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2710 0, 2711 MatGetGhosts_MPIBAIJ, 2712 0, 2713 0, 2714 /*119*/0, 2715 0, 2716 0, 2717 0, 2718 MatGetMultiProcBlock_MPIBAIJ, 2719 /*124*/0, 2720 MatGetColumnNorms_MPIBAIJ, 2721 MatInvertBlockDiagonal_MPIBAIJ, 2722 0, 2723 0, 2724 /*129*/ 0, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /*134*/ 0, 2730 0, 2731 0, 2732 0, 2733 0, 2734 /*139*/ 0, 2735 0, 2736 0, 2737 MatFDColoringSetUp_MPIXAIJ 2738 }; 2739 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2742 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2743 { 2744 PetscFunctionBegin; 2745 *a = ((Mat_MPIBAIJ*)A->data)->A; 2746 PetscFunctionReturn(0); 2747 } 2748 2749 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2753 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2754 { 2755 PetscInt m,rstart,cstart,cend; 2756 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2757 const PetscInt *JJ =0; 2758 
PetscScalar *values=0; 2759 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2760 PetscErrorCode ierr; 2761 2762 PetscFunctionBegin; 2763 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2768 m = B->rmap->n/bs; 2769 rstart = B->rmap->rstart/bs; 2770 cstart = B->cmap->rstart/bs; 2771 cend = B->cmap->rend/bs; 2772 2773 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2774 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2775 for (i=0; i<m; i++) { 2776 nz = ii[i+1] - ii[i]; 2777 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2778 nz_max = PetscMax(nz_max,nz); 2779 JJ = jj + ii[i]; 2780 for (j=0; j<nz; j++) { 2781 if (*JJ >= cstart) break; 2782 JJ++; 2783 } 2784 d = 0; 2785 for (; j<nz; j++) { 2786 if (*JJ++ >= cend) break; 2787 d++; 2788 } 2789 d_nnz[i] = d; 2790 o_nnz[i] = nz - d; 2791 } 2792 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2793 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2794 2795 values = (PetscScalar*)V; 2796 if (!values) { 2797 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2798 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2799 } 2800 for (i=0; i<m; i++) { 2801 PetscInt row = i + rstart; 2802 PetscInt ncols = ii[i+1] - ii[i]; 2803 const PetscInt *icols = jj + ii[i]; 2804 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2805 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2806 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2807 } else { /* block ordering does not match so we can only insert one block at a time. */ 2808 PetscInt j; 2809 for (j=0; j<ncols; j++) { 2810 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2811 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 } 2814 } 2815 2816 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2817 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 #undef __FUNCT__ 2824 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2825 /*@C 2826 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2827 (the default parallel PETSc format). 2828 2829 Collective on MPI_Comm 2830 2831 Input Parameters: 2832 + B - the matrix 2833 . bs - the block size 2834 . i - the indices into j for the start of each local row (starts with zero) 2835 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2836 - v - optional values in the matrix 2837 2838 Level: developer 2839 2840 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2841 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2842 over rows within a block and the last index is over columns within a block row. 
Fortran programs will likely set 2843 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2844 block column and the second index is over columns within a block. 2845 2846 .keywords: matrix, aij, compressed row, sparse, parallel 2847 2848 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2849 @*/ 2850 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2851 { 2852 PetscErrorCode ierr; 2853 2854 PetscFunctionBegin; 2855 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2856 PetscValidType(B,1); 2857 PetscValidLogicalCollectiveInt(B,bs,2); 2858 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2859 PetscFunctionReturn(0); 2860 } 2861 2862 #undef __FUNCT__ 2863 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2864 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2865 { 2866 Mat_MPIBAIJ *b; 2867 PetscErrorCode ierr; 2868 PetscInt i; 2869 2870 PetscFunctionBegin; 2871 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2872 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2873 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2874 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2875 2876 if (d_nnz) { 2877 for (i=0; i<B->rmap->n/bs; i++) { 2878 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2879 } 2880 } 2881 if (o_nnz) { 2882 for (i=0; i<B->rmap->n/bs; i++) { 2883 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2884 } 2885 } 2886 2887 b = (Mat_MPIBAIJ*)B->data; 2888 b->bs2 = bs*bs; 2889 b->mbs = B->rmap->n/bs; 2890 b->nbs = B->cmap->n/bs; 2891 b->Mbs = B->rmap->N/bs; 2892 b->Nbs = B->cmap->N/bs; 2893 2894 for (i=0; i<=b->size; i++) { 2895 b->rangebs[i] = B->rmap->range[i]/bs; 2896 } 2897 b->rstartbs = B->rmap->rstart/bs; 2898 b->rendbs = B->rmap->rend/bs; 2899 b->cstartbs = B->cmap->rstart/bs; 2900 b->cendbs = B->cmap->rend/bs; 2901 2902 if (!B->preallocated) { 2903 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2904 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2905 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2906 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2907 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2908 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2909 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2910 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2911 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2912 } 2913 2914 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2915 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2916 B->preallocated = PETSC_TRUE; 2917 PetscFunctionReturn(0); 2918 } 2919 2920 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2921 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 2925 PETSC_EXTERN 
PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2926 { 2927 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2928 PetscErrorCode ierr; 2929 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2930 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2931 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2932 2933 PetscFunctionBegin; 2934 ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 2935 ii[0] = 0; 2936 for (i=0; i<M; i++) { 2937 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2938 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2939 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2940 /* remove one from count of matrix has diagonal */ 2941 for (j=id[i]; j<id[i+1]; j++) { 2942 if (jd[j] == i) {ii[i+1]--;break;} 2943 } 2944 } 2945 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2946 cnt = 0; 2947 for (i=0; i<M; i++) { 2948 for (j=io[i]; j<io[i+1]; j++) { 2949 if (garray[jo[j]] > rstart) break; 2950 jj[cnt++] = garray[jo[j]]; 2951 } 2952 for (k=id[i]; k<id[i+1]; k++) { 2953 if (jd[k] != i) { 2954 jj[cnt++] = rstart + jd[k]; 2955 } 2956 } 2957 for (; j<io[i+1]; j++) { 2958 jj[cnt++] = garray[jo[j]]; 2959 } 2960 } 2961 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964 2965 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2966 2967 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 2971 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2972 { 2973 PetscErrorCode ierr; 2974 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2975 Mat B; 2976 Mat_MPIAIJ *b; 2977 2978 PetscFunctionBegin; 2979 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2980 2981 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2982 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2983 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2984 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2985 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2986 b = (Mat_MPIAIJ*) B->data; 2987 2988 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2989 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2990 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2991 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2992 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2995 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 if (reuse == MAT_REUSE_MATRIX) { 2998 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2999 } else { 3000 *newmat = B; 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #if defined(PETSC_HAVE_MUMPS) 3006 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 3007 #endif 3008 3009 /*MC 3010 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 
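
   Example usage (a minimal sketch; the communicator, sizes, block size and
   preallocation counts below are illustrative placeholders, not taken from this file):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   ... insert values with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve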
3011 3012 Options Database Keys: 3013 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3014 . -mat_block_size <bs> - set the blocksize used to store the matrix 3015 - -mat_use_hash_table <fact> 3016 3017 Level: beginner 3018 3019 .seealso: MatCreateMPIBAIJ 3020 M*/ 3021 3022 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "MatCreate_MPIBAIJ" 3026 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3027 { 3028 Mat_MPIBAIJ *b; 3029 PetscErrorCode ierr; 3030 PetscBool flg; 3031 3032 PetscFunctionBegin; 3033 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3034 B->data = (void*)b; 3035 3036 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3037 B->assembled = PETSC_FALSE; 3038 3039 B->insertmode = NOT_SET_VALUES; 3040 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3041 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3042 3043 /* build local table of row and column ownerships */ 3044 ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 3045 3046 /* build cache for off array entries formed */ 3047 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3048 3049 b->donotstash = PETSC_FALSE; 3050 b->colmap = NULL; 3051 b->garray = NULL; 3052 b->roworiented = PETSC_TRUE; 3053 3054 /* stuff used in block assembly */ 3055 b->barray = 0; 3056 3057 /* stuff used for matrix vector multiply */ 3058 b->lvec = 0; 3059 b->Mvctx = 0; 3060 3061 /* stuff for MatGetRow() */ 3062 b->rowindices = 0; 3063 b->rowvalues = 0; 3064 b->getrowactive = PETSC_FALSE; 3065 3066 /* hash table stuff */ 3067 b->ht = 0; 3068 b->hd = 0; 3069 b->ht_size = 0; 3070 b->ht_flag = PETSC_FALSE; 3071 b->ht_fact = 0; 3072 b->ht_total_ct = 0; 3073 b->ht_insert_ct = 0; 3074 3075 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3076 b->ijonly = PETSC_FALSE; 3077 3078 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3079 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3080 if (flg) { 3081 PetscReal fact = 1.39; 3082 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3083 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3084 if (fact <= 1.0) fact = 1.39; 3085 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3086 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 3087 } 3088 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3089 3090 #if defined(PETSC_HAVE_MUMPS) 3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3092 #endif 3093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3098 
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; <fact> is the hash table size factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateBAIJ"
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; <fact> is the hash table size factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
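
   Example usage (an illustrative sketch, not taken from a PETSc example; the sizes and preallocation
   counts below are made up):

.vb
   Mat            A;
   PetscErrorCode ierr;

   /* 12x12 global matrix with 2x2 blocks; let PETSc choose the local sizes,
      preallocating 2 diagonal blocks and 1 off-diagonal block per block row */
   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,12,12,2,NULL,1,NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValuesBlocked() or MatSetValues(), then call
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY) and MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY) ... */
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve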
3308 3309 Level: intermediate 3310 3311 .keywords: matrix, block, aij, compressed row, sparse, parallel 3312 3313 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3314 @*/ 3315 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3316 { 3317 PetscErrorCode ierr; 3318 PetscMPIInt size; 3319 3320 PetscFunctionBegin; 3321 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3322 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3323 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3324 if (size > 1) { 3325 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3326 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3327 } else { 3328 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3329 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3330 } 3331 PetscFunctionReturn(0); 3332 } 3333 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3336 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3337 { 3338 Mat mat; 3339 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3340 PetscErrorCode ierr; 3341 PetscInt len=0; 3342 3343 PetscFunctionBegin; 3344 *newmat = 0; 3345 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3346 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3347 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3348 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3349 3350 mat->factortype = matin->factortype; 3351 mat->preallocated = PETSC_TRUE; 3352 mat->assembled = PETSC_TRUE; 3353 mat->insertmode = NOT_SET_VALUES; 3354 3355 a = (Mat_MPIBAIJ*)mat->data; 3356 mat->rmap->bs = matin->rmap->bs; 3357 a->bs2 = oldmat->bs2; 3358 a->mbs = oldmat->mbs; 3359 a->nbs = oldmat->nbs; 3360 a->Mbs = oldmat->Mbs; 3361 a->Nbs = oldmat->Nbs; 3362 3363 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3364 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3365 3366 a->size = oldmat->size; 3367 a->rank = oldmat->rank; 3368 a->donotstash = oldmat->donotstash; 3369 a->roworiented = oldmat->roworiented; 3370 a->rowindices = 0; 3371 a->rowvalues = 0; 3372 a->getrowactive = PETSC_FALSE; 3373 a->barray = 0; 3374 a->rstartbs = oldmat->rstartbs; 3375 a->rendbs = oldmat->rendbs; 3376 a->cstartbs = oldmat->cstartbs; 3377 a->cendbs = oldmat->cendbs; 3378 3379 /* hash table stuff */ 3380 a->ht = 0; 3381 a->hd = 0; 3382 a->ht_size = 0; 3383 a->ht_flag = oldmat->ht_flag; 3384 a->ht_fact = oldmat->ht_fact; 3385 a->ht_total_ct = 0; 3386 a->ht_insert_ct = 0; 3387 3388 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3389 if (oldmat->colmap) { 3390 #if defined(PETSC_USE_CTABLE) 3391 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3392 #else 3393 ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 3394 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3395 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3396 #endif 3397 } else a->colmap = 0; 3398 3399 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3400 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3401 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3402 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3403 } else a->garray = 0; 3404 3405 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3406 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3407 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3408 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3409 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3410 3411 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3412 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3413 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3414 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3415 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3416 *newmat = mat; 3417 PetscFunctionReturn(0); 3418 } 3419 3420 #undef __FUNCT__ 3421 #define __FUNCT__ "MatLoad_MPIBAIJ" 3422 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3423 { 3424 PetscErrorCode ierr; 3425 int fd; 3426 PetscInt i,nz,j,rstart,rend; 3427 PetscScalar *vals,*buf; 3428 MPI_Comm comm; 3429 MPI_Status status; 3430 PetscMPIInt rank,size,maxnz; 3431 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3432 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3433 PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax; 3434 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3435 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3436 PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 3437 3438 PetscFunctionBegin; 3439 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3440 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3441 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3442 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3443 3444 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3446 if (!rank) { 3447 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3448 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3449 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3450 } 3451 3452 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 3453 3454 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3455 M = header[1]; N = header[2]; 3456 3457 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3458 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 3459 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 3460 3461 /* If global sizes are set, check if they are consistent with that given in the file */ 3462 if (sizesset) { 3463 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 3464 } 3465 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3466 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file 
has (%d) and input matrix has (%d)",N,gcols); 3467 3468 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3469 3470 /* 3471 This code adds extra rows to make sure the number of rows is 3472 divisible by the blocksize 3473 */ 3474 Mbs = M/bs; 3475 extra_rows = bs - M + bs*Mbs; 3476 if (extra_rows == bs) extra_rows = 0; 3477 else Mbs++; 3478 if (extra_rows && !rank) { 3479 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3480 } 3481 3482 /* determine ownership of all rows */ 3483 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3484 mbs = Mbs/size + ((Mbs % size) > rank); 3485 m = mbs*bs; 3486 } else { /* User set */ 3487 m = newmat->rmap->n; 3488 mbs = m/bs; 3489 } 3490 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3491 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3492 3493 /* process 0 needs enough room for process with most rows */ 3494 if (!rank) { 3495 mmax = rowners[1]; 3496 for (i=2; i<=size; i++) { 3497 mmax = PetscMax(mmax,rowners[i]); 3498 } 3499 mmax*=bs; 3500 } else mmax = -1; /* unused, but compiler warns anyway */ 3501 3502 rowners[0] = 0; 3503 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3504 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3505 rstart = rowners[rank]; 3506 rend = rowners[rank+1]; 3507 3508 /* distribute row lengths to all processors */ 3509 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3510 if (!rank) { 3511 mend = m; 3512 if (size == 1) mend = mend - extra_rows; 3513 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3514 for (j=mend; j<m; j++) locrowlens[j] = 1; 3515 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3516 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3517 for (j=0; j<m; j++) { 3518 procsnz[0] += locrowlens[j]; 3519 } 3520 for (i=1; i<size; i++) { 3521 mend = browners[i+1] - browners[i]; 3522 if (i == size-1) mend = mend - extra_rows; 3523 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3524 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3525 /* calculate the number of nonzeros on each processor */ 3526 for (j=0; j<browners[i+1]-browners[i]; j++) { 3527 procsnz[i] += rowlengths[j]; 3528 } 3529 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3530 } 3531 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3532 } else { 3533 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3534 } 3535 3536 if (!rank) { 3537 /* determine max buffer needed and allocate it */ 3538 maxnz = procsnz[0]; 3539 for (i=1; i<size; i++) { 3540 maxnz = PetscMax(maxnz,procsnz[i]); 3541 } 3542 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3543 3544 /* read in my part of the matrix column indices */ 3545 nz = procsnz[0]; 3546 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3547 mycols = ibuf; 3548 if (size == 1) nz -= extra_rows; 3549 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3550 if (size == 1) { 3551 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3552 } 3553 3554 /* read in every ones (except the last) and ship off */ 3555 for (i=1; i<size-1; i++) { 3556 nz = procsnz[i]; 3557 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3558 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3559 } 3560 /* read in the stuff for the last proc */ 3561 if (size != 1) { 3562 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3563 ierr = 
PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3564 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3565 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3566 } 3567 ierr = PetscFree(cols);CHKERRQ(ierr); 3568 } else { 3569 /* determine buffer space needed for message */ 3570 nz = 0; 3571 for (i=0; i<m; i++) { 3572 nz += locrowlens[i]; 3573 } 3574 ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 3575 mycols = ibuf; 3576 /* receive message of column indices*/ 3577 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3578 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3579 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3580 } 3581 3582 /* loop over local rows, determining number of off diagonal entries */ 3583 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3584 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3585 rowcount = 0; nzcount = 0; 3586 for (i=0; i<mbs; i++) { 3587 dcount = 0; 3588 odcount = 0; 3589 for (j=0; j<bs; j++) { 3590 kmax = locrowlens[rowcount]; 3591 for (k=0; k<kmax; k++) { 3592 tmp = mycols[nzcount++]/bs; 3593 if (!mask[tmp]) { 3594 mask[tmp] = 1; 3595 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3596 else masked1[dcount++] = tmp; 3597 } 3598 } 3599 rowcount++; 3600 } 3601 3602 dlens[i] = dcount; 3603 odlens[i] = odcount; 3604 3605 /* zero out the mask elements we set */ 3606 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3607 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3608 } 3609 3610 3611 if (!sizesset) { 3612 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3613 } 3614 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3615 3616 if (!rank) { 3617 ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 3618 /* read in my part of the matrix numerical values */ 3619 nz = procsnz[0]; 3620 vals = buf; 3621 mycols = ibuf; 3622 if (size == 1) nz -= extra_rows; 3623 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3624 if (size == 1) { 3625 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3626 } 3627 3628 /* insert into matrix */ 3629 jj = rstart*bs; 3630 for (i=0; i<m; i++) { 3631 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3632 mycols += locrowlens[i]; 3633 vals += locrowlens[i]; 3634 jj++; 3635 } 3636 /* read in other processors (except the last one) and ship out */ 3637 for (i=1; i<size-1; i++) { 3638 nz = procsnz[i]; 3639 vals = buf; 3640 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3641 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3642 } 3643 /* the last proc */ 3644 if (size != 1) { 3645 nz = procsnz[i] - extra_rows; 3646 vals = buf; 3647 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3648 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3649 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3650 } 3651 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3652 } else { 3653 /* receive numeric values */ 3654 ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 3655 3656 /* receive message of values*/ 3657 vals = buf; 3658 mycols = ibuf; 3659 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3660 3661 /* insert into matrix */ 3662 jj = rstart*bs; 3663 for (i=0; i<m; i++) { 3664 ierr = 
MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3665 mycols += locrowlens[i]; 3666 vals += locrowlens[i]; 3667 jj++; 3668 } 3669 } 3670 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3671 ierr = PetscFree(buf);CHKERRQ(ierr); 3672 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3673 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3674 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3675 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3676 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3677 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3678 PetscFunctionReturn(0); 3679 } 3680 3681 #undef __FUNCT__ 3682 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3683 /*@ 3684 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3685 3686 Input Parameters: 3687 . mat - the matrix 3688 . fact - factor 3689 3690 Not Collective, each process can use a different factor 3691 3692 Level: advanced 3693 3694 Notes: 3695 This can also be set by the command line option: -mat_use_hash_table <fact> 3696 3697 .keywords: matrix, hashtable, factor, HT 3698 3699 .seealso: MatSetOption() 3700 @*/ 3701 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3702 { 3703 PetscErrorCode ierr; 3704 3705 PetscFunctionBegin; 3706 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3707 PetscFunctionReturn(0); 3708 } 3709 3710 #undef __FUNCT__ 3711 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3712 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3713 { 3714 Mat_MPIBAIJ *baij; 3715 3716 PetscFunctionBegin; 3717 baij = (Mat_MPIBAIJ*)mat->data; 3718 baij->ht_fact = fact; 3719 PetscFunctionReturn(0); 3720 } 3721 3722 #undef __FUNCT__ 3723 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3724 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3725 { 3726 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3727 3728 PetscFunctionBegin; 3729 if (Ad) *Ad = a->A; 3730 if (Ao) *Ao = a->B; 3731 if (colmap) *colmap = a->garray; 3732 PetscFunctionReturn(0); 3733 } 3734 3735 /* 3736 Special version for direct calls from Fortran (to eliminate two function call overheads 3737 */ 3738 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3739 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3740 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3741 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3742 #endif 3743 3744 #undef __FUNCT__ 3745 #define __FUNCT__ "matmpibiajsetvaluesblocked" 3746 /*@C 3747 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3748 3749 Collective on Mat 3750 3751 Input Parameters: 3752 + mat - the matrix 3753 . min - number of input rows 3754 . im - input rows 3755 . nin - number of input columns 3756 . in - input columns 3757 . v - numerical values input 3758 - addvin - INSERT_VALUES or ADD_VALUES 3759 3760 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
3761 3762 Level: advanced 3763 3764 .seealso: MatSetValuesBlocked() 3765 @*/ 3766 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3767 { 3768 /* convert input arguments to C version */ 3769 Mat mat = *matin; 3770 PetscInt m = *min, n = *nin; 3771 InsertMode addv = *addvin; 3772 3773 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3774 const MatScalar *value; 3775 MatScalar *barray = baij->barray; 3776 PetscBool roworiented = baij->roworiented; 3777 PetscErrorCode ierr; 3778 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3779 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3780 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3781 3782 PetscFunctionBegin; 3783 /* tasks normally handled by MatSetValuesBlocked() */ 3784 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3785 #if defined(PETSC_USE_DEBUG) 3786 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3787 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3788 #endif 3789 if (mat->assembled) { 3790 mat->was_assembled = PETSC_TRUE; 3791 mat->assembled = PETSC_FALSE; 3792 } 3793 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3794 3795 3796 if (!barray) { 3797 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3798 baij->barray = barray; 3799 } 3800 3801 if (roworiented) stepval = (n-1)*bs; 3802 else stepval = (m-1)*bs; 3803 3804 for (i=0; i<m; i++) { 3805 if (im[i] < 0) continue; 3806 #if defined(PETSC_USE_DEBUG) 3807 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3808 #endif 3809 if (im[i] >= rstart && im[i] < rend) { 3810 row = im[i] - rstart; 3811 for (j=0; j<n; j++) { 3812 /* If NumCol = 1 then a copy is not required */ 3813 if ((roworiented) && (n == 1)) { 3814 barray = (MatScalar*)v + i*bs2; 3815 } else if ((!roworiented) && (m == 1)) { 3816 barray = (MatScalar*)v + j*bs2; 3817 } else { /* Here a copy is required */ 3818 if (roworiented) { 3819 value = v + i*(stepval+bs)*bs + j*bs; 3820 } else { 3821 value = v + j*(stepval+bs)*bs + i*bs; 3822 } 3823 for (ii=0; ii<bs; ii++,value+=stepval) { 3824 for (jj=0; jj<bs; jj++) { 3825 *barray++ = *value++; 3826 } 3827 } 3828 barray -=bs2; 3829 } 3830 3831 if (in[j] >= cstart && in[j] < cend) { 3832 col = in[j] - cstart; 3833 ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3834 } else if (in[j] < 0) continue; 3835 #if defined(PETSC_USE_DEBUG) 3836 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3837 #endif 3838 else { 3839 if (mat->was_assembled) { 3840 if (!baij->colmap) { 3841 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3842 } 3843 3844 #if defined(PETSC_USE_DEBUG) 3845 #if defined(PETSC_USE_CTABLE) 3846 { PetscInt data; 3847 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3848 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3849 } 3850 #else 3851 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3852 #endif 3853 #endif 3854 #if defined(PETSC_USE_CTABLE) 3855 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3856 col = (col - 1)/bs; 3857 #else 3858 col = (baij->colmap[in[j]] - 1)/bs; 3859 
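          /* colmap (built by MatCreateColmap_MPIBAIJ_Private()) stores bs*(local block column)+1 for each
             off-process global block column, so the statement above recovers the 0-based local block
             column index used by the off-diagonal matrix baij->B */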
#endif 3860 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3861 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3862 col = in[j]; 3863 } 3864 } else col = in[j]; 3865 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 3866 } 3867 } 3868 } else { 3869 if (!baij->donotstash) { 3870 if (roworiented) { 3871 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3872 } else { 3873 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3874 } 3875 } 3876 } 3877 } 3878 3879 /* task normally handled by MatSetValuesBlocked() */ 3880 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3881 PetscFunctionReturn(0); 3882 } 3883 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3886 /*@ 3887 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3888 CSR format the local rows. 3889 3890 Collective on MPI_Comm 3891 3892 Input Parameters: 3893 + comm - MPI communicator 3894 . bs - the block size, only a block size of 1 is supported 3895 . m - number of local rows (Cannot be PETSC_DECIDE) 3896 . n - This value should be the same as the local size used in creating the 3897 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3898 calculated if N is given) For square matrices n is almost always m. 3899 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3900 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3901 . i - row indices 3902 . j - column indices 3903 - a - matrix values 3904 3905 Output Parameter: 3906 . mat - the matrix 3907 3908 Level: intermediate 3909 3910 Notes: 3911 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3912 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3913 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3914 3915 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3916 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3917 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3918 with column-major ordering within blocks. 3919 3920 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
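
   For illustration (an assumed example, not part of the original manual page): with bs = 1, a process
   owning three local rows whose nonzero global columns are {0,1}, {1} and {1,2} would pass

.vb
   i[] = {0, 2, 3, 5};               /* CSR row offsets into j[], starting at 0 */
   j[] = {0, 1, 1, 1, 2};            /* global column indices for each row */
   a[] = {1.0, 2.0, 3.0, 4.0, 5.0};  /* one value per entry of j[] when bs = 1 */
.ve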

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* this routine builds a BAIJ matrix, so the type must be MATMPIBAIJ (not MATMPISBAIJ) for
     MatMPIBAIJSetPreallocationCSR() below to take effect */
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
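
/*
   Usage sketch (hypothetical, not part of the PETSc sources; the array contents and sizes are made up):
   each process supplies two local rows in CSR form and lets PETSc determine the global sizes.

     PetscInt       i[] = {0,2,3};
     PetscInt       j[] = {0,1,1};
     PetscScalar    a[] = {1.0,2.0,3.0};
     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/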